|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.6544502617801047, |
|
"eval_steps": 500, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.000000000000001e-07, |
|
"logits/chosen": -2.729219675064087, |
|
"logits/rejected": -2.713034152984619, |
|
"logps/chosen": -183.00042724609375, |
|
"logps/rejected": -183.33316040039062, |
|
"loss": 0.6973, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.004850482568144798, |
|
"rewards/margins": -0.007815884426236153, |
|
"rewards/rejected": 0.0029654023237526417, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"logits/chosen": -2.480727195739746, |
|
"logits/rejected": -2.563934564590454, |
|
"logps/chosen": -159.55963134765625, |
|
"logps/rejected": -157.36929321289062, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.004495501983910799, |
|
"rewards/margins": 0.006143546663224697, |
|
"rewards/rejected": -0.0016480451449751854, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.5e-06, |
|
"logits/chosen": -2.856149911880493, |
|
"logits/rejected": -2.8624300956726074, |
|
"logps/chosen": -241.56802368164062, |
|
"logps/rejected": -251.95797729492188, |
|
"loss": 0.6969, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.01090860366821289, |
|
"rewards/margins": -0.007115649990737438, |
|
"rewards/rejected": -0.0037929536774754524, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"logits/chosen": -2.473580837249756, |
|
"logits/rejected": -2.6020100116729736, |
|
"logps/chosen": -138.55348205566406, |
|
"logps/rejected": -167.7603759765625, |
|
"loss": 0.6844, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.01281428337097168, |
|
"rewards/margins": 0.01771531254053116, |
|
"rewards/rejected": -0.0049010273069143295, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.5e-06, |
|
"logits/chosen": -2.446133613586426, |
|
"logits/rejected": -2.5022342205047607, |
|
"logps/chosen": -140.56512451171875, |
|
"logps/rejected": -178.04331970214844, |
|
"loss": 0.6995, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.013703584671020508, |
|
"rewards/margins": -0.012241363525390625, |
|
"rewards/rejected": -0.001462221029214561, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3e-06, |
|
"logits/chosen": -2.5403974056243896, |
|
"logits/rejected": -2.650925874710083, |
|
"logps/chosen": -162.82369995117188, |
|
"logps/rejected": -214.57489013671875, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.003328490536659956, |
|
"rewards/margins": 0.004930590279400349, |
|
"rewards/rejected": -0.0016021011397242546, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.5000000000000004e-06, |
|
"logits/chosen": -2.6547656059265137, |
|
"logits/rejected": -2.577648162841797, |
|
"logps/chosen": -219.3771514892578, |
|
"logps/rejected": -215.02362060546875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.0027452707290649414, |
|
"rewards/margins": 0.0002449510502628982, |
|
"rewards/rejected": 0.0025003196205943823, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.000000000000001e-06, |
|
"logits/chosen": -2.5475914478302, |
|
"logits/rejected": -2.586148500442505, |
|
"logps/chosen": -214.5223388671875, |
|
"logps/rejected": -236.43626403808594, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.007447218522429466, |
|
"rewards/margins": 0.0010163071565330029, |
|
"rewards/rejected": 0.006430912297219038, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.5e-06, |
|
"logits/chosen": -2.657726526260376, |
|
"logits/rejected": -2.7398831844329834, |
|
"logps/chosen": -158.13832092285156, |
|
"logps/rejected": -176.91400146484375, |
|
"loss": 0.6877, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.009470987133681774, |
|
"rewards/margins": 0.011165929958224297, |
|
"rewards/rejected": -0.0016949418932199478, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5e-06, |
|
"logits/chosen": -2.3761770725250244, |
|
"logits/rejected": -2.4064137935638428, |
|
"logps/chosen": -176.59835815429688, |
|
"logps/rejected": -163.67300415039062, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.002156305592507124, |
|
"rewards/margins": 0.004867983516305685, |
|
"rewards/rejected": -0.002711677923798561, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.500000000000001e-06, |
|
"logits/chosen": -2.6009013652801514, |
|
"logits/rejected": -2.645084857940674, |
|
"logps/chosen": -197.91195678710938, |
|
"logps/rejected": -245.907470703125, |
|
"loss": 0.695, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.001973224338144064, |
|
"rewards/margins": -0.0031423806212842464, |
|
"rewards/rejected": 0.0011691567488014698, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 6e-06, |
|
"logits/chosen": -2.70407772064209, |
|
"logits/rejected": -2.713822364807129, |
|
"logps/chosen": -190.91189575195312, |
|
"logps/rejected": -190.74317932128906, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.0004999642260372639, |
|
"rewards/margins": 0.002835321705788374, |
|
"rewards/rejected": -0.00233535747975111, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 6.5000000000000004e-06, |
|
"logits/chosen": -2.585596799850464, |
|
"logits/rejected": -2.6356894969940186, |
|
"logps/chosen": -214.14459228515625, |
|
"logps/rejected": -242.9970245361328, |
|
"loss": 0.687, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.006649780552834272, |
|
"rewards/margins": 0.01303944643586874, |
|
"rewards/rejected": -0.019689226523041725, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.000000000000001e-06, |
|
"logits/chosen": -2.7044496536254883, |
|
"logits/rejected": -2.6631391048431396, |
|
"logps/chosen": -183.95582580566406, |
|
"logps/rejected": -169.8933868408203, |
|
"loss": 0.6967, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.01296384446322918, |
|
"rewards/margins": -0.006747078616172075, |
|
"rewards/rejected": -0.0062167649157345295, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.5e-06, |
|
"logits/chosen": -2.6691248416900635, |
|
"logits/rejected": -2.6515817642211914, |
|
"logps/chosen": -161.9134979248047, |
|
"logps/rejected": -170.60101318359375, |
|
"loss": 0.6866, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.009864186868071556, |
|
"rewards/margins": 0.01341402530670166, |
|
"rewards/rejected": -0.0035498379729688168, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.000000000000001e-06, |
|
"logits/chosen": -2.6293349266052246, |
|
"logits/rejected": -2.652617931365967, |
|
"logps/chosen": -161.3071746826172, |
|
"logps/rejected": -169.07638549804688, |
|
"loss": 0.6808, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.005685711745172739, |
|
"rewards/margins": 0.025187280029058456, |
|
"rewards/rejected": -0.019501566886901855, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.500000000000002e-06, |
|
"logits/chosen": -2.6463351249694824, |
|
"logits/rejected": -2.696180582046509, |
|
"logps/chosen": -154.61085510253906, |
|
"logps/rejected": -148.3529510498047, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0029291389510035515, |
|
"rewards/margins": 0.0034487005323171616, |
|
"rewards/rejected": -0.0005195615813136101, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9e-06, |
|
"logits/chosen": -2.5302553176879883, |
|
"logits/rejected": -2.4991636276245117, |
|
"logps/chosen": -152.20655822753906, |
|
"logps/rejected": -131.0479278564453, |
|
"loss": 0.6954, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.009920882061123848, |
|
"rewards/margins": -0.004111624322831631, |
|
"rewards/rejected": -0.005809259135276079, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.5e-06, |
|
"logits/chosen": -2.5337584018707275, |
|
"logits/rejected": -2.6624388694763184, |
|
"logps/chosen": -160.88140869140625, |
|
"logps/rejected": -195.38058471679688, |
|
"loss": 0.6829, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 1.540081575512886e-05, |
|
"rewards/margins": 0.021059704944491386, |
|
"rewards/rejected": -0.021044302731752396, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1e-05, |
|
"logits/chosen": -2.7187581062316895, |
|
"logits/rejected": -2.699402093887329, |
|
"logps/chosen": -184.01405334472656, |
|
"logps/rejected": -200.99124145507812, |
|
"loss": 0.6898, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.007425189018249512, |
|
"rewards/margins": 0.007250881753861904, |
|
"rewards/rejected": -0.014676070772111416, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.05e-05, |
|
"logits/chosen": -2.5241212844848633, |
|
"logits/rejected": -2.5988845825195312, |
|
"logps/chosen": -181.3677215576172, |
|
"logps/rejected": -156.8072509765625, |
|
"loss": 0.687, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.006232499144971371, |
|
"rewards/margins": 0.01287851296365261, |
|
"rewards/rejected": -0.006646013353019953, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.1000000000000001e-05, |
|
"logits/chosen": -2.531973361968994, |
|
"logits/rejected": -2.594179153442383, |
|
"logps/chosen": -166.6884307861328, |
|
"logps/rejected": -173.5614013671875, |
|
"loss": 0.6942, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.007440138608217239, |
|
"rewards/margins": -0.0019538167398422956, |
|
"rewards/rejected": -0.0054863216355443, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.1500000000000002e-05, |
|
"logits/chosen": -2.4815938472747803, |
|
"logits/rejected": -2.4820916652679443, |
|
"logps/chosen": -131.22227478027344, |
|
"logps/rejected": -131.2086181640625, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.0006843092851340771, |
|
"rewards/margins": 0.003669046564027667, |
|
"rewards/rejected": -0.004353356547653675, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.2e-05, |
|
"logits/chosen": -2.4899590015411377, |
|
"logits/rejected": -2.5294911861419678, |
|
"logps/chosen": -149.33543395996094, |
|
"logps/rejected": -141.8245391845703, |
|
"loss": 0.6983, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.017502309754490852, |
|
"rewards/margins": -0.010094404220581055, |
|
"rewards/rejected": -0.0074079036712646484, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.25e-05, |
|
"logits/chosen": -2.5714809894561768, |
|
"logits/rejected": -2.5547332763671875, |
|
"logps/chosen": -167.8873291015625, |
|
"logps/rejected": -173.3172149658203, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.00382807245478034, |
|
"rewards/margins": 0.006511807441711426, |
|
"rewards/rejected": -0.010339880362153053, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.3000000000000001e-05, |
|
"logits/chosen": -2.470759630203247, |
|
"logits/rejected": -2.4105560779571533, |
|
"logps/chosen": -174.09585571289062, |
|
"logps/rejected": -175.88677978515625, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0010800834279507399, |
|
"rewards/margins": 0.004079150035977364, |
|
"rewards/rejected": -0.0029990668408572674, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.3500000000000001e-05, |
|
"logits/chosen": -2.6590945720672607, |
|
"logits/rejected": -2.649517059326172, |
|
"logps/chosen": -174.6903839111328, |
|
"logps/rejected": -165.15533447265625, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.012396741658449173, |
|
"rewards/margins": 0.006480884738266468, |
|
"rewards/rejected": -0.018877625465393066, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.4000000000000001e-05, |
|
"logits/chosen": -2.517886161804199, |
|
"logits/rejected": -2.6399176120758057, |
|
"logps/chosen": -154.28765869140625, |
|
"logps/rejected": -181.42474365234375, |
|
"loss": 0.6867, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.01030280627310276, |
|
"rewards/margins": 0.01349327526986599, |
|
"rewards/rejected": -0.0237960796803236, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.45e-05, |
|
"logits/chosen": -2.5826056003570557, |
|
"logits/rejected": -2.5789594650268555, |
|
"logps/chosen": -157.37286376953125, |
|
"logps/rejected": -154.72024536132812, |
|
"loss": 0.6894, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.016228055581450462, |
|
"rewards/margins": 0.007857107557356358, |
|
"rewards/rejected": -0.024085164070129395, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.5e-05, |
|
"logits/chosen": -2.6575980186462402, |
|
"logits/rejected": -2.704176664352417, |
|
"logps/chosen": -159.80322265625, |
|
"logps/rejected": -165.82786560058594, |
|
"loss": 0.6829, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.00888748187571764, |
|
"rewards/margins": 0.02125699445605278, |
|
"rewards/rejected": -0.030144479125738144, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.55e-05, |
|
"logits/chosen": -2.650428533554077, |
|
"logits/rejected": -2.693262815475464, |
|
"logps/chosen": -164.53692626953125, |
|
"logps/rejected": -174.57235717773438, |
|
"loss": 0.6852, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.001265859231352806, |
|
"rewards/margins": 0.01664908044040203, |
|
"rewards/rejected": -0.01538322027772665, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"logits/chosen": -2.6280901432037354, |
|
"logits/rejected": -2.6152122020721436, |
|
"logps/chosen": -174.9736328125, |
|
"logps/rejected": -209.5363311767578, |
|
"loss": 0.6859, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.013908598572015762, |
|
"rewards/margins": 0.016062045469880104, |
|
"rewards/rejected": -0.029970645904541016, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.65e-05, |
|
"logits/chosen": -2.5862505435943604, |
|
"logits/rejected": -2.5280563831329346, |
|
"logps/chosen": -160.01731872558594, |
|
"logps/rejected": -172.03590393066406, |
|
"loss": 0.694, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.03881430625915527, |
|
"rewards/margins": -0.0009270897135138512, |
|
"rewards/rejected": -0.03788721561431885, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.7000000000000003e-05, |
|
"logits/chosen": -2.522751808166504, |
|
"logits/rejected": -2.49782657623291, |
|
"logps/chosen": -167.8050537109375, |
|
"logps/rejected": -177.90399169921875, |
|
"loss": 0.7061, |
|
"rewards/accuracies": 0.1875, |
|
"rewards/chosen": -0.031221888959407806, |
|
"rewards/margins": -0.02547764778137207, |
|
"rewards/rejected": -0.005744242575019598, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.75e-05, |
|
"logits/chosen": -2.557039260864258, |
|
"logits/rejected": -2.4520561695098877, |
|
"logps/chosen": -143.8395538330078, |
|
"logps/rejected": -140.37631225585938, |
|
"loss": 0.6944, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.039483096450567245, |
|
"rewards/margins": -0.0023098706733435392, |
|
"rewards/rejected": -0.03717322647571564, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.8e-05, |
|
"logits/chosen": -2.6500961780548096, |
|
"logits/rejected": -2.5340402126312256, |
|
"logps/chosen": -172.447998046875, |
|
"logps/rejected": -198.5001678466797, |
|
"loss": 0.7036, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.035362813621759415, |
|
"rewards/margins": -0.020008588209748268, |
|
"rewards/rejected": -0.015354226343333721, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.85e-05, |
|
"logits/chosen": -2.6206865310668945, |
|
"logits/rejected": -2.6976592540740967, |
|
"logps/chosen": -167.8064422607422, |
|
"logps/rejected": -185.81964111328125, |
|
"loss": 0.6987, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.03894533962011337, |
|
"rewards/margins": -0.009856510907411575, |
|
"rewards/rejected": -0.029088832437992096, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9e-05, |
|
"logits/chosen": -2.434297800064087, |
|
"logits/rejected": -2.651834487915039, |
|
"logps/chosen": -169.50946044921875, |
|
"logps/rejected": -257.7209167480469, |
|
"loss": 0.6941, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.03687644004821777, |
|
"rewards/margins": -0.0012842637952417135, |
|
"rewards/rejected": -0.035592176020145416, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9500000000000003e-05, |
|
"logits/chosen": -2.4155445098876953, |
|
"logits/rejected": -2.487833023071289, |
|
"logps/chosen": -180.0078582763672, |
|
"logps/rejected": -207.1255340576172, |
|
"loss": 0.6978, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.037531279027462006, |
|
"rewards/margins": -0.008796263486146927, |
|
"rewards/rejected": -0.028735019266605377, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2e-05, |
|
"logits/chosen": -2.6476895809173584, |
|
"logits/rejected": -2.707326650619507, |
|
"logps/chosen": -182.0277862548828, |
|
"logps/rejected": -180.2669677734375, |
|
"loss": 0.6857, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.029536105692386627, |
|
"rewards/margins": 0.015740489587187767, |
|
"rewards/rejected": -0.04527659714221954, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.05e-05, |
|
"logits/chosen": -2.691012144088745, |
|
"logits/rejected": -2.688505172729492, |
|
"logps/chosen": -173.92041015625, |
|
"logps/rejected": -208.61599731445312, |
|
"loss": 0.6781, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.01940765418112278, |
|
"rewards/margins": 0.03146040812134743, |
|
"rewards/rejected": -0.05086805671453476, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.1e-05, |
|
"logits/chosen": -2.619537353515625, |
|
"logits/rejected": -2.6051697731018066, |
|
"logps/chosen": -193.06666564941406, |
|
"logps/rejected": -178.87713623046875, |
|
"loss": 0.699, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.035286471247673035, |
|
"rewards/margins": -0.010646676644682884, |
|
"rewards/rejected": -0.0246397964656353, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.15e-05, |
|
"logits/chosen": -2.611173391342163, |
|
"logits/rejected": -2.6004340648651123, |
|
"logps/chosen": -188.3818817138672, |
|
"logps/rejected": -214.30926513671875, |
|
"loss": 0.6674, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.0015826929593458772, |
|
"rewards/margins": 0.05373835563659668, |
|
"rewards/rejected": -0.05215566232800484, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.2000000000000003e-05, |
|
"logits/chosen": -2.667587995529175, |
|
"logits/rejected": -2.688136339187622, |
|
"logps/chosen": -188.89466857910156, |
|
"logps/rejected": -207.63580322265625, |
|
"loss": 0.6982, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.04630360007286072, |
|
"rewards/margins": -0.008599425666034222, |
|
"rewards/rejected": -0.03770418092608452, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.25e-05, |
|
"logits/chosen": -2.5165350437164307, |
|
"logits/rejected": -2.441213607788086, |
|
"logps/chosen": -170.480712890625, |
|
"logps/rejected": -159.08981323242188, |
|
"loss": 0.6945, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.03015182353556156, |
|
"rewards/margins": -0.0018717292696237564, |
|
"rewards/rejected": -0.028280090540647507, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.3000000000000003e-05, |
|
"logits/chosen": -2.523725748062134, |
|
"logits/rejected": -2.6105730533599854, |
|
"logps/chosen": -170.0879364013672, |
|
"logps/rejected": -197.1090087890625, |
|
"loss": 0.6884, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.022913338616490364, |
|
"rewards/margins": 0.010608267970383167, |
|
"rewards/rejected": -0.033521607518196106, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.35e-05, |
|
"logits/chosen": -2.5311026573181152, |
|
"logits/rejected": -2.54695463180542, |
|
"logps/chosen": -178.0673065185547, |
|
"logps/rejected": -182.32875061035156, |
|
"loss": 0.6816, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.008729481138288975, |
|
"rewards/margins": 0.02449822425842285, |
|
"rewards/rejected": -0.03322770446538925, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.4e-05, |
|
"logits/chosen": -2.6589465141296387, |
|
"logits/rejected": -2.6210758686065674, |
|
"logps/chosen": -182.40536499023438, |
|
"logps/rejected": -169.12103271484375, |
|
"loss": 0.6799, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.003579068696126342, |
|
"rewards/margins": 0.02751018851995468, |
|
"rewards/rejected": -0.031089257448911667, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.45e-05, |
|
"logits/chosen": -2.513227701187134, |
|
"logits/rejected": -2.543900489807129, |
|
"logps/chosen": -170.51919555664062, |
|
"logps/rejected": -168.29690551757812, |
|
"loss": 0.7047, |
|
"rewards/accuracies": 0.1875, |
|
"rewards/chosen": -0.021081138402223587, |
|
"rewards/margins": -0.022327663376927376, |
|
"rewards/rejected": 0.0012465240433812141, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.5e-05, |
|
"logits/chosen": -2.434335470199585, |
|
"logits/rejected": -2.4906935691833496, |
|
"logps/chosen": -162.16989135742188, |
|
"logps/rejected": -212.56082153320312, |
|
"loss": 0.6807, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.027100684121251106, |
|
"rewards/margins": 0.02660501003265381, |
|
"rewards/rejected": -0.053705692291259766, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.5500000000000003e-05, |
|
"logits/chosen": -2.536504030227661, |
|
"logits/rejected": -2.6495611667633057, |
|
"logps/chosen": -171.29580688476562, |
|
"logps/rejected": -191.54605102539062, |
|
"loss": 0.6815, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.025197075679898262, |
|
"rewards/margins": 0.024894431233406067, |
|
"rewards/rejected": -0.05009150505065918, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.6000000000000002e-05, |
|
"logits/chosen": -2.6077020168304443, |
|
"logits/rejected": -2.564440965652466, |
|
"logps/chosen": -170.64273071289062, |
|
"logps/rejected": -217.3800811767578, |
|
"loss": 0.7054, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.029342200607061386, |
|
"rewards/margins": -0.02205488458275795, |
|
"rewards/rejected": -0.007287311367690563, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.6500000000000004e-05, |
|
"logits/chosen": -2.4113476276397705, |
|
"logits/rejected": -2.4167730808258057, |
|
"logps/chosen": -158.03866577148438, |
|
"logps/rejected": -203.66368103027344, |
|
"loss": 0.7, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.03733339160680771, |
|
"rewards/margins": -0.011522197164595127, |
|
"rewards/rejected": -0.025811197236180305, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.7000000000000002e-05, |
|
"logits/chosen": -2.5948705673217773, |
|
"logits/rejected": -2.584592342376709, |
|
"logps/chosen": -163.16751098632812, |
|
"logps/rejected": -196.3957061767578, |
|
"loss": 0.687, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.050568290054798126, |
|
"rewards/margins": 0.014712072908878326, |
|
"rewards/rejected": -0.06528037041425705, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"logits/chosen": -2.524641513824463, |
|
"logits/rejected": -2.445185661315918, |
|
"logps/chosen": -167.29495239257812, |
|
"logps/rejected": -178.15013122558594, |
|
"loss": 0.694, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.03202950954437256, |
|
"rewards/margins": 0.00026745768263936043, |
|
"rewards/rejected": -0.03229696676135063, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.8000000000000003e-05, |
|
"logits/chosen": -2.7647242546081543, |
|
"logits/rejected": -2.721259117126465, |
|
"logps/chosen": -179.40574645996094, |
|
"logps/rejected": -180.4486541748047, |
|
"loss": 0.6771, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.0023113014176487923, |
|
"rewards/margins": 0.03354344516992569, |
|
"rewards/rejected": -0.035854749381542206, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.8499999999999998e-05, |
|
"logits/chosen": -2.676018714904785, |
|
"logits/rejected": -2.6648664474487305, |
|
"logps/chosen": -197.95919799804688, |
|
"logps/rejected": -175.45230102539062, |
|
"loss": 0.6764, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.00892419833689928, |
|
"rewards/margins": 0.036644406616687775, |
|
"rewards/rejected": -0.04556860774755478, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.9e-05, |
|
"logits/chosen": -2.644045829772949, |
|
"logits/rejected": -2.7341084480285645, |
|
"logps/chosen": -180.7408905029297, |
|
"logps/rejected": -189.23818969726562, |
|
"loss": 0.6963, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0153807383030653, |
|
"rewards/margins": -0.005131007172167301, |
|
"rewards/rejected": -0.010249733924865723, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.95e-05, |
|
"logits/chosen": -2.6532645225524902, |
|
"logits/rejected": -2.552724599838257, |
|
"logps/chosen": -178.77191162109375, |
|
"logps/rejected": -175.42742919921875, |
|
"loss": 0.6841, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.03405280038714409, |
|
"rewards/margins": 0.020096803084015846, |
|
"rewards/rejected": -0.054149605333805084, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3e-05, |
|
"logits/chosen": -2.5994341373443604, |
|
"logits/rejected": -2.6342806816101074, |
|
"logps/chosen": -176.28402709960938, |
|
"logps/rejected": -210.64498901367188, |
|
"loss": 0.6975, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.025557922199368477, |
|
"rewards/margins": -0.004868890158832073, |
|
"rewards/rejected": -0.02068903297185898, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.05e-05, |
|
"logits/chosen": -2.58650279045105, |
|
"logits/rejected": -2.7233827114105225, |
|
"logps/chosen": -184.57443237304688, |
|
"logps/rejected": -175.98129272460938, |
|
"loss": 0.7081, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.06466653198003769, |
|
"rewards/margins": -0.02823822945356369, |
|
"rewards/rejected": -0.0364283062517643, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.1e-05, |
|
"logits/chosen": -2.6913092136383057, |
|
"logits/rejected": -2.5653786659240723, |
|
"logps/chosen": -203.69729614257812, |
|
"logps/rejected": -190.5261688232422, |
|
"loss": 0.7161, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.04825315251946449, |
|
"rewards/margins": -0.04361088201403618, |
|
"rewards/rejected": -0.004642271436750889, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.15e-05, |
|
"logits/chosen": -2.666816473007202, |
|
"logits/rejected": -2.604631185531616, |
|
"logps/chosen": -225.96981811523438, |
|
"logps/rejected": -193.25982666015625, |
|
"loss": 0.7001, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.040044669061899185, |
|
"rewards/margins": -0.012522673234343529, |
|
"rewards/rejected": -0.027521992102265358, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.2000000000000005e-05, |
|
"logits/chosen": -2.6570017337799072, |
|
"logits/rejected": -2.633661985397339, |
|
"logps/chosen": -188.25892639160156, |
|
"logps/rejected": -171.55096435546875, |
|
"loss": 0.7173, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.08987895399332047, |
|
"rewards/margins": -0.04469916597008705, |
|
"rewards/rejected": -0.04517979919910431, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"logits/chosen": -2.582909107208252, |
|
"logits/rejected": -2.587308883666992, |
|
"logps/chosen": -168.5953369140625, |
|
"logps/rejected": -180.11024475097656, |
|
"loss": 0.689, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.038271259516477585, |
|
"rewards/margins": 0.01016156654804945, |
|
"rewards/rejected": -0.04843283072113991, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.3e-05, |
|
"logits/chosen": -2.7339940071105957, |
|
"logits/rejected": -2.6128218173980713, |
|
"logps/chosen": -187.80320739746094, |
|
"logps/rejected": -176.04495239257812, |
|
"loss": 0.6948, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.08385930210351944, |
|
"rewards/margins": -0.0003412736114114523, |
|
"rewards/rejected": -0.08351802825927734, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.35e-05, |
|
"logits/chosen": -2.406219482421875, |
|
"logits/rejected": -2.435150623321533, |
|
"logps/chosen": -150.17405700683594, |
|
"logps/rejected": -174.16119384765625, |
|
"loss": 0.68, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.030688336119055748, |
|
"rewards/margins": 0.027779744938015938, |
|
"rewards/rejected": -0.058468081057071686, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.4000000000000007e-05, |
|
"logits/chosen": -2.7922043800354004, |
|
"logits/rejected": -2.8177366256713867, |
|
"logps/chosen": -202.07823181152344, |
|
"logps/rejected": -233.48065185546875, |
|
"loss": 0.688, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.10107097774744034, |
|
"rewards/margins": 0.014442582614719868, |
|
"rewards/rejected": -0.11551356315612793, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.45e-05, |
|
"logits/chosen": -2.621925115585327, |
|
"logits/rejected": -2.548135280609131, |
|
"logps/chosen": -163.22723388671875, |
|
"logps/rejected": -183.32742309570312, |
|
"loss": 0.6853, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.009019946679472923, |
|
"rewards/margins": 0.020815372467041016, |
|
"rewards/rejected": -0.029835321009159088, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.5e-05, |
|
"logits/chosen": -2.7518372535705566, |
|
"logits/rejected": -2.7260899543762207, |
|
"logps/chosen": -223.34564208984375, |
|
"logps/rejected": -245.5427703857422, |
|
"loss": 0.7156, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.12107516080141068, |
|
"rewards/margins": -0.03887636959552765, |
|
"rewards/rejected": -0.08219879120588303, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.55e-05, |
|
"logits/chosen": -2.5108494758605957, |
|
"logits/rejected": -2.4832143783569336, |
|
"logps/chosen": -166.49508666992188, |
|
"logps/rejected": -157.7345428466797, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.05453377217054367, |
|
"rewards/margins": 0.0069806561805307865, |
|
"rewards/rejected": -0.06151442974805832, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.6e-05, |
|
"logits/chosen": -2.636711359024048, |
|
"logits/rejected": -2.6597518920898438, |
|
"logps/chosen": -174.1949005126953, |
|
"logps/rejected": -189.0026397705078, |
|
"loss": 0.6782, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.005148457363247871, |
|
"rewards/margins": 0.03288703039288521, |
|
"rewards/rejected": -0.03803548216819763, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3.65e-05, |
|
"logits/chosen": -2.6395423412323, |
|
"logits/rejected": -2.6543948650360107, |
|
"logps/chosen": -146.5878448486328, |
|
"logps/rejected": -180.54176330566406, |
|
"loss": 0.7075, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.0687551274895668, |
|
"rewards/margins": -0.02330932579934597, |
|
"rewards/rejected": -0.045445799827575684, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3.7e-05, |
|
"logits/chosen": -2.4257636070251465, |
|
"logits/rejected": -2.502183675765991, |
|
"logps/chosen": -179.7578125, |
|
"logps/rejected": -227.62875366210938, |
|
"loss": 0.7125, |
|
"rewards/accuracies": 0.1875, |
|
"rewards/chosen": -0.03154797852039337, |
|
"rewards/margins": -0.03330230712890625, |
|
"rewards/rejected": 0.0017543300054967403, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"logits/chosen": -2.3436474800109863, |
|
"logits/rejected": -2.498957633972168, |
|
"logps/chosen": -151.80540466308594, |
|
"logps/rejected": -164.84146118164062, |
|
"loss": 0.6777, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.017311906442046165, |
|
"rewards/margins": 0.03238987550139427, |
|
"rewards/rejected": -0.049701791256666183, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3.8e-05, |
|
"logits/chosen": -2.558389663696289, |
|
"logits/rejected": -2.6029064655303955, |
|
"logps/chosen": -176.9400177001953, |
|
"logps/rejected": -177.18336486816406, |
|
"loss": 0.7002, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.04031562805175781, |
|
"rewards/margins": -0.009864617139101028, |
|
"rewards/rejected": -0.030451007187366486, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3.85e-05, |
|
"logits/chosen": -2.651350259780884, |
|
"logits/rejected": -2.7427544593811035, |
|
"logps/chosen": -210.8737335205078, |
|
"logps/rejected": -199.6597137451172, |
|
"loss": 0.6787, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.048238325864076614, |
|
"rewards/margins": 0.031043197959661484, |
|
"rewards/rejected": -0.0792815238237381, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"logits/chosen": -2.4323079586029053, |
|
"logits/rejected": -2.576862335205078, |
|
"logps/chosen": -127.96165466308594, |
|
"logps/rejected": -161.3166961669922, |
|
"loss": 0.724, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.1004774421453476, |
|
"rewards/margins": -0.053969353437423706, |
|
"rewards/rejected": -0.046508073806762695, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3.9500000000000005e-05, |
|
"logits/chosen": -2.526383638381958, |
|
"logits/rejected": -2.552208423614502, |
|
"logps/chosen": -183.66246032714844, |
|
"logps/rejected": -195.2118377685547, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.10480672121047974, |
|
"rewards/margins": 0.005925657227635384, |
|
"rewards/rejected": -0.11073236912488937, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4e-05, |
|
"logits/chosen": -2.635021209716797, |
|
"logits/rejected": -2.7062149047851562, |
|
"logps/chosen": -203.2865753173828, |
|
"logps/rejected": -200.7851104736328, |
|
"loss": 0.6553, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.012298012152314186, |
|
"rewards/margins": 0.08196020871400833, |
|
"rewards/rejected": -0.09425821155309677, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.05e-05, |
|
"logits/chosen": -2.48476505279541, |
|
"logits/rejected": -2.613018274307251, |
|
"logps/chosen": -159.0533905029297, |
|
"logps/rejected": -222.8763427734375, |
|
"loss": 0.6488, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.017749834805727005, |
|
"rewards/margins": 0.10052147507667542, |
|
"rewards/rejected": -0.11827130615711212, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.1e-05, |
|
"logits/chosen": -2.5390264987945557, |
|
"logits/rejected": -2.5940589904785156, |
|
"logps/chosen": -220.54562377929688, |
|
"logps/rejected": -193.7894744873047, |
|
"loss": 0.7168, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.15124498307704926, |
|
"rewards/margins": -0.04028485342860222, |
|
"rewards/rejected": -0.11096014082431793, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.15e-05, |
|
"logits/chosen": -2.6519391536712646, |
|
"logits/rejected": -2.6670496463775635, |
|
"logps/chosen": -174.1206512451172, |
|
"logps/rejected": -182.67996215820312, |
|
"loss": 0.689, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.049321744590997696, |
|
"rewards/margins": 0.012697530910372734, |
|
"rewards/rejected": -0.06201927736401558, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.2e-05, |
|
"logits/chosen": -2.717592239379883, |
|
"logits/rejected": -2.7927956581115723, |
|
"logps/chosen": -169.05294799804688, |
|
"logps/rejected": -190.73846435546875, |
|
"loss": 0.664, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.03787894546985626, |
|
"rewards/margins": 0.06431596726179123, |
|
"rewards/rejected": -0.1021949052810669, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.25e-05, |
|
"logits/chosen": -2.5930697917938232, |
|
"logits/rejected": -2.509345531463623, |
|
"logps/chosen": -179.2568817138672, |
|
"logps/rejected": -168.377685546875, |
|
"loss": 0.6953, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.05612773820757866, |
|
"rewards/margins": 0.006718709133565426, |
|
"rewards/rejected": -0.06284645199775696, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.3e-05, |
|
"logits/chosen": -2.5787692070007324, |
|
"logits/rejected": -2.64978289604187, |
|
"logps/chosen": -172.04168701171875, |
|
"logps/rejected": -160.20840454101562, |
|
"loss": 0.6896, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.11709931492805481, |
|
"rewards/margins": 0.018632344901561737, |
|
"rewards/rejected": -0.13573165237903595, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.35e-05, |
|
"logits/chosen": -2.480304002761841, |
|
"logits/rejected": -2.4922451972961426, |
|
"logps/chosen": -200.24014282226562, |
|
"logps/rejected": -218.20352172851562, |
|
"loss": 0.6417, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.08723511546850204, |
|
"rewards/margins": 0.12010292708873749, |
|
"rewards/rejected": -0.20733806490898132, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"logits/chosen": -2.7480030059814453, |
|
"logits/rejected": -2.703220844268799, |
|
"logps/chosen": -164.8146514892578, |
|
"logps/rejected": -173.18063354492188, |
|
"loss": 0.7038, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.1111496239900589, |
|
"rewards/margins": -0.00250411219894886, |
|
"rewards/rejected": -0.10864551365375519, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.4500000000000004e-05, |
|
"logits/chosen": -2.4365036487579346, |
|
"logits/rejected": -2.5382070541381836, |
|
"logps/chosen": -173.4228515625, |
|
"logps/rejected": -228.0253448486328, |
|
"loss": 0.662, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.022041939198970795, |
|
"rewards/margins": 0.07041654735803604, |
|
"rewards/rejected": -0.09245848655700684, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.5e-05, |
|
"logits/chosen": -2.5812180042266846, |
|
"logits/rejected": -2.6432223320007324, |
|
"logps/chosen": -155.36810302734375, |
|
"logps/rejected": -164.1707000732422, |
|
"loss": 0.7, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.2002555876970291, |
|
"rewards/margins": -0.011146757751703262, |
|
"rewards/rejected": -0.18910883367061615, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.55e-05, |
|
"logits/chosen": -2.6511974334716797, |
|
"logits/rejected": -2.7204787731170654, |
|
"logps/chosen": -172.86270141601562, |
|
"logps/rejected": -176.37405395507812, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.11853408813476562, |
|
"rewards/margins": 0.03132196143269539, |
|
"rewards/rejected": -0.14985604584217072, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.600000000000001e-05, |
|
"logits/chosen": -2.753451108932495, |
|
"logits/rejected": -2.6007282733917236, |
|
"logps/chosen": -192.64649963378906, |
|
"logps/rejected": -164.47393798828125, |
|
"loss": 0.7061, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.26501375436782837, |
|
"rewards/margins": -0.007466696202754974, |
|
"rewards/rejected": -0.2575470507144928, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.6500000000000005e-05, |
|
"logits/chosen": -2.6831321716308594, |
|
"logits/rejected": -2.657895088195801, |
|
"logps/chosen": -191.9042510986328, |
|
"logps/rejected": -165.17892456054688, |
|
"loss": 0.7455, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.21064424514770508, |
|
"rewards/margins": -0.08327949792146683, |
|
"rewards/rejected": -0.12736473977565765, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.7e-05, |
|
"logits/chosen": -2.5191149711608887, |
|
"logits/rejected": -2.6359121799468994, |
|
"logps/chosen": -171.92013549804688, |
|
"logps/rejected": -177.6357421875, |
|
"loss": 0.7085, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.274119108915329, |
|
"rewards/margins": -0.0034166108816862106, |
|
"rewards/rejected": -0.2707024812698364, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.75e-05, |
|
"logits/chosen": -2.731271505355835, |
|
"logits/rejected": -2.7019336223602295, |
|
"logps/chosen": -180.59613037109375, |
|
"logps/rejected": -184.8212890625, |
|
"loss": 0.6521, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.14632394909858704, |
|
"rewards/margins": 0.09543509036302567, |
|
"rewards/rejected": -0.2417590469121933, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.8e-05, |
|
"logits/chosen": -2.490906000137329, |
|
"logits/rejected": -2.6088736057281494, |
|
"logps/chosen": -162.78256225585938, |
|
"logps/rejected": -176.74588012695312, |
|
"loss": 0.6336, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.14169403910636902, |
|
"rewards/margins": 0.14634834229946136, |
|
"rewards/rejected": -0.2880423963069916, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.85e-05, |
|
"logits/chosen": -2.6869685649871826, |
|
"logits/rejected": -2.6618151664733887, |
|
"logps/chosen": -158.98098754882812, |
|
"logps/rejected": -165.8663330078125, |
|
"loss": 0.6656, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.17300596833229065, |
|
"rewards/margins": 0.07225295156240463, |
|
"rewards/rejected": -0.24525892734527588, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9e-05, |
|
"logits/chosen": -2.655244827270508, |
|
"logits/rejected": -2.7159557342529297, |
|
"logps/chosen": -187.57041931152344, |
|
"logps/rejected": -189.180908203125, |
|
"loss": 0.6596, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.16985346376895905, |
|
"rewards/margins": 0.0829225480556488, |
|
"rewards/rejected": -0.25277602672576904, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9500000000000004e-05, |
|
"logits/chosen": -2.5713696479797363, |
|
"logits/rejected": -2.619272470474243, |
|
"logps/chosen": -164.45220947265625, |
|
"logps/rejected": -189.011962890625, |
|
"loss": 0.6332, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.1459609568119049, |
|
"rewards/margins": 0.14748626947402954, |
|
"rewards/rejected": -0.29344722628593445, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 5e-05, |
|
"logits/chosen": -2.7377047538757324, |
|
"logits/rejected": -2.807655096054077, |
|
"logps/chosen": -212.2166748046875, |
|
"logps/rejected": -205.24362182617188, |
|
"loss": 0.7779, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.3374475836753845, |
|
"rewards/margins": -0.14080718159675598, |
|
"rewards/rejected": -0.19664038717746735, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.999997432392803e-05, |
|
"logits/chosen": -2.604559898376465, |
|
"logits/rejected": -2.5846409797668457, |
|
"logps/chosen": -182.06011962890625, |
|
"logps/rejected": -185.51695251464844, |
|
"loss": 0.723, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.2137359231710434, |
|
"rewards/margins": -0.0511900931596756, |
|
"rewards/rejected": -0.1625458300113678, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9999897295764844e-05, |
|
"logits/chosen": -2.7066619396209717, |
|
"logits/rejected": -2.6727826595306396, |
|
"logps/chosen": -194.32232666015625, |
|
"logps/rejected": -219.6756134033203, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.2863636910915375, |
|
"rewards/margins": 0.04078531265258789, |
|
"rewards/rejected": -0.32714903354644775, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9999768915668665e-05, |
|
"logits/chosen": -2.5064425468444824, |
|
"logits/rejected": -2.49991512298584, |
|
"logps/chosen": -168.347900390625, |
|
"logps/rejected": -165.95089721679688, |
|
"loss": 0.6714, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.28736329078674316, |
|
"rewards/margins": 0.0547042116522789, |
|
"rewards/rejected": -0.3420674800872803, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.999958918390321e-05, |
|
"logits/chosen": -2.568999767303467, |
|
"logits/rejected": -2.629004716873169, |
|
"logps/chosen": -186.33514404296875, |
|
"logps/rejected": -215.6304168701172, |
|
"loss": 0.6716, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.23962916433811188, |
|
"rewards/margins": 0.06436805427074432, |
|
"rewards/rejected": -0.3039971888065338, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.999935810083766e-05, |
|
"logits/chosen": -2.667313575744629, |
|
"logits/rejected": -2.6559674739837646, |
|
"logps/chosen": -154.65960693359375, |
|
"logps/rejected": -155.13522338867188, |
|
"loss": 0.667, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.2854609489440918, |
|
"rewards/margins": 0.0853797197341919, |
|
"rewards/rejected": -0.3708406686782837, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.999907566694667e-05, |
|
"logits/chosen": -2.5488526821136475, |
|
"logits/rejected": -2.485109567642212, |
|
"logps/chosen": -222.78338623046875, |
|
"logps/rejected": -222.6953887939453, |
|
"loss": 0.6656, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.28172624111175537, |
|
"rewards/margins": 0.07234585285186768, |
|
"rewards/rejected": -0.35407203435897827, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9998741882810384e-05, |
|
"logits/chosen": -2.872877597808838, |
|
"logits/rejected": -2.8446714878082275, |
|
"logps/chosen": -182.39260864257812, |
|
"logps/rejected": -213.32289123535156, |
|
"loss": 0.6628, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.2876412272453308, |
|
"rewards/margins": 0.08153553307056427, |
|
"rewards/rejected": -0.3691767454147339, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.999835674911443e-05, |
|
"logits/chosen": -2.7004892826080322, |
|
"logits/rejected": -2.6362013816833496, |
|
"logps/chosen": -162.95108032226562, |
|
"logps/rejected": -161.67657470703125, |
|
"loss": 0.6744, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3595612049102783, |
|
"rewards/margins": 0.053390078246593475, |
|
"rewards/rejected": -0.4129512906074524, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.999792026664991e-05, |
|
"logits/chosen": -2.8488755226135254, |
|
"logits/rejected": -2.914703845977783, |
|
"logps/chosen": -173.82363891601562, |
|
"logps/rejected": -182.16908264160156, |
|
"loss": 0.7065, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.48598194122314453, |
|
"rewards/margins": 0.03310241550207138, |
|
"rewards/rejected": -0.5190844535827637, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9997432436313384e-05, |
|
"logits/chosen": -2.5648086071014404, |
|
"logits/rejected": -2.5932183265686035, |
|
"logps/chosen": -185.70846557617188, |
|
"logps/rejected": -189.32725524902344, |
|
"loss": 0.6842, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.4091907739639282, |
|
"rewards/margins": 0.04006391391158104, |
|
"rewards/rejected": -0.4492546617984772, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.99968932591069e-05, |
|
"logits/chosen": -2.770993232727051, |
|
"logits/rejected": -2.7647171020507812, |
|
"logps/chosen": -208.0184326171875, |
|
"logps/rejected": -202.5152587890625, |
|
"loss": 0.6867, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.49601614475250244, |
|
"rewards/margins": 0.027304889634251595, |
|
"rewards/rejected": -0.5233210921287537, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.999630273613799e-05, |
|
"logits/chosen": -2.4182567596435547, |
|
"logits/rejected": -2.611553430557251, |
|
"logps/chosen": -138.6142578125, |
|
"logps/rejected": -187.48898315429688, |
|
"loss": 0.7333, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.5143216252326965, |
|
"rewards/margins": -0.011710070073604584, |
|
"rewards/rejected": -0.5026116371154785, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.999566086861961e-05, |
|
"logits/chosen": -2.7679452896118164, |
|
"logits/rejected": -2.8067147731781006, |
|
"logps/chosen": -186.18055725097656, |
|
"logps/rejected": -198.92605590820312, |
|
"loss": 0.8218, |
|
"rewards/accuracies": 0.1875, |
|
"rewards/chosen": -0.6542762517929077, |
|
"rewards/margins": -0.2115614116191864, |
|
"rewards/rejected": -0.4427148103713989, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.999496765787024e-05, |
|
"logits/chosen": -2.842291831970215, |
|
"logits/rejected": -2.8023149967193604, |
|
"logps/chosen": -162.9304962158203, |
|
"logps/rejected": -182.9091796875, |
|
"loss": 0.6623, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5319749116897583, |
|
"rewards/margins": 0.08390979468822479, |
|
"rewards/rejected": -0.6158846616744995, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.9994223105313774e-05, |
|
"logits/chosen": -2.980092763900757, |
|
"logits/rejected": -2.9631030559539795, |
|
"logps/chosen": -217.55894470214844, |
|
"logps/rejected": -226.1593780517578, |
|
"loss": 0.6824, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.4032873809337616, |
|
"rewards/margins": 0.0588911734521389, |
|
"rewards/rejected": -0.4621785283088684, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.9993427212479606e-05, |
|
"logits/chosen": -2.5391757488250732, |
|
"logits/rejected": -2.725529909133911, |
|
"logps/chosen": -179.54200744628906, |
|
"logps/rejected": -204.42666625976562, |
|
"loss": 0.6975, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.39779067039489746, |
|
"rewards/margins": 0.0027880650013685226, |
|
"rewards/rejected": -0.40057870745658875, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.999257998100254e-05, |
|
"logits/chosen": -2.733851671218872, |
|
"logits/rejected": -2.87308931350708, |
|
"logps/chosen": -176.08485412597656, |
|
"logps/rejected": -193.49484252929688, |
|
"loss": 0.6449, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.40423309803009033, |
|
"rewards/margins": 0.11808924376964569, |
|
"rewards/rejected": -0.5223223567008972, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.999168141262289e-05, |
|
"logits/chosen": -2.7447009086608887, |
|
"logits/rejected": -2.808476448059082, |
|
"logps/chosen": -224.22067260742188, |
|
"logps/rejected": -265.44964599609375, |
|
"loss": 0.543, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.5002200603485107, |
|
"rewards/margins": 0.40809527039527893, |
|
"rewards/rejected": -0.9083153605461121, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.9990731509186376e-05, |
|
"logits/chosen": -2.639643430709839, |
|
"logits/rejected": -2.7004880905151367, |
|
"logps/chosen": -126.65451049804688, |
|
"logps/rejected": -152.20460510253906, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.5373567342758179, |
|
"rewards/margins": 0.048874713480472565, |
|
"rewards/rejected": -0.5862314105033875, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.998973027264419e-05, |
|
"logits/chosen": -2.6515791416168213, |
|
"logits/rejected": -2.7365236282348633, |
|
"logps/chosen": -174.46641540527344, |
|
"logps/rejected": -225.31158447265625, |
|
"loss": 0.6869, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5558931827545166, |
|
"rewards/margins": 0.04842944070696831, |
|
"rewards/rejected": -0.6043226718902588, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.998867770505295e-05, |
|
"logits/chosen": -2.724031686782837, |
|
"logits/rejected": -2.709028482437134, |
|
"logps/chosen": -166.08251953125, |
|
"logps/rejected": -183.7383575439453, |
|
"loss": 0.6879, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5829455256462097, |
|
"rewards/margins": 0.03844447806477547, |
|
"rewards/rejected": -0.6213899850845337, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.9987573808574726e-05, |
|
"logits/chosen": -2.696485996246338, |
|
"logits/rejected": -2.779482364654541, |
|
"logps/chosen": -161.10256958007812, |
|
"logps/rejected": -185.7605743408203, |
|
"loss": 0.5904, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.41624942421913147, |
|
"rewards/margins": 0.23752669990062714, |
|
"rewards/rejected": -0.6537761092185974, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.9986418585477016e-05, |
|
"logits/chosen": -2.7433488368988037, |
|
"logits/rejected": -2.8069894313812256, |
|
"logps/chosen": -160.246826171875, |
|
"logps/rejected": -171.8643798828125, |
|
"loss": 0.728, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.4797200560569763, |
|
"rewards/margins": -0.032596245408058167, |
|
"rewards/rejected": -0.44712376594543457, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.998521203813274e-05, |
|
"logits/chosen": -2.738048791885376, |
|
"logits/rejected": -2.747162342071533, |
|
"logps/chosen": -190.9744873046875, |
|
"logps/rejected": -181.24745178222656, |
|
"loss": 0.755, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.5226253271102905, |
|
"rewards/margins": -0.04068867489695549, |
|
"rewards/rejected": -0.48193663358688354, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.9983954169020256e-05, |
|
"logits/chosen": -2.6100339889526367, |
|
"logits/rejected": -2.634096145629883, |
|
"logps/chosen": -176.01905822753906, |
|
"logps/rejected": -161.22412109375, |
|
"loss": 0.748, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.5368779301643372, |
|
"rewards/margins": -0.09010656177997589, |
|
"rewards/rejected": -0.4467713534832001, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.9982644980723334e-05, |
|
"logits/chosen": -2.797285556793213, |
|
"logits/rejected": -2.825375556945801, |
|
"logps/chosen": -141.42039489746094, |
|
"logps/rejected": -148.95436096191406, |
|
"loss": 0.7302, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5232774615287781, |
|
"rewards/margins": -0.04633237421512604, |
|
"rewards/rejected": -0.47694510221481323, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.998128447593117e-05, |
|
"logits/chosen": -2.6646504402160645, |
|
"logits/rejected": -2.7834081649780273, |
|
"logps/chosen": -230.68499755859375, |
|
"logps/rejected": -245.01414489746094, |
|
"loss": 0.661, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.687300980091095, |
|
"rewards/margins": 0.14017513394355774, |
|
"rewards/rejected": -0.8274761438369751, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.997987265743834e-05, |
|
"logits/chosen": -2.775637149810791, |
|
"logits/rejected": -2.738879919052124, |
|
"logps/chosen": -166.31509399414062, |
|
"logps/rejected": -169.01321411132812, |
|
"loss": 0.6557, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4408762454986572, |
|
"rewards/margins": 0.11477227509021759, |
|
"rewards/rejected": -0.5556485652923584, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.997840952814484e-05, |
|
"logits/chosen": -2.637038469314575, |
|
"logits/rejected": -2.666442394256592, |
|
"logps/chosen": -151.3613739013672, |
|
"logps/rejected": -160.8176727294922, |
|
"loss": 0.6752, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.4970799684524536, |
|
"rewards/margins": 0.08162279427051544, |
|
"rewards/rejected": -0.5787028074264526, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.9976895091056075e-05, |
|
"logits/chosen": -2.7471365928649902, |
|
"logits/rejected": -2.6176395416259766, |
|
"logps/chosen": -204.17808532714844, |
|
"logps/rejected": -224.61956787109375, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6104612946510315, |
|
"rewards/margins": 0.11507527530193329, |
|
"rewards/rejected": -0.725536584854126, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.9975329349282826e-05, |
|
"logits/chosen": -2.7353034019470215, |
|
"logits/rejected": -2.743035078048706, |
|
"logps/chosen": -184.143798828125, |
|
"logps/rejected": -197.323974609375, |
|
"loss": 0.6711, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6166396141052246, |
|
"rewards/margins": 0.08574585616588593, |
|
"rewards/rejected": -0.7023855447769165, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.9973712306041256e-05, |
|
"logits/chosen": -2.6548473834991455, |
|
"logits/rejected": -2.6762022972106934, |
|
"logps/chosen": -193.84849548339844, |
|
"logps/rejected": -175.150634765625, |
|
"loss": 0.7597, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.6773683428764343, |
|
"rewards/margins": -0.10929510742425919, |
|
"rewards/rejected": -0.5680732131004333, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.997204396465292e-05, |
|
"logits/chosen": -2.787050724029541, |
|
"logits/rejected": -2.777578353881836, |
|
"logps/chosen": -195.51785278320312, |
|
"logps/rejected": -190.51622009277344, |
|
"loss": 0.7073, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5759019255638123, |
|
"rewards/margins": 0.019099120050668716, |
|
"rewards/rejected": -0.5950011014938354, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.997032432854472e-05, |
|
"logits/chosen": -2.6198296546936035, |
|
"logits/rejected": -2.661146640777588, |
|
"logps/chosen": -149.22142028808594, |
|
"logps/rejected": -173.4526824951172, |
|
"loss": 0.6885, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5247339010238647, |
|
"rewards/margins": 0.05187266319990158, |
|
"rewards/rejected": -0.5766065120697021, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.996855340124894e-05, |
|
"logits/chosen": -2.637789487838745, |
|
"logits/rejected": -2.6017353534698486, |
|
"logps/chosen": -158.05914306640625, |
|
"logps/rejected": -175.1423797607422, |
|
"loss": 0.6985, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5851290822029114, |
|
"rewards/margins": 0.03735842555761337, |
|
"rewards/rejected": -0.6224875450134277, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.996673118640323e-05, |
|
"logits/chosen": -2.37221360206604, |
|
"logits/rejected": -2.5038788318634033, |
|
"logps/chosen": -143.90518188476562, |
|
"logps/rejected": -222.84115600585938, |
|
"loss": 0.6539, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.36046820878982544, |
|
"rewards/margins": 0.14497990906238556, |
|
"rewards/rejected": -0.5054481029510498, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.996485768775055e-05, |
|
"logits/chosen": -2.807823419570923, |
|
"logits/rejected": -2.800899028778076, |
|
"logps/chosen": -169.72129821777344, |
|
"logps/rejected": -181.5267333984375, |
|
"loss": 0.6754, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.543328046798706, |
|
"rewards/margins": 0.11098619550466537, |
|
"rewards/rejected": -0.6543142199516296, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.996293290913926e-05, |
|
"logits/chosen": -2.795060634613037, |
|
"logits/rejected": -2.9101216793060303, |
|
"logps/chosen": -136.24945068359375, |
|
"logps/rejected": -162.91119384765625, |
|
"loss": 0.61, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3900853395462036, |
|
"rewards/margins": 0.19977441430091858, |
|
"rewards/rejected": -0.5898597836494446, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.9960956854522986e-05, |
|
"logits/chosen": -2.7642905712127686, |
|
"logits/rejected": -2.7773826122283936, |
|
"logps/chosen": -158.06378173828125, |
|
"logps/rejected": -221.30577087402344, |
|
"loss": 0.625, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4831012189388275, |
|
"rewards/margins": 0.18627440929412842, |
|
"rewards/rejected": -0.6693755984306335, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.995892952796074e-05, |
|
"logits/chosen": -2.7154903411865234, |
|
"logits/rejected": -2.7624685764312744, |
|
"logps/chosen": -187.28146362304688, |
|
"logps/rejected": -189.83071899414062, |
|
"loss": 0.6819, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6895617842674255, |
|
"rewards/margins": 0.06184637174010277, |
|
"rewards/rejected": -0.7514082193374634, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.995685093361682e-05, |
|
"logits/chosen": -2.7003986835479736, |
|
"logits/rejected": -2.754859209060669, |
|
"logps/chosen": -160.55992126464844, |
|
"logps/rejected": -172.67434692382812, |
|
"loss": 0.725, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6082602739334106, |
|
"rewards/margins": -0.01646682247519493, |
|
"rewards/rejected": -0.5917934775352478, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.9954721075760824e-05, |
|
"logits/chosen": -2.7585508823394775, |
|
"logits/rejected": -2.7562849521636963, |
|
"logps/chosen": -186.20509338378906, |
|
"logps/rejected": -190.92132568359375, |
|
"loss": 0.6594, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5081749558448792, |
|
"rewards/margins": 0.10397283732891083, |
|
"rewards/rejected": -0.6121478080749512, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.995253995876767e-05, |
|
"logits/chosen": -2.808187246322632, |
|
"logits/rejected": -2.869479179382324, |
|
"logps/chosen": -172.86203002929688, |
|
"logps/rejected": -175.89739990234375, |
|
"loss": 0.5562, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.31780511140823364, |
|
"rewards/margins": 0.3512324392795563, |
|
"rewards/rejected": -0.6690375804901123, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.995030758711756e-05, |
|
"logits/chosen": -2.9907169342041016, |
|
"logits/rejected": -2.968203544616699, |
|
"logps/chosen": -191.64285278320312, |
|
"logps/rejected": -177.73028564453125, |
|
"loss": 0.7513, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.7068686485290527, |
|
"rewards/margins": -0.05182289704680443, |
|
"rewards/rejected": -0.6550456881523132, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.994802396539598e-05, |
|
"logits/chosen": -2.8123016357421875, |
|
"logits/rejected": -2.8668291568756104, |
|
"logps/chosen": -172.08924865722656, |
|
"logps/rejected": -195.8844451904297, |
|
"loss": 0.696, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5609222054481506, |
|
"rewards/margins": 0.05493137985467911, |
|
"rewards/rejected": -0.6158535480499268, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.994568909829368e-05, |
|
"logits/chosen": -2.892430305480957, |
|
"logits/rejected": -2.762629985809326, |
|
"logps/chosen": -216.95150756835938, |
|
"logps/rejected": -187.0184783935547, |
|
"loss": 0.7119, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7327225208282471, |
|
"rewards/margins": -0.021204425022006035, |
|
"rewards/rejected": -0.7115181684494019, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.9943302990606684e-05, |
|
"logits/chosen": -2.7017452716827393, |
|
"logits/rejected": -2.7307963371276855, |
|
"logps/chosen": -198.5173797607422, |
|
"logps/rejected": -185.40316772460938, |
|
"loss": 0.6751, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6512372493743896, |
|
"rewards/margins": 0.10363547503948212, |
|
"rewards/rejected": -0.754872739315033, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.994086564723626e-05, |
|
"logits/chosen": -2.835409641265869, |
|
"logits/rejected": -2.8388915061950684, |
|
"logps/chosen": -173.46127319335938, |
|
"logps/rejected": -185.3079376220703, |
|
"loss": 0.6937, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6533925533294678, |
|
"rewards/margins": 0.033594585955142975, |
|
"rewards/rejected": -0.6869871616363525, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.9938377073188905e-05, |
|
"logits/chosen": -2.9569547176361084, |
|
"logits/rejected": -2.9091203212738037, |
|
"logps/chosen": -201.7881317138672, |
|
"logps/rejected": -180.14285278320312, |
|
"loss": 0.7056, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7249323725700378, |
|
"rewards/margins": 0.014494583010673523, |
|
"rewards/rejected": -0.7394269704818726, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.993583727357638e-05, |
|
"logits/chosen": -2.6668853759765625, |
|
"logits/rejected": -2.694221258163452, |
|
"logps/chosen": -198.40594482421875, |
|
"logps/rejected": -201.02980041503906, |
|
"loss": 0.7579, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.652782678604126, |
|
"rewards/margins": -0.08612270653247833, |
|
"rewards/rejected": -0.5666600465774536, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.993324625361565e-05, |
|
"logits/chosen": -2.757725954055786, |
|
"logits/rejected": -2.7627735137939453, |
|
"logps/chosen": -200.16226196289062, |
|
"logps/rejected": -188.62083435058594, |
|
"loss": 0.6402, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.44451814889907837, |
|
"rewards/margins": 0.1431855410337448, |
|
"rewards/rejected": -0.5877037048339844, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.993060401862888e-05, |
|
"logits/chosen": -2.7355546951293945, |
|
"logits/rejected": -2.7701869010925293, |
|
"logps/chosen": -170.37046813964844, |
|
"logps/rejected": -182.28440856933594, |
|
"loss": 0.6787, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5462682247161865, |
|
"rewards/margins": 0.08645598590373993, |
|
"rewards/rejected": -0.6327242255210876, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.9927910574043465e-05, |
|
"logits/chosen": -2.893017530441284, |
|
"logits/rejected": -2.9069924354553223, |
|
"logps/chosen": -215.6539764404297, |
|
"logps/rejected": -256.93194580078125, |
|
"loss": 0.642, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.635912299156189, |
|
"rewards/margins": 0.1641692817211151, |
|
"rewards/rejected": -0.8000816702842712, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.992516592539196e-05, |
|
"logits/chosen": -2.544395685195923, |
|
"logits/rejected": -2.563568353652954, |
|
"logps/chosen": -144.60275268554688, |
|
"logps/rejected": -150.6583709716797, |
|
"loss": 0.6613, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.40625688433647156, |
|
"rewards/margins": 0.11070521920919418, |
|
"rewards/rejected": -0.5169621706008911, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.9922370078312105e-05, |
|
"logits/chosen": -2.6519908905029297, |
|
"logits/rejected": -2.6390066146850586, |
|
"logps/chosen": -187.1649932861328, |
|
"logps/rejected": -163.97708129882812, |
|
"loss": 0.6462, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4761255383491516, |
|
"rewards/margins": 0.15067748725414276, |
|
"rewards/rejected": -0.6268030405044556, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.991952303854682e-05, |
|
"logits/chosen": -2.823216676712036, |
|
"logits/rejected": -2.8248276710510254, |
|
"logps/chosen": -171.64088439941406, |
|
"logps/rejected": -205.40994262695312, |
|
"loss": 0.5695, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.49252718687057495, |
|
"rewards/margins": 0.3139771521091461, |
|
"rewards/rejected": -0.8065043687820435, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.9916624811944175e-05, |
|
"logits/chosen": -2.6669604778289795, |
|
"logits/rejected": -2.720827102661133, |
|
"logps/chosen": -145.63650512695312, |
|
"logps/rejected": -143.77822875976562, |
|
"loss": 0.6691, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4779016673564911, |
|
"rewards/margins": 0.07923712581396103, |
|
"rewards/rejected": -0.5571387410163879, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.991367540445735e-05, |
|
"logits/chosen": -2.780358076095581, |
|
"logits/rejected": -2.7683119773864746, |
|
"logps/chosen": -163.51113891601562, |
|
"logps/rejected": -147.0047149658203, |
|
"loss": 0.7295, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.6608467698097229, |
|
"rewards/margins": -0.023487316444516182, |
|
"rewards/rejected": -0.6373594403266907, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.991067482214471e-05, |
|
"logits/chosen": -2.660963296890259, |
|
"logits/rejected": -2.6401174068450928, |
|
"logps/chosen": -173.09176635742188, |
|
"logps/rejected": -171.8109130859375, |
|
"loss": 0.7108, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6182070970535278, |
|
"rewards/margins": -0.01903488114476204, |
|
"rewards/rejected": -0.5991722345352173, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.9907623071169686e-05, |
|
"logits/chosen": -2.7048563957214355, |
|
"logits/rejected": -2.5354063510894775, |
|
"logps/chosen": -228.5687255859375, |
|
"logps/rejected": -184.3094482421875, |
|
"loss": 0.7866, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.7742022275924683, |
|
"rewards/margins": -0.1047876849770546, |
|
"rewards/rejected": -0.6694144606590271, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.990452015780085e-05, |
|
"logits/chosen": -2.678699016571045, |
|
"logits/rejected": -2.6732139587402344, |
|
"logps/chosen": -217.17123413085938, |
|
"logps/rejected": -211.92489624023438, |
|
"loss": 0.8004, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.7689281702041626, |
|
"rewards/margins": -0.17873500287532806, |
|
"rewards/rejected": -0.5901932120323181, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.9901366088411846e-05, |
|
"logits/chosen": -2.650327444076538, |
|
"logits/rejected": -2.6380615234375, |
|
"logps/chosen": -160.45660400390625, |
|
"logps/rejected": -149.87843322753906, |
|
"loss": 0.7334, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.4278574585914612, |
|
"rewards/margins": -0.050989780575037, |
|
"rewards/rejected": -0.3768676817417145, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.98981608694814e-05, |
|
"logits/chosen": -2.636261463165283, |
|
"logits/rejected": -2.6120524406433105, |
|
"logps/chosen": -189.56497192382812, |
|
"logps/rejected": -184.8382568359375, |
|
"loss": 0.7106, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.6862293481826782, |
|
"rewards/margins": 0.02107788249850273, |
|
"rewards/rejected": -0.7073072791099548, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.9894904507593316e-05, |
|
"logits/chosen": -2.681283712387085, |
|
"logits/rejected": -2.6194002628326416, |
|
"logps/chosen": -158.73968505859375, |
|
"logps/rejected": -180.73330688476562, |
|
"loss": 0.6498, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4341495633125305, |
|
"rewards/margins": 0.13567671179771423, |
|
"rewards/rejected": -0.5698262453079224, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.989159700943643e-05, |
|
"logits/chosen": -2.822274923324585, |
|
"logits/rejected": -2.8219101428985596, |
|
"logps/chosen": -182.0315704345703, |
|
"logps/rejected": -193.2274169921875, |
|
"loss": 0.7126, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6531980037689209, |
|
"rewards/margins": 0.01201358437538147, |
|
"rewards/rejected": -0.6652116179466248, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.988823838180464e-05, |
|
"logits/chosen": -2.804276943206787, |
|
"logits/rejected": -2.8529317378997803, |
|
"logps/chosen": -183.54083251953125, |
|
"logps/rejected": -199.4310302734375, |
|
"loss": 0.6961, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5883633494377136, |
|
"rewards/margins": 0.03549729287624359, |
|
"rewards/rejected": -0.6238605976104736, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.988482863159684e-05, |
|
"logits/chosen": -2.7629952430725098, |
|
"logits/rejected": -2.852982759475708, |
|
"logps/chosen": -223.6051483154297, |
|
"logps/rejected": -215.63427734375, |
|
"loss": 0.605, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5412927865982056, |
|
"rewards/margins": 0.22360366582870483, |
|
"rewards/rejected": -0.7648964524269104, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.988136776581696e-05, |
|
"logits/chosen": -2.696824789047241, |
|
"logits/rejected": -2.7131996154785156, |
|
"logps/chosen": -161.2986297607422, |
|
"logps/rejected": -180.24172973632812, |
|
"loss": 0.6756, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.47621679306030273, |
|
"rewards/margins": 0.07477270066738129, |
|
"rewards/rejected": -0.5509894490242004, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.9877855791573915e-05, |
|
"logits/chosen": -2.5992307662963867, |
|
"logits/rejected": -2.5558767318725586, |
|
"logps/chosen": -177.31790161132812, |
|
"logps/rejected": -173.00013732910156, |
|
"loss": 0.7614, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.6591533422470093, |
|
"rewards/margins": -0.10336636006832123, |
|
"rewards/rejected": -0.5557870268821716, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.9874292716081595e-05, |
|
"logits/chosen": -2.480238914489746, |
|
"logits/rejected": -2.531926155090332, |
|
"logps/chosen": -173.81201171875, |
|
"logps/rejected": -177.76727294921875, |
|
"loss": 0.6506, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4206813871860504, |
|
"rewards/margins": 0.10889497399330139, |
|
"rewards/rejected": -0.5295763611793518, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.9870678546658865e-05, |
|
"logits/chosen": -2.68393611907959, |
|
"logits/rejected": -2.8312528133392334, |
|
"logps/chosen": -238.86911010742188, |
|
"logps/rejected": -268.19732666015625, |
|
"loss": 0.6572, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5521610975265503, |
|
"rewards/margins": 0.11217445880174637, |
|
"rewards/rejected": -0.6643356084823608, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.9867013290729535e-05, |
|
"logits/chosen": -2.580007314682007, |
|
"logits/rejected": -2.5705273151397705, |
|
"logps/chosen": -165.80308532714844, |
|
"logps/rejected": -203.9613800048828, |
|
"loss": 0.7206, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6492197513580322, |
|
"rewards/margins": 0.02221706137061119, |
|
"rewards/rejected": -0.6714367866516113, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.986329695582237e-05, |
|
"logits/chosen": -2.7853593826293945, |
|
"logits/rejected": -2.7307794094085693, |
|
"logps/chosen": -211.93991088867188, |
|
"logps/rejected": -200.86334228515625, |
|
"loss": 0.7051, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5223960876464844, |
|
"rewards/margins": 0.0009156223386526108, |
|
"rewards/rejected": -0.5233116745948792, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.985952954957103e-05, |
|
"logits/chosen": -2.6804401874542236, |
|
"logits/rejected": -2.6449456214904785, |
|
"logps/chosen": -187.6370391845703, |
|
"logps/rejected": -193.33245849609375, |
|
"loss": 0.6809, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.513522207736969, |
|
"rewards/margins": 0.04058818519115448, |
|
"rewards/rejected": -0.5541103482246399, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.985571107971408e-05, |
|
"logits/chosen": -2.622426986694336, |
|
"logits/rejected": -2.618734836578369, |
|
"logps/chosen": -152.2515869140625, |
|
"logps/rejected": -168.34747314453125, |
|
"loss": 0.7282, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5195332765579224, |
|
"rewards/margins": -0.02337510883808136, |
|
"rewards/rejected": -0.4961581528186798, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.9851841554095e-05, |
|
"logits/chosen": -2.6712746620178223, |
|
"logits/rejected": -2.6609811782836914, |
|
"logps/chosen": -198.9906005859375, |
|
"logps/rejected": -164.44154357910156, |
|
"loss": 0.7105, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4613041877746582, |
|
"rewards/margins": -0.0029089637100696564, |
|
"rewards/rejected": -0.45839521288871765, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.9847920980662134e-05, |
|
"logits/chosen": -2.6356289386749268, |
|
"logits/rejected": -2.6573214530944824, |
|
"logps/chosen": -175.52487182617188, |
|
"logps/rejected": -187.29832458496094, |
|
"loss": 0.6474, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.40463200211524963, |
|
"rewards/margins": 0.12262441217899323, |
|
"rewards/rejected": -0.527256429195404, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.984394936746865e-05, |
|
"logits/chosen": -2.357494354248047, |
|
"logits/rejected": -2.411952018737793, |
|
"logps/chosen": -139.59608459472656, |
|
"logps/rejected": -156.5337371826172, |
|
"loss": 0.679, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4118153154850006, |
|
"rewards/margins": 0.04770222678780556, |
|
"rewards/rejected": -0.45951756834983826, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.98399267226726e-05, |
|
"logits/chosen": -2.5636932849884033, |
|
"logits/rejected": -2.6556129455566406, |
|
"logps/chosen": -175.670166015625, |
|
"logps/rejected": -179.19627380371094, |
|
"loss": 0.6572, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6697508692741394, |
|
"rewards/margins": 0.1040661484003067, |
|
"rewards/rejected": -0.7738169431686401, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.9835853054536846e-05, |
|
"logits/chosen": -2.5892560482025146, |
|
"logits/rejected": -2.579235315322876, |
|
"logps/chosen": -168.14564514160156, |
|
"logps/rejected": -163.5547332763672, |
|
"loss": 0.624, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5034769177436829, |
|
"rewards/margins": 0.1633673459291458, |
|
"rewards/rejected": -0.6668442487716675, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.9831728371429046e-05, |
|
"logits/chosen": -2.5526325702667236, |
|
"logits/rejected": -2.602790355682373, |
|
"logps/chosen": -167.693115234375, |
|
"logps/rejected": -191.10301208496094, |
|
"loss": 0.6836, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5507184267044067, |
|
"rewards/margins": 0.07049673795700073, |
|
"rewards/rejected": -0.6212151646614075, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.982755268182164e-05, |
|
"logits/chosen": -2.581120729446411, |
|
"logits/rejected": -2.61881947517395, |
|
"logps/chosen": -176.85264587402344, |
|
"logps/rejected": -202.72483825683594, |
|
"loss": 0.6354, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6271121501922607, |
|
"rewards/margins": 0.17561408877372742, |
|
"rewards/rejected": -0.8027262687683105, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.982332599429187e-05, |
|
"logits/chosen": -2.5083706378936768, |
|
"logits/rejected": -2.5868587493896484, |
|
"logps/chosen": -145.9921112060547, |
|
"logps/rejected": -150.42713928222656, |
|
"loss": 0.6945, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.45637643337249756, |
|
"rewards/margins": 0.06362758576869965, |
|
"rewards/rejected": -0.5200040340423584, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.981904831752171e-05, |
|
"logits/chosen": -2.563215970993042, |
|
"logits/rejected": -2.6187920570373535, |
|
"logps/chosen": -147.48265075683594, |
|
"logps/rejected": -155.67405700683594, |
|
"loss": 0.7287, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5919477939605713, |
|
"rewards/margins": -0.029735613614320755, |
|
"rewards/rejected": -0.5622121691703796, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.981471966029787e-05, |
|
"logits/chosen": -2.447539806365967, |
|
"logits/rejected": -2.4295990467071533, |
|
"logps/chosen": -153.93881225585938, |
|
"logps/rejected": -169.62408447265625, |
|
"loss": 0.6502, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6545721292495728, |
|
"rewards/margins": 0.11100000143051147, |
|
"rewards/rejected": -0.7655720710754395, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.981034003151178e-05, |
|
"logits/chosen": -2.4045794010162354, |
|
"logits/rejected": -2.446890354156494, |
|
"logps/chosen": -134.2223358154297, |
|
"logps/rejected": -149.72105407714844, |
|
"loss": 0.6335, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.46206632256507874, |
|
"rewards/margins": 0.19417575001716614, |
|
"rewards/rejected": -0.6562421321868896, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.980590944015958e-05, |
|
"logits/chosen": -2.68265700340271, |
|
"logits/rejected": -2.667114496231079, |
|
"logps/chosen": -167.9902801513672, |
|
"logps/rejected": -171.3512420654297, |
|
"loss": 0.6734, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5563911199569702, |
|
"rewards/margins": 0.08469439297914505, |
|
"rewards/rejected": -0.6410855054855347, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.98014278953421e-05, |
|
"logits/chosen": -2.5728375911712646, |
|
"logits/rejected": -2.681403875350952, |
|
"logps/chosen": -159.7633056640625, |
|
"logps/rejected": -211.886962890625, |
|
"loss": 0.6439, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.531548023223877, |
|
"rewards/margins": 0.1921529322862625, |
|
"rewards/rejected": -0.7237009406089783, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.979689540626479e-05, |
|
"logits/chosen": -2.324286937713623, |
|
"logits/rejected": -2.453277349472046, |
|
"logps/chosen": -168.53738403320312, |
|
"logps/rejected": -180.93719482421875, |
|
"loss": 0.6538, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.4037819802761078, |
|
"rewards/margins": 0.17486746609210968, |
|
"rewards/rejected": -0.5786494016647339, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.9792311982237774e-05, |
|
"logits/chosen": -2.773432493209839, |
|
"logits/rejected": -2.74585223197937, |
|
"logps/chosen": -157.39044189453125, |
|
"logps/rejected": -167.32725524902344, |
|
"loss": 0.6255, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5002456307411194, |
|
"rewards/margins": 0.22034718096256256, |
|
"rewards/rejected": -0.7205928564071655, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.9787677632675825e-05, |
|
"logits/chosen": -2.6729888916015625, |
|
"logits/rejected": -2.7148032188415527, |
|
"logps/chosen": -162.77774047851562, |
|
"logps/rejected": -221.85386657714844, |
|
"loss": 0.6881, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6836182475090027, |
|
"rewards/margins": 0.14588770270347595, |
|
"rewards/rejected": -0.829505980014801, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.978299236709826e-05, |
|
"logits/chosen": -2.556713581085205, |
|
"logits/rejected": -2.5743775367736816, |
|
"logps/chosen": -197.9322052001953, |
|
"logps/rejected": -203.74917602539062, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7575306296348572, |
|
"rewards/margins": 0.05150505527853966, |
|
"rewards/rejected": -0.80903559923172, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.977825619512904e-05, |
|
"logits/chosen": -2.390803813934326, |
|
"logits/rejected": -2.551340341567993, |
|
"logps/chosen": -152.84097290039062, |
|
"logps/rejected": -192.7227325439453, |
|
"loss": 0.7108, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5909304618835449, |
|
"rewards/margins": -0.009836459532380104, |
|
"rewards/rejected": -0.581093966960907, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.977346912649666e-05, |
|
"logits/chosen": -2.451486587524414, |
|
"logits/rejected": -2.423938751220703, |
|
"logps/chosen": -208.7131805419922, |
|
"logps/rejected": -195.0940399169922, |
|
"loss": 0.7075, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5515446066856384, |
|
"rewards/margins": 0.014907769858837128, |
|
"rewards/rejected": -0.5664523839950562, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.9768631171034175e-05, |
|
"logits/chosen": -2.4102437496185303, |
|
"logits/rejected": -2.524508237838745, |
|
"logps/chosen": -171.2530975341797, |
|
"logps/rejected": -197.77008056640625, |
|
"loss": 0.6437, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.6411187648773193, |
|
"rewards/margins": 0.20387138426303864, |
|
"rewards/rejected": -0.8449901342391968, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.9763742338679145e-05, |
|
"logits/chosen": -2.6650915145874023, |
|
"logits/rejected": -2.5682055950164795, |
|
"logps/chosen": -280.52142333984375, |
|
"logps/rejected": -250.82424926757812, |
|
"loss": 0.6956, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7708845138549805, |
|
"rewards/margins": 0.03139163926243782, |
|
"rewards/rejected": -0.8022761344909668, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.975880263947367e-05, |
|
"logits/chosen": -2.66872239112854, |
|
"logits/rejected": -2.6272640228271484, |
|
"logps/chosen": -206.67318725585938, |
|
"logps/rejected": -169.04757690429688, |
|
"loss": 0.725, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7810104489326477, |
|
"rewards/margins": -0.02610369399189949, |
|
"rewards/rejected": -0.7549068331718445, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.9753812083564304e-05, |
|
"logits/chosen": -2.4464945793151855, |
|
"logits/rejected": -2.4812588691711426, |
|
"logps/chosen": -153.33660888671875, |
|
"logps/rejected": -176.61399841308594, |
|
"loss": 0.6034, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.44514474272727966, |
|
"rewards/margins": 0.24550259113311768, |
|
"rewards/rejected": -0.6906473636627197, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.974877068120208e-05, |
|
"logits/chosen": -2.635669231414795, |
|
"logits/rejected": -2.648510456085205, |
|
"logps/chosen": -182.48388671875, |
|
"logps/rejected": -196.19873046875, |
|
"loss": 0.6649, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6389026641845703, |
|
"rewards/margins": 0.10669447481632233, |
|
"rewards/rejected": -0.7455971240997314, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.974367844274248e-05, |
|
"logits/chosen": -2.5759198665618896, |
|
"logits/rejected": -2.723337173461914, |
|
"logps/chosen": -179.0137939453125, |
|
"logps/rejected": -255.97052001953125, |
|
"loss": 0.6338, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.4965980052947998, |
|
"rewards/margins": 0.18533286452293396, |
|
"rewards/rejected": -0.6819308996200562, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.973853537864538e-05, |
|
"logits/chosen": -2.7438008785247803, |
|
"logits/rejected": -2.8121700286865234, |
|
"logps/chosen": -160.9208221435547, |
|
"logps/rejected": -169.40225219726562, |
|
"loss": 0.6689, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6913070678710938, |
|
"rewards/margins": 0.10465458035469055, |
|
"rewards/rejected": -0.7959617376327515, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.973334149947508e-05, |
|
"logits/chosen": -2.70800518989563, |
|
"logits/rejected": -2.6374926567077637, |
|
"logps/chosen": -183.4274444580078, |
|
"logps/rejected": -180.7521209716797, |
|
"loss": 0.6144, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7033702731132507, |
|
"rewards/margins": 0.2224593162536621, |
|
"rewards/rejected": -0.9258295893669128, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.972809681590026e-05, |
|
"logits/chosen": -2.66047739982605, |
|
"logits/rejected": -2.710866928100586, |
|
"logps/chosen": -194.5672607421875, |
|
"logps/rejected": -210.85208129882812, |
|
"loss": 0.655, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.8557997345924377, |
|
"rewards/margins": 0.16004985570907593, |
|
"rewards/rejected": -1.0158495903015137, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.972280133869396e-05, |
|
"logits/chosen": -2.433838129043579, |
|
"logits/rejected": -2.564758539199829, |
|
"logps/chosen": -171.2584686279297, |
|
"logps/rejected": -213.25494384765625, |
|
"loss": 0.5993, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.43924885988235474, |
|
"rewards/margins": 0.2442169040441513, |
|
"rewards/rejected": -0.6834657788276672, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.971745507873352e-05, |
|
"logits/chosen": -2.681500196456909, |
|
"logits/rejected": -2.6873316764831543, |
|
"logps/chosen": -150.2528839111328, |
|
"logps/rejected": -154.3059844970703, |
|
"loss": 0.627, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7566659450531006, |
|
"rewards/margins": 0.23124736547470093, |
|
"rewards/rejected": -0.9879133701324463, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.971205804700063e-05, |
|
"logits/chosen": -2.464470624923706, |
|
"logits/rejected": -2.3958442211151123, |
|
"logps/chosen": -293.574462890625, |
|
"logps/rejected": -252.32489013671875, |
|
"loss": 0.6749, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5428895354270935, |
|
"rewards/margins": 0.14122185111045837, |
|
"rewards/rejected": -0.6841113567352295, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.970661025458125e-05, |
|
"logits/chosen": -2.5775954723358154, |
|
"logits/rejected": -2.6041054725646973, |
|
"logps/chosen": -170.66627502441406, |
|
"logps/rejected": -163.29644775390625, |
|
"loss": 0.721, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.9231572151184082, |
|
"rewards/margins": 0.11258751899003983, |
|
"rewards/rejected": -1.0357446670532227, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.9701111712665625e-05, |
|
"logits/chosen": -2.6646294593811035, |
|
"logits/rejected": -2.7594456672668457, |
|
"logps/chosen": -200.36981201171875, |
|
"logps/rejected": -186.59059143066406, |
|
"loss": 0.7514, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -1.3253411054611206, |
|
"rewards/margins": -0.0334821492433548, |
|
"rewards/rejected": -1.2918590307235718, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.969556243254822e-05, |
|
"logits/chosen": -2.5144646167755127, |
|
"logits/rejected": -2.594902992248535, |
|
"logps/chosen": -147.14231872558594, |
|
"logps/rejected": -176.36328125, |
|
"loss": 0.6548, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6609407067298889, |
|
"rewards/margins": 0.12413067370653152, |
|
"rewards/rejected": -0.7850713729858398, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.968996242562774e-05, |
|
"logits/chosen": -2.6077287197113037, |
|
"logits/rejected": -2.6607818603515625, |
|
"logps/chosen": -199.4670867919922, |
|
"logps/rejected": -201.5868377685547, |
|
"loss": 0.6797, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8754231333732605, |
|
"rewards/margins": 0.13014619052410126, |
|
"rewards/rejected": -1.005569338798523, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.968431170340706e-05, |
|
"logits/chosen": -2.740494966506958, |
|
"logits/rejected": -2.620009660720825, |
|
"logps/chosen": -210.96929931640625, |
|
"logps/rejected": -204.82090759277344, |
|
"loss": 0.7721, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.2561805248260498, |
|
"rewards/margins": -0.05324437841773033, |
|
"rewards/rejected": -1.2029361724853516, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.9678610277493275e-05, |
|
"logits/chosen": -2.6105682849884033, |
|
"logits/rejected": -2.5892579555511475, |
|
"logps/chosen": -198.35635375976562, |
|
"logps/rejected": -207.89016723632812, |
|
"loss": 0.6961, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.989006757736206, |
|
"rewards/margins": 0.060812097042798996, |
|
"rewards/rejected": -1.0498188734054565, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.967285815959759e-05, |
|
"logits/chosen": -2.714409589767456, |
|
"logits/rejected": -2.7895750999450684, |
|
"logps/chosen": -208.87754821777344, |
|
"logps/rejected": -222.0183563232422, |
|
"loss": 0.57, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.8426048159599304, |
|
"rewards/margins": 0.3045133650302887, |
|
"rewards/rejected": -1.1471182107925415, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.9667055361535354e-05, |
|
"logits/chosen": -2.748204231262207, |
|
"logits/rejected": -2.832871675491333, |
|
"logps/chosen": -201.07078552246094, |
|
"logps/rejected": -212.4540252685547, |
|
"loss": 0.6954, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.2435117959976196, |
|
"rewards/margins": 0.14082738757133484, |
|
"rewards/rejected": -1.384339451789856, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.9661201895226e-05, |
|
"logits/chosen": -2.7127251625061035, |
|
"logits/rejected": -2.751798629760742, |
|
"logps/chosen": -220.7108154296875, |
|
"logps/rejected": -195.08290100097656, |
|
"loss": 0.6729, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7872539758682251, |
|
"rewards/margins": 0.12220478057861328, |
|
"rewards/rejected": -0.9094586968421936, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.965529777269306e-05, |
|
"logits/chosen": -2.6204776763916016, |
|
"logits/rejected": -2.664301872253418, |
|
"logps/chosen": -166.66172790527344, |
|
"logps/rejected": -205.79847717285156, |
|
"loss": 0.7922, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.9887948036193848, |
|
"rewards/margins": -0.06140782684087753, |
|
"rewards/rejected": -0.927386999130249, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.964934300606411e-05, |
|
"logits/chosen": -2.48382830619812, |
|
"logits/rejected": -2.493025541305542, |
|
"logps/chosen": -169.9669189453125, |
|
"logps/rejected": -181.6856689453125, |
|
"loss": 0.7197, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6650858521461487, |
|
"rewards/margins": 0.017848990857601166, |
|
"rewards/rejected": -0.6829348802566528, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.964333760757074e-05, |
|
"logits/chosen": -2.648463726043701, |
|
"logits/rejected": -2.6576759815216064, |
|
"logps/chosen": -178.34747314453125, |
|
"logps/rejected": -188.2498779296875, |
|
"loss": 0.6239, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.95872962474823, |
|
"rewards/margins": 0.26567110419273376, |
|
"rewards/rejected": -1.2244007587432861, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.963728158954856e-05, |
|
"logits/chosen": -2.9130921363830566, |
|
"logits/rejected": -2.894216299057007, |
|
"logps/chosen": -221.04931640625, |
|
"logps/rejected": -237.8624725341797, |
|
"loss": 0.6594, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.084302306175232, |
|
"rewards/margins": 0.13000546395778656, |
|
"rewards/rejected": -1.2143077850341797, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.963117496443715e-05, |
|
"logits/chosen": -2.68157958984375, |
|
"logits/rejected": -2.8279314041137695, |
|
"logps/chosen": -165.5657196044922, |
|
"logps/rejected": -202.75865173339844, |
|
"loss": 0.6575, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7596678733825684, |
|
"rewards/margins": 0.2009599506855011, |
|
"rewards/rejected": -0.9606277942657471, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.9625017744780045e-05, |
|
"logits/chosen": -2.614312171936035, |
|
"logits/rejected": -2.6585357189178467, |
|
"logps/chosen": -216.45230102539062, |
|
"logps/rejected": -195.06643676757812, |
|
"loss": 0.7323, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.9276168346405029, |
|
"rewards/margins": -0.02379007264971733, |
|
"rewards/rejected": -0.9038268327713013, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.96188099432247e-05, |
|
"logits/chosen": -2.7345945835113525, |
|
"logits/rejected": -2.702479362487793, |
|
"logps/chosen": -227.8175048828125, |
|
"logps/rejected": -238.62513732910156, |
|
"loss": 0.6852, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.9885910749435425, |
|
"rewards/margins": 0.07757923752069473, |
|
"rewards/rejected": -1.0661702156066895, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.9612551572522464e-05, |
|
"logits/chosen": -2.6463451385498047, |
|
"logits/rejected": -2.6843183040618896, |
|
"logps/chosen": -150.67254638671875, |
|
"logps/rejected": -155.01316833496094, |
|
"loss": 0.6833, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.0718083381652832, |
|
"rewards/margins": 0.0895613357424736, |
|
"rewards/rejected": -1.1613696813583374, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.960624264552858e-05, |
|
"logits/chosen": -2.6091978549957275, |
|
"logits/rejected": -2.6224098205566406, |
|
"logps/chosen": -134.08544921875, |
|
"logps/rejected": -145.00137329101562, |
|
"loss": 0.617, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6298830509185791, |
|
"rewards/margins": 0.1894720196723938, |
|
"rewards/rejected": -0.8193551301956177, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.9599883175202124e-05, |
|
"logits/chosen": -2.689610004425049, |
|
"logits/rejected": -2.641515016555786, |
|
"logps/chosen": -175.27386474609375, |
|
"logps/rejected": -176.47059631347656, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8815383911132812, |
|
"rewards/margins": 0.05579657852649689, |
|
"rewards/rejected": -0.9373350143432617, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.9593473174605974e-05, |
|
"logits/chosen": -2.673809051513672, |
|
"logits/rejected": -2.6921756267547607, |
|
"logps/chosen": -210.48333740234375, |
|
"logps/rejected": -222.2653045654297, |
|
"loss": 0.7605, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.9074676036834717, |
|
"rewards/margins": -0.08400504291057587, |
|
"rewards/rejected": -0.8234625458717346, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.958701265690685e-05, |
|
"logits/chosen": -2.608705759048462, |
|
"logits/rejected": -2.622281074523926, |
|
"logps/chosen": -194.5504150390625, |
|
"logps/rejected": -197.58035278320312, |
|
"loss": 0.7456, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -1.1094239950180054, |
|
"rewards/margins": -0.014511600136756897, |
|
"rewards/rejected": -1.0949124097824097, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.958050163537519e-05, |
|
"logits/chosen": -2.598935127258301, |
|
"logits/rejected": -2.648134231567383, |
|
"logps/chosen": -135.84756469726562, |
|
"logps/rejected": -160.54318237304688, |
|
"loss": 0.7163, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7799862623214722, |
|
"rewards/margins": 0.07278753817081451, |
|
"rewards/rejected": -0.8527737855911255, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.957394012338519e-05, |
|
"logits/chosen": -2.5633938312530518, |
|
"logits/rejected": -2.528005838394165, |
|
"logps/chosen": -179.5242919921875, |
|
"logps/rejected": -175.9528350830078, |
|
"loss": 0.6776, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.6330910325050354, |
|
"rewards/margins": 0.09188088774681091, |
|
"rewards/rejected": -0.7249718904495239, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.956732813441477e-05, |
|
"logits/chosen": -2.681288957595825, |
|
"logits/rejected": -2.742069959640503, |
|
"logps/chosen": -157.13189697265625, |
|
"logps/rejected": -170.25006103515625, |
|
"loss": 0.6152, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7350885272026062, |
|
"rewards/margins": 0.18710875511169434, |
|
"rewards/rejected": -0.9221972227096558, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.956066568204552e-05, |
|
"logits/chosen": -2.6132476329803467, |
|
"logits/rejected": -2.6484899520874023, |
|
"logps/chosen": -175.98236083984375, |
|
"logps/rejected": -185.96463012695312, |
|
"loss": 0.5907, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6415359973907471, |
|
"rewards/margins": 0.3225414752960205, |
|
"rewards/rejected": -0.9640775322914124, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.955395277996268e-05, |
|
"logits/chosen": -2.654163122177124, |
|
"logits/rejected": -2.6380820274353027, |
|
"logps/chosen": -191.48721313476562, |
|
"logps/rejected": -160.77561950683594, |
|
"loss": 0.724, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.868155837059021, |
|
"rewards/margins": 0.054594431072473526, |
|
"rewards/rejected": -0.9227503538131714, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.954718944195512e-05, |
|
"logits/chosen": -2.6072440147399902, |
|
"logits/rejected": -2.6623966693878174, |
|
"logps/chosen": -184.31289672851562, |
|
"logps/rejected": -173.9798126220703, |
|
"loss": 0.7546, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.8332484364509583, |
|
"rewards/margins": -0.08707739412784576, |
|
"rewards/rejected": -0.7461711168289185, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.954037568191534e-05, |
|
"logits/chosen": -2.6110591888427734, |
|
"logits/rejected": -2.568448781967163, |
|
"logps/chosen": -222.3007049560547, |
|
"logps/rejected": -193.83935546875, |
|
"loss": 0.769, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.9347423315048218, |
|
"rewards/margins": -0.09616127610206604, |
|
"rewards/rejected": -0.8385810256004333, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.9533511513839384e-05, |
|
"logits/chosen": -2.7532308101654053, |
|
"logits/rejected": -2.7335729598999023, |
|
"logps/chosen": -218.836669921875, |
|
"logps/rejected": -247.48204040527344, |
|
"loss": 0.7134, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.9885715246200562, |
|
"rewards/margins": 0.15268389880657196, |
|
"rewards/rejected": -1.1412553787231445, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.9526596951826824e-05, |
|
"logits/chosen": -2.6721489429473877, |
|
"logits/rejected": -2.6597042083740234, |
|
"logps/chosen": -187.82127380371094, |
|
"logps/rejected": -174.8271026611328, |
|
"loss": 0.6244, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5805238485336304, |
|
"rewards/margins": 0.22618348896503448, |
|
"rewards/rejected": -0.8067073225975037, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.951963201008076e-05, |
|
"logits/chosen": -2.855266571044922, |
|
"logits/rejected": -2.8631551265716553, |
|
"logps/chosen": -243.0494384765625, |
|
"logps/rejected": -228.80966186523438, |
|
"loss": 0.7247, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.934760332107544, |
|
"rewards/margins": 0.034967467188835144, |
|
"rewards/rejected": -0.9697277545928955, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.951261670290781e-05, |
|
"logits/chosen": -2.664848566055298, |
|
"logits/rejected": -2.730018138885498, |
|
"logps/chosen": -192.22723388671875, |
|
"logps/rejected": -171.22459411621094, |
|
"loss": 0.6765, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6986839771270752, |
|
"rewards/margins": 0.09998993575572968, |
|
"rewards/rejected": -0.7986739277839661, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.950555104471799e-05, |
|
"logits/chosen": -2.6024093627929688, |
|
"logits/rejected": -2.6131458282470703, |
|
"logps/chosen": -164.9868927001953, |
|
"logps/rejected": -153.97813415527344, |
|
"loss": 0.7123, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7849129438400269, |
|
"rewards/margins": 0.029222920536994934, |
|
"rewards/rejected": -0.8141359090805054, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.949843505002477e-05, |
|
"logits/chosen": -2.509110689163208, |
|
"logits/rejected": -2.5836997032165527, |
|
"logps/chosen": -156.37550354003906, |
|
"logps/rejected": -177.84518432617188, |
|
"loss": 0.5906, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.40633201599121094, |
|
"rewards/margins": 0.2858356237411499, |
|
"rewards/rejected": -0.6921676397323608, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.9491268733445034e-05, |
|
"logits/chosen": -2.5929789543151855, |
|
"logits/rejected": -2.593715190887451, |
|
"logps/chosen": -159.74212646484375, |
|
"logps/rejected": -177.87425231933594, |
|
"loss": 0.6413, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4330030679702759, |
|
"rewards/margins": 0.15460006892681122, |
|
"rewards/rejected": -0.5876031517982483, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.9484052109698984e-05, |
|
"logits/chosen": -2.581435441970825, |
|
"logits/rejected": -2.598817825317383, |
|
"logps/chosen": -169.76776123046875, |
|
"logps/rejected": -170.72422790527344, |
|
"loss": 0.6379, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5581039786338806, |
|
"rewards/margins": 0.1779794692993164, |
|
"rewards/rejected": -0.736083447933197, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.947678519361021e-05, |
|
"logits/chosen": -2.5178732872009277, |
|
"logits/rejected": -2.5693912506103516, |
|
"logps/chosen": -173.69699096679688, |
|
"logps/rejected": -174.0066680908203, |
|
"loss": 0.6503, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5450050830841064, |
|
"rewards/margins": 0.11375146359205246, |
|
"rewards/rejected": -0.6587565541267395, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.946946800010556e-05, |
|
"logits/chosen": -2.5523407459259033, |
|
"logits/rejected": -2.537888288497925, |
|
"logps/chosen": -175.92843627929688, |
|
"logps/rejected": -160.6066436767578, |
|
"loss": 0.7642, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.7941773533821106, |
|
"rewards/margins": -0.09637541323900223, |
|
"rewards/rejected": -0.697801947593689, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.946210054421518e-05, |
|
"logits/chosen": -2.688391923904419, |
|
"logits/rejected": -2.702990770339966, |
|
"logps/chosen": -222.63352966308594, |
|
"logps/rejected": -215.47323608398438, |
|
"loss": 0.6372, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6142177581787109, |
|
"rewards/margins": 0.2073356807231903, |
|
"rewards/rejected": -0.8215534687042236, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.945468284107246e-05, |
|
"logits/chosen": -2.5451714992523193, |
|
"logits/rejected": -2.5484871864318848, |
|
"logps/chosen": -196.5067138671875, |
|
"logps/rejected": -190.59320068359375, |
|
"loss": 0.6548, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5240797996520996, |
|
"rewards/margins": 0.11533726006746292, |
|
"rewards/rejected": -0.6394170522689819, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.944721490591401e-05, |
|
"logits/chosen": -2.472393274307251, |
|
"logits/rejected": -2.587679386138916, |
|
"logps/chosen": -165.8987274169922, |
|
"logps/rejected": -188.1234130859375, |
|
"loss": 0.5868, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.4879373013973236, |
|
"rewards/margins": 0.2605348229408264, |
|
"rewards/rejected": -0.7484720349311829, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.9439696754079595e-05, |
|
"logits/chosen": -2.800347089767456, |
|
"logits/rejected": -2.7822184562683105, |
|
"logps/chosen": -248.7989044189453, |
|
"logps/rejected": -214.1162567138672, |
|
"loss": 0.8436, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.8426647782325745, |
|
"rewards/margins": -0.20171405375003815, |
|
"rewards/rejected": -0.6409507393836975, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.9432128401012144e-05, |
|
"logits/chosen": -2.5632007122039795, |
|
"logits/rejected": -2.5047194957733154, |
|
"logps/chosen": -185.19070434570312, |
|
"logps/rejected": -204.64364624023438, |
|
"loss": 0.7785, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.7219403386116028, |
|
"rewards/margins": -0.07751601189374924, |
|
"rewards/rejected": -0.6444243788719177, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.9424509862257706e-05, |
|
"logits/chosen": -2.529867649078369, |
|
"logits/rejected": -2.5607314109802246, |
|
"logps/chosen": -219.9513397216797, |
|
"logps/rejected": -240.11671447753906, |
|
"loss": 0.6592, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4247688055038452, |
|
"rewards/margins": 0.10957615077495575, |
|
"rewards/rejected": -0.5343449115753174, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.941684115346541e-05, |
|
"logits/chosen": -2.7805376052856445, |
|
"logits/rejected": -2.837836742401123, |
|
"logps/chosen": -177.21543884277344, |
|
"logps/rejected": -213.7078094482422, |
|
"loss": 0.6014, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5495901107788086, |
|
"rewards/margins": 0.268564373254776, |
|
"rewards/rejected": -0.8181545734405518, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.940912229038745e-05, |
|
"logits/chosen": -2.56017804145813, |
|
"logits/rejected": -2.5503697395324707, |
|
"logps/chosen": -170.02735900878906, |
|
"logps/rejected": -161.25509643554688, |
|
"loss": 0.7194, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5123621225357056, |
|
"rewards/margins": -0.0268712155520916, |
|
"rewards/rejected": -0.4854908883571625, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.9401353288879024e-05, |
|
"logits/chosen": -2.5448572635650635, |
|
"logits/rejected": -2.56659197807312, |
|
"logps/chosen": -169.89077758789062, |
|
"logps/rejected": -190.93545532226562, |
|
"loss": 0.6295, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.4579932391643524, |
|
"rewards/margins": 0.1804841011762619, |
|
"rewards/rejected": -0.6384773254394531, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.9393534164898335e-05, |
|
"logits/chosen": -2.55391526222229, |
|
"logits/rejected": -2.5868449211120605, |
|
"logps/chosen": -158.18197631835938, |
|
"logps/rejected": -197.00271606445312, |
|
"loss": 0.6476, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6932105422019958, |
|
"rewards/margins": 0.13315898180007935, |
|
"rewards/rejected": -0.8263695240020752, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.9385664934506526e-05, |
|
"logits/chosen": -2.554259777069092, |
|
"logits/rejected": -2.684239625930786, |
|
"logps/chosen": -159.8101348876953, |
|
"logps/rejected": -183.30532836914062, |
|
"loss": 0.6516, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5770647525787354, |
|
"rewards/margins": 0.14322030544281006, |
|
"rewards/rejected": -0.7202850580215454, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.937774561386768e-05, |
|
"logits/chosen": -2.5128979682922363, |
|
"logits/rejected": -2.6388025283813477, |
|
"logps/chosen": -174.58401489257812, |
|
"logps/rejected": -184.96910095214844, |
|
"loss": 0.577, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5345829129219055, |
|
"rewards/margins": 0.3504784107208252, |
|
"rewards/rejected": -0.8850612640380859, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.936977621924875e-05, |
|
"logits/chosen": -2.6937482357025146, |
|
"logits/rejected": -2.650275707244873, |
|
"logps/chosen": -165.79302978515625, |
|
"logps/rejected": -177.26170349121094, |
|
"loss": 0.6382, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.614943265914917, |
|
"rewards/margins": 0.14949731528759003, |
|
"rewards/rejected": -0.7644405961036682, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.9361756767019564e-05, |
|
"logits/chosen": -2.5641212463378906, |
|
"logits/rejected": -2.6175105571746826, |
|
"logps/chosen": -182.44837951660156, |
|
"logps/rejected": -225.40805053710938, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6991927027702332, |
|
"rewards/margins": 0.0908452570438385, |
|
"rewards/rejected": -0.790037989616394, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.935368727365276e-05, |
|
"logits/chosen": -2.6357033252716064, |
|
"logits/rejected": -2.57110857963562, |
|
"logps/chosen": -182.70291137695312, |
|
"logps/rejected": -223.89857482910156, |
|
"loss": 0.6278, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7485102415084839, |
|
"rewards/margins": 0.1697012186050415, |
|
"rewards/rejected": -0.9182114601135254, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.934556775572377e-05, |
|
"logits/chosen": -2.6168265342712402, |
|
"logits/rejected": -2.6644468307495117, |
|
"logps/chosen": -165.17758178710938, |
|
"logps/rejected": -188.50119018554688, |
|
"loss": 0.7554, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.5862306356430054, |
|
"rewards/margins": -0.08211595565080643, |
|
"rewards/rejected": -0.5041146874427795, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.9337398229910784e-05, |
|
"logits/chosen": -2.595906972885132, |
|
"logits/rejected": -2.594231128692627, |
|
"logps/chosen": -188.159912109375, |
|
"logps/rejected": -176.15074157714844, |
|
"loss": 0.6936, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6705946922302246, |
|
"rewards/margins": 0.03840280696749687, |
|
"rewards/rejected": -0.7089974284172058, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.932917871299471e-05, |
|
"logits/chosen": -2.672065496444702, |
|
"logits/rejected": -2.6840691566467285, |
|
"logps/chosen": -173.274169921875, |
|
"logps/rejected": -183.5944061279297, |
|
"loss": 0.6154, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.48323020339012146, |
|
"rewards/margins": 0.2292642742395401, |
|
"rewards/rejected": -0.7124944925308228, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.9320909221859134e-05, |
|
"logits/chosen": -2.660583972930908, |
|
"logits/rejected": -2.7142574787139893, |
|
"logps/chosen": -192.77145385742188, |
|
"logps/rejected": -185.00450134277344, |
|
"loss": 0.6823, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7117425799369812, |
|
"rewards/margins": 0.10454593598842621, |
|
"rewards/rejected": -0.8162885308265686, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.9312589773490304e-05, |
|
"logits/chosen": -2.5676193237304688, |
|
"logits/rejected": -2.510430335998535, |
|
"logps/chosen": -176.65432739257812, |
|
"logps/rejected": -154.3079833984375, |
|
"loss": 0.7678, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.8498928546905518, |
|
"rewards/margins": -0.05464668944478035, |
|
"rewards/rejected": -0.7952461242675781, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.930422038497708e-05, |
|
"logits/chosen": -2.5558042526245117, |
|
"logits/rejected": -2.672940492630005, |
|
"logps/chosen": -173.69500732421875, |
|
"logps/rejected": -180.78118896484375, |
|
"loss": 0.5704, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5608630776405334, |
|
"rewards/margins": 0.38767609000205994, |
|
"rewards/rejected": -0.948539137840271, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.92958010735109e-05, |
|
"logits/chosen": -2.508685350418091, |
|
"logits/rejected": -2.4861252307891846, |
|
"logps/chosen": -196.0287628173828, |
|
"logps/rejected": -212.8455810546875, |
|
"loss": 0.6453, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6239847540855408, |
|
"rewards/margins": 0.18039949238300323, |
|
"rewards/rejected": -0.8043842315673828, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.928733185638575e-05, |
|
"logits/chosen": -2.641526222229004, |
|
"logits/rejected": -2.630765199661255, |
|
"logps/chosen": -205.0409698486328, |
|
"logps/rejected": -201.60269165039062, |
|
"loss": 0.651, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7301749587059021, |
|
"rewards/margins": 0.1289985626935959, |
|
"rewards/rejected": -0.859173595905304, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.927881275099815e-05, |
|
"logits/chosen": -2.5749480724334717, |
|
"logits/rejected": -2.5817878246307373, |
|
"logps/chosen": -187.1511688232422, |
|
"logps/rejected": -236.642822265625, |
|
"loss": 0.6596, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6212616562843323, |
|
"rewards/margins": 0.13300594687461853, |
|
"rewards/rejected": -0.7542675733566284, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.927024377484705e-05, |
|
"logits/chosen": -2.6451222896575928, |
|
"logits/rejected": -2.700425863265991, |
|
"logps/chosen": -176.2154083251953, |
|
"logps/rejected": -211.41744995117188, |
|
"loss": 0.7046, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.8719490766525269, |
|
"rewards/margins": 0.05133984610438347, |
|
"rewards/rejected": -0.9232889413833618, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.9261624945533855e-05, |
|
"logits/chosen": -2.5780887603759766, |
|
"logits/rejected": -2.669583320617676, |
|
"logps/chosen": -181.3977813720703, |
|
"logps/rejected": -242.43626403808594, |
|
"loss": 0.6873, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.6802850365638733, |
|
"rewards/margins": 0.029887204989790916, |
|
"rewards/rejected": -0.7101722955703735, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.925295628076241e-05, |
|
"logits/chosen": -2.6090972423553467, |
|
"logits/rejected": -2.683279275894165, |
|
"logps/chosen": -192.7779541015625, |
|
"logps/rejected": -241.75607299804688, |
|
"loss": 0.6072, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7047219276428223, |
|
"rewards/margins": 0.2544190287590027, |
|
"rewards/rejected": -0.9591410160064697, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.9244237798338866e-05, |
|
"logits/chosen": -2.7260890007019043, |
|
"logits/rejected": -2.7258718013763428, |
|
"logps/chosen": -206.13421630859375, |
|
"logps/rejected": -211.11663818359375, |
|
"loss": 0.7085, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.855944037437439, |
|
"rewards/margins": 0.07277999073266983, |
|
"rewards/rejected": -0.9287241101264954, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.923546951617175e-05, |
|
"logits/chosen": -2.648552417755127, |
|
"logits/rejected": -2.5934343338012695, |
|
"logps/chosen": -170.73721313476562, |
|
"logps/rejected": -186.487548828125, |
|
"loss": 0.6374, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7491826415061951, |
|
"rewards/margins": 0.1971735656261444, |
|
"rewards/rejected": -0.9463562369346619, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.922665145227187e-05, |
|
"logits/chosen": -2.5815610885620117, |
|
"logits/rejected": -2.5234570503234863, |
|
"logps/chosen": -153.86798095703125, |
|
"logps/rejected": -140.2138671875, |
|
"loss": 0.8232, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.9449439644813538, |
|
"rewards/margins": -0.13928692042827606, |
|
"rewards/rejected": -0.8056570291519165, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.9217783624752266e-05, |
|
"logits/chosen": -2.4257960319519043, |
|
"logits/rejected": -2.481285333633423, |
|
"logps/chosen": -129.22506713867188, |
|
"logps/rejected": -133.507080078125, |
|
"loss": 0.6863, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7729750275611877, |
|
"rewards/margins": 0.07025709748268127, |
|
"rewards/rejected": -0.8432320356369019, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.920886605182823e-05, |
|
"logits/chosen": -2.8374218940734863, |
|
"logits/rejected": -2.840282440185547, |
|
"logps/chosen": -183.80465698242188, |
|
"logps/rejected": -185.8097381591797, |
|
"loss": 0.6878, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.8535721898078918, |
|
"rewards/margins": 0.04765475541353226, |
|
"rewards/rejected": -0.9012269377708435, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.919989875181722e-05, |
|
"logits/chosen": -2.7011358737945557, |
|
"logits/rejected": -2.758044719696045, |
|
"logps/chosen": -176.79345703125, |
|
"logps/rejected": -174.94239807128906, |
|
"loss": 0.6994, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.159913182258606, |
|
"rewards/margins": 0.09292294830083847, |
|
"rewards/rejected": -1.2528361082077026, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.919088174313884e-05, |
|
"logits/chosen": -2.614689826965332, |
|
"logits/rejected": -2.645404577255249, |
|
"logps/chosen": -134.0303955078125, |
|
"logps/rejected": -162.16595458984375, |
|
"loss": 0.6113, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6662919521331787, |
|
"rewards/margins": 0.26484376192092896, |
|
"rewards/rejected": -0.9311355948448181, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.91818150443148e-05, |
|
"logits/chosen": -2.6832685470581055, |
|
"logits/rejected": -2.670450448989868, |
|
"logps/chosen": -196.30052185058594, |
|
"logps/rejected": -179.43482971191406, |
|
"loss": 0.6394, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.9087074995040894, |
|
"rewards/margins": 0.1679982841014862, |
|
"rewards/rejected": -1.076705813407898, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.917269867396886e-05, |
|
"logits/chosen": -2.8207449913024902, |
|
"logits/rejected": -2.773184061050415, |
|
"logps/chosen": -198.20509338378906, |
|
"logps/rejected": -184.33151245117188, |
|
"loss": 0.7475, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.9387542009353638, |
|
"rewards/margins": 0.003928817808628082, |
|
"rewards/rejected": -0.9426830410957336, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.916353265082686e-05, |
|
"logits/chosen": -2.75034236907959, |
|
"logits/rejected": -2.7130117416381836, |
|
"logps/chosen": -187.1002197265625, |
|
"logps/rejected": -193.2103271484375, |
|
"loss": 0.8817, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -1.0758837461471558, |
|
"rewards/margins": -0.2836476266384125, |
|
"rewards/rejected": -0.7922362089157104, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.9154316993716565e-05, |
|
"logits/chosen": -2.7884602546691895, |
|
"logits/rejected": -2.889566421508789, |
|
"logps/chosen": -173.12225341796875, |
|
"logps/rejected": -181.60842895507812, |
|
"loss": 0.6636, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -1.0141631364822388, |
|
"rewards/margins": 0.16198423504829407, |
|
"rewards/rejected": -1.1761474609375, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.9145051721567734e-05, |
|
"logits/chosen": -2.711033344268799, |
|
"logits/rejected": -2.731414318084717, |
|
"logps/chosen": -190.81759643554688, |
|
"logps/rejected": -214.6211700439453, |
|
"loss": 0.5886, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9142743945121765, |
|
"rewards/margins": 0.34260398149490356, |
|
"rewards/rejected": -1.25687837600708, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.913573685341205e-05, |
|
"logits/chosen": -2.534449338912964, |
|
"logits/rejected": -2.6240017414093018, |
|
"logps/chosen": -158.4725341796875, |
|
"logps/rejected": -144.93075561523438, |
|
"loss": 0.6633, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.8281882405281067, |
|
"rewards/margins": 0.08525969088077545, |
|
"rewards/rejected": -0.9134478569030762, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.9126372408383025e-05, |
|
"logits/chosen": -2.8126165866851807, |
|
"logits/rejected": -2.9268059730529785, |
|
"logps/chosen": -165.80160522460938, |
|
"logps/rejected": -195.13014221191406, |
|
"loss": 0.6719, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.0856614112854004, |
|
"rewards/margins": 0.1156027615070343, |
|
"rewards/rejected": -1.2012642621994019, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.911695840571605e-05, |
|
"logits/chosen": -2.8474719524383545, |
|
"logits/rejected": -2.870950698852539, |
|
"logps/chosen": -185.34619140625, |
|
"logps/rejected": -204.24951171875, |
|
"loss": 0.6981, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.9124755859375, |
|
"rewards/margins": 0.032730571925640106, |
|
"rewards/rejected": -0.9452061057090759, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.910749486474828e-05, |
|
"logits/chosen": -2.7202816009521484, |
|
"logits/rejected": -2.7738454341888428, |
|
"logps/chosen": -183.0021514892578, |
|
"logps/rejected": -176.14559936523438, |
|
"loss": 0.7423, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.8500471711158752, |
|
"rewards/margins": -0.009690776467323303, |
|
"rewards/rejected": -0.8403564691543579, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.909798180491865e-05, |
|
"logits/chosen": -2.8064842224121094, |
|
"logits/rejected": -2.8379313945770264, |
|
"logps/chosen": -181.36424255371094, |
|
"logps/rejected": -186.806884765625, |
|
"loss": 0.7156, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.9925443530082703, |
|
"rewards/margins": 0.02362808585166931, |
|
"rewards/rejected": -1.0161724090576172, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.9088419245767803e-05, |
|
"logits/chosen": -2.591299057006836, |
|
"logits/rejected": -2.6505491733551025, |
|
"logps/chosen": -165.84970092773438, |
|
"logps/rejected": -189.2133331298828, |
|
"loss": 0.6588, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6631417870521545, |
|
"rewards/margins": 0.1258070468902588, |
|
"rewards/rejected": -0.7889488339424133, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.907880720693804e-05, |
|
"logits/chosen": -2.9960107803344727, |
|
"logits/rejected": -2.9374802112579346, |
|
"logps/chosen": -213.4750518798828, |
|
"logps/rejected": -237.82672119140625, |
|
"loss": 0.656, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.8233738541603088, |
|
"rewards/margins": 0.1019565686583519, |
|
"rewards/rejected": -0.9253304600715637, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.9069145708173324e-05, |
|
"logits/chosen": -2.6092634201049805, |
|
"logits/rejected": -2.6358261108398438, |
|
"logps/chosen": -197.22938537597656, |
|
"logps/rejected": -190.7332000732422, |
|
"loss": 0.613, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7702199220657349, |
|
"rewards/margins": 0.23700743913650513, |
|
"rewards/rejected": -1.0072274208068848, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.9059434769319205e-05, |
|
"logits/chosen": -2.731299638748169, |
|
"logits/rejected": -2.8226795196533203, |
|
"logps/chosen": -202.76451110839844, |
|
"logps/rejected": -244.38844299316406, |
|
"loss": 0.589, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6375839114189148, |
|
"rewards/margins": 0.2866172790527344, |
|
"rewards/rejected": -0.9242011904716492, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.904967441032278e-05, |
|
"logits/chosen": -2.478205680847168, |
|
"logits/rejected": -2.541795253753662, |
|
"logps/chosen": -195.00408935546875, |
|
"logps/rejected": -225.96731567382812, |
|
"loss": 0.6568, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6428064107894897, |
|
"rewards/margins": 0.17480693757534027, |
|
"rewards/rejected": -0.8176133632659912, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.903986465123266e-05, |
|
"logits/chosen": -2.692394733428955, |
|
"logits/rejected": -2.7363274097442627, |
|
"logps/chosen": -167.2650146484375, |
|
"logps/rejected": -220.62228393554688, |
|
"loss": 0.7124, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.8301478624343872, |
|
"rewards/margins": 0.07167816907167435, |
|
"rewards/rejected": -0.901826024055481, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.903000551219894e-05, |
|
"logits/chosen": -2.782505989074707, |
|
"logits/rejected": -2.81825590133667, |
|
"logps/chosen": -153.23898315429688, |
|
"logps/rejected": -157.7719268798828, |
|
"loss": 0.7488, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.8630144596099854, |
|
"rewards/margins": -0.03182988613843918, |
|
"rewards/rejected": -0.8311845660209656, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.902009701347313e-05, |
|
"logits/chosen": -2.7017691135406494, |
|
"logits/rejected": -2.751739978790283, |
|
"logps/chosen": -199.3386688232422, |
|
"logps/rejected": -189.38465881347656, |
|
"loss": 0.7063, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7444071769714355, |
|
"rewards/margins": 0.05856693163514137, |
|
"rewards/rejected": -0.8029740452766418, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.901013917540814e-05, |
|
"logits/chosen": -2.62956166267395, |
|
"logits/rejected": -2.65138578414917, |
|
"logps/chosen": -209.6748046875, |
|
"logps/rejected": -200.0463409423828, |
|
"loss": 0.6986, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.778890073299408, |
|
"rewards/margins": 0.03998234495520592, |
|
"rewards/rejected": -0.8188724517822266, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.900013201845821e-05, |
|
"logits/chosen": -2.604013204574585, |
|
"logits/rejected": -2.6575911045074463, |
|
"logps/chosen": -192.33749389648438, |
|
"logps/rejected": -196.74574279785156, |
|
"loss": 0.7202, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7086251378059387, |
|
"rewards/margins": 0.026372164487838745, |
|
"rewards/rejected": -0.7349973917007446, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.899007556317893e-05, |
|
"logits/chosen": -2.672982931137085, |
|
"logits/rejected": -2.7704596519470215, |
|
"logps/chosen": -232.0260009765625, |
|
"logps/rejected": -230.7442169189453, |
|
"loss": 0.666, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.703597366809845, |
|
"rewards/margins": 0.1020139679312706, |
|
"rewards/rejected": -0.8056113123893738, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.8979969830227086e-05, |
|
"logits/chosen": -2.7685508728027344, |
|
"logits/rejected": -2.798677921295166, |
|
"logps/chosen": -170.82254028320312, |
|
"logps/rejected": -211.26219177246094, |
|
"loss": 0.6242, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6501718759536743, |
|
"rewards/margins": 0.23585857450962067, |
|
"rewards/rejected": -0.8860303163528442, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.896981484036074e-05, |
|
"logits/chosen": -2.742246150970459, |
|
"logits/rejected": -2.7133634090423584, |
|
"logps/chosen": -188.86024475097656, |
|
"logps/rejected": -199.79791259765625, |
|
"loss": 0.5836, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5299299955368042, |
|
"rewards/margins": 0.2819333076477051, |
|
"rewards/rejected": -0.8118634223937988, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.895961061443911e-05, |
|
"logits/chosen": -2.726637840270996, |
|
"logits/rejected": -2.728475332260132, |
|
"logps/chosen": -204.32278442382812, |
|
"logps/rejected": -233.36709594726562, |
|
"loss": 0.8344, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.8258933424949646, |
|
"rewards/margins": -0.08616884052753448, |
|
"rewards/rejected": -0.7397244572639465, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.894935717342255e-05, |
|
"logits/chosen": -2.757063865661621, |
|
"logits/rejected": -2.7701942920684814, |
|
"logps/chosen": -192.92251586914062, |
|
"logps/rejected": -192.76185607910156, |
|
"loss": 0.7007, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.71357262134552, |
|
"rewards/margins": 0.04112683981657028, |
|
"rewards/rejected": -0.7546994686126709, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.8939054538372496e-05, |
|
"logits/chosen": -2.6160852909088135, |
|
"logits/rejected": -2.6638543605804443, |
|
"logps/chosen": -153.6888427734375, |
|
"logps/rejected": -204.19139099121094, |
|
"loss": 0.6179, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5954157710075378, |
|
"rewards/margins": 0.2527124583721161, |
|
"rewards/rejected": -0.8481282591819763, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.8928702730451456e-05, |
|
"logits/chosen": -2.635458469390869, |
|
"logits/rejected": -2.740834951400757, |
|
"logps/chosen": -229.58181762695312, |
|
"logps/rejected": -209.87832641601562, |
|
"loss": 0.7217, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7451784610748291, |
|
"rewards/margins": 0.019871072843670845, |
|
"rewards/rejected": -0.7650495171546936, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.891830177092294e-05, |
|
"logits/chosen": -2.515532970428467, |
|
"logits/rejected": -2.5850772857666016, |
|
"logps/chosen": -173.58749389648438, |
|
"logps/rejected": -197.440185546875, |
|
"loss": 0.6521, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6022768616676331, |
|
"rewards/margins": 0.11426748335361481, |
|
"rewards/rejected": -0.7165443301200867, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.8907851681151396e-05, |
|
"logits/chosen": -2.708148956298828, |
|
"logits/rejected": -2.735069751739502, |
|
"logps/chosen": -146.8754425048828, |
|
"logps/rejected": -182.95477294921875, |
|
"loss": 0.6006, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.35234156250953674, |
|
"rewards/margins": 0.24402308464050293, |
|
"rewards/rejected": -0.5963646173477173, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.889735248260221e-05, |
|
"logits/chosen": -2.657132625579834, |
|
"logits/rejected": -2.7396936416625977, |
|
"logps/chosen": -172.76268005371094, |
|
"logps/rejected": -185.73153686523438, |
|
"loss": 0.6844, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7841547727584839, |
|
"rewards/margins": 0.08935101330280304, |
|
"rewards/rejected": -0.8735058307647705, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.8886804196841626e-05, |
|
"logits/chosen": -2.6601943969726562, |
|
"logits/rejected": -2.6764605045318604, |
|
"logps/chosen": -182.19937133789062, |
|
"logps/rejected": -196.57550048828125, |
|
"loss": 0.6327, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7912774085998535, |
|
"rewards/margins": 0.19412587583065033, |
|
"rewards/rejected": -0.9854032397270203, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.887620684553674e-05, |
|
"logits/chosen": -2.5685248374938965, |
|
"logits/rejected": -2.5472164154052734, |
|
"logps/chosen": -161.2342529296875, |
|
"logps/rejected": -201.6751708984375, |
|
"loss": 0.683, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6504772901535034, |
|
"rewards/margins": 0.089483842253685, |
|
"rewards/rejected": -0.7399611473083496, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.886556045045542e-05, |
|
"logits/chosen": -2.7824556827545166, |
|
"logits/rejected": -2.772510528564453, |
|
"logps/chosen": -183.0452880859375, |
|
"logps/rejected": -197.40408325195312, |
|
"loss": 0.8643, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -1.0302786827087402, |
|
"rewards/margins": -0.2259717881679535, |
|
"rewards/rejected": -0.8043068647384644, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.8854865033466275e-05, |
|
"logits/chosen": -2.457235097885132, |
|
"logits/rejected": -2.5225629806518555, |
|
"logps/chosen": -137.17259216308594, |
|
"logps/rejected": -151.13467407226562, |
|
"loss": 0.5958, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6421889662742615, |
|
"rewards/margins": 0.3034355044364929, |
|
"rewards/rejected": -0.9456245303153992, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.88441206165386e-05, |
|
"logits/chosen": -2.7572522163391113, |
|
"logits/rejected": -2.7597944736480713, |
|
"logps/chosen": -194.38626098632812, |
|
"logps/rejected": -208.2905731201172, |
|
"loss": 0.848, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.8563791513442993, |
|
"rewards/margins": -0.21535280346870422, |
|
"rewards/rejected": -0.6410263180732727, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.8833327221742356e-05, |
|
"logits/chosen": -2.589763641357422, |
|
"logits/rejected": -2.5469565391540527, |
|
"logps/chosen": -159.60533142089844, |
|
"logps/rejected": -153.5530548095703, |
|
"loss": 0.6174, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6452800035476685, |
|
"rewards/margins": 0.2377607673406601, |
|
"rewards/rejected": -0.8830407857894897, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.88224848712481e-05, |
|
"logits/chosen": -2.6565144062042236, |
|
"logits/rejected": -2.694272994995117, |
|
"logps/chosen": -175.2017059326172, |
|
"logps/rejected": -171.8043975830078, |
|
"loss": 0.6785, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.514467179775238, |
|
"rewards/margins": 0.11190642416477203, |
|
"rewards/rejected": -0.6263736486434937, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.881159358732694e-05, |
|
"logits/chosen": -2.5067477226257324, |
|
"logits/rejected": -2.551682710647583, |
|
"logps/chosen": -187.95533752441406, |
|
"logps/rejected": -237.65907287597656, |
|
"loss": 0.6805, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6460933685302734, |
|
"rewards/margins": 0.15910674631595612, |
|
"rewards/rejected": -0.8052000403404236, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.8800653392350526e-05, |
|
"logits/chosen": -2.4550201892852783, |
|
"logits/rejected": -2.519341468811035, |
|
"logps/chosen": -156.3563690185547, |
|
"logps/rejected": -171.7753143310547, |
|
"loss": 0.7012, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7904012799263, |
|
"rewards/margins": 0.05794687569141388, |
|
"rewards/rejected": -0.8483481407165527, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.8789664308790936e-05, |
|
"logits/chosen": -2.7174458503723145, |
|
"logits/rejected": -2.6885735988616943, |
|
"logps/chosen": -172.3370361328125, |
|
"logps/rejected": -164.46519470214844, |
|
"loss": 0.6861, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.6009785532951355, |
|
"rewards/margins": 0.08145736157894135, |
|
"rewards/rejected": -0.682435929775238, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.8778626359220715e-05, |
|
"logits/chosen": -2.731224536895752, |
|
"logits/rejected": -2.6857399940490723, |
|
"logps/chosen": -185.449951171875, |
|
"logps/rejected": -206.4256591796875, |
|
"loss": 0.7672, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7531261444091797, |
|
"rewards/margins": -0.07965384423732758, |
|
"rewards/rejected": -0.6734722852706909, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.8767539566312734e-05, |
|
"logits/chosen": -2.5604355335235596, |
|
"logits/rejected": -2.704071044921875, |
|
"logps/chosen": -174.9366912841797, |
|
"logps/rejected": -202.98768615722656, |
|
"loss": 0.6137, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6721182465553284, |
|
"rewards/margins": 0.21909648180007935, |
|
"rewards/rejected": -0.8912147283554077, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.875640395284023e-05, |
|
"logits/chosen": -2.758918046951294, |
|
"logits/rejected": -2.7820916175842285, |
|
"logps/chosen": -195.2515869140625, |
|
"logps/rejected": -231.6067352294922, |
|
"loss": 0.5285, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.38608187437057495, |
|
"rewards/margins": 0.4270917773246765, |
|
"rewards/rejected": -0.8131736516952515, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.874521954167671e-05, |
|
"logits/chosen": -2.7890138626098633, |
|
"logits/rejected": -2.8068411350250244, |
|
"logps/chosen": -209.79307556152344, |
|
"logps/rejected": -207.8020477294922, |
|
"loss": 0.6367, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6039459109306335, |
|
"rewards/margins": 0.1874314397573471, |
|
"rewards/rejected": -0.7913773059844971, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.8733986355795905e-05, |
|
"logits/chosen": -2.675286054611206, |
|
"logits/rejected": -2.699337959289551, |
|
"logps/chosen": -233.40780639648438, |
|
"logps/rejected": -208.18724060058594, |
|
"loss": 0.6598, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6822580695152283, |
|
"rewards/margins": 0.1497945785522461, |
|
"rewards/rejected": -0.8320526480674744, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.8722704418271745e-05, |
|
"logits/chosen": -2.5140862464904785, |
|
"logits/rejected": -2.632192850112915, |
|
"logps/chosen": -176.1000213623047, |
|
"logps/rejected": -196.43846130371094, |
|
"loss": 0.7119, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7464176416397095, |
|
"rewards/margins": 0.01839565485715866, |
|
"rewards/rejected": -0.7648133039474487, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.871137375227829e-05, |
|
"logits/chosen": -2.6084189414978027, |
|
"logits/rejected": -2.6140480041503906, |
|
"logps/chosen": -188.7124481201172, |
|
"logps/rejected": -177.5293426513672, |
|
"loss": 0.748, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.618392288684845, |
|
"rewards/margins": 0.016466360539197922, |
|
"rewards/rejected": -0.6348586678504944, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.869999438108971e-05, |
|
"logits/chosen": -2.6994099617004395, |
|
"logits/rejected": -2.743457555770874, |
|
"logps/chosen": -182.1778564453125, |
|
"logps/rejected": -194.54884338378906, |
|
"loss": 0.6603, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5676161050796509, |
|
"rewards/margins": 0.1599595993757248, |
|
"rewards/rejected": -0.7275756597518921, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.8688566328080215e-05, |
|
"logits/chosen": -2.5730295181274414, |
|
"logits/rejected": -2.573648452758789, |
|
"logps/chosen": -199.49908447265625, |
|
"logps/rejected": -235.17568969726562, |
|
"loss": 0.5873, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.379108190536499, |
|
"rewards/margins": 0.30178508162498474, |
|
"rewards/rejected": -0.6808933019638062, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.867708961672399e-05, |
|
"logits/chosen": -2.5466060638427734, |
|
"logits/rejected": -2.661322593688965, |
|
"logps/chosen": -183.09585571289062, |
|
"logps/rejected": -185.6378936767578, |
|
"loss": 0.666, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.49351951479911804, |
|
"rewards/margins": 0.11638712882995605, |
|
"rewards/rejected": -0.6099066734313965, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.866556427059519e-05, |
|
"logits/chosen": -2.7101516723632812, |
|
"logits/rejected": -2.644563913345337, |
|
"logps/chosen": -187.73348999023438, |
|
"logps/rejected": -169.40293884277344, |
|
"loss": 0.7781, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.8404239416122437, |
|
"rewards/margins": -0.026825089007616043, |
|
"rewards/rejected": -0.8135988116264343, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.865399031336787e-05, |
|
"logits/chosen": -2.4861361980438232, |
|
"logits/rejected": -2.565809726715088, |
|
"logps/chosen": -146.98423767089844, |
|
"logps/rejected": -163.1490478515625, |
|
"loss": 0.7333, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5940142869949341, |
|
"rewards/margins": 0.03410058468580246, |
|
"rewards/rejected": -0.6281149387359619, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.8642367768815936e-05, |
|
"logits/chosen": -2.6412410736083984, |
|
"logits/rejected": -2.747821807861328, |
|
"logps/chosen": -174.8326873779297, |
|
"logps/rejected": -219.83612060546875, |
|
"loss": 0.5947, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4866630434989929, |
|
"rewards/margins": 0.2575409412384033, |
|
"rewards/rejected": -0.744204044342041, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.863069666081307e-05, |
|
"logits/chosen": -2.6463019847869873, |
|
"logits/rejected": -2.796405553817749, |
|
"logps/chosen": -164.44869995117188, |
|
"logps/rejected": -217.47230529785156, |
|
"loss": 0.6092, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5829634070396423, |
|
"rewards/margins": 0.26363080739974976, |
|
"rewards/rejected": -0.8465942144393921, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.861897701333274e-05, |
|
"logits/chosen": -2.6675515174865723, |
|
"logits/rejected": -2.6761202812194824, |
|
"logps/chosen": -187.8043212890625, |
|
"logps/rejected": -175.5827178955078, |
|
"loss": 0.8043, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.8770172595977783, |
|
"rewards/margins": -0.06917458772659302, |
|
"rewards/rejected": -0.8078427314758301, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.86072088504481e-05, |
|
"logits/chosen": -2.6193923950195312, |
|
"logits/rejected": -2.652172088623047, |
|
"logps/chosen": -180.9799346923828, |
|
"logps/rejected": -197.08245849609375, |
|
"loss": 0.756, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.8450060486793518, |
|
"rewards/margins": -0.008023982867598534, |
|
"rewards/rejected": -0.8369821310043335, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.859539219633199e-05, |
|
"logits/chosen": -2.4201056957244873, |
|
"logits/rejected": -2.547577381134033, |
|
"logps/chosen": -142.2472381591797, |
|
"logps/rejected": -171.77197265625, |
|
"loss": 0.5808, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.25414225459098816, |
|
"rewards/margins": 0.2882387042045593, |
|
"rewards/rejected": -0.5423809289932251, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.8583527075256804e-05, |
|
"logits/chosen": -2.6075916290283203, |
|
"logits/rejected": -2.6171302795410156, |
|
"logps/chosen": -185.60699462890625, |
|
"logps/rejected": -199.539306640625, |
|
"loss": 0.6039, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6799944043159485, |
|
"rewards/margins": 0.21594738960266113, |
|
"rewards/rejected": -0.8959417939186096, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.857161351159454e-05, |
|
"logits/chosen": -2.7377195358276367, |
|
"logits/rejected": -2.722515821456909, |
|
"logps/chosen": -225.74755859375, |
|
"logps/rejected": -224.8936004638672, |
|
"loss": 0.6793, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8174278140068054, |
|
"rewards/margins": 0.10962995886802673, |
|
"rewards/rejected": -0.9270578026771545, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.8559651529816664e-05, |
|
"logits/chosen": -2.597797393798828, |
|
"logits/rejected": -2.6779723167419434, |
|
"logps/chosen": -159.58380126953125, |
|
"logps/rejected": -185.880615234375, |
|
"loss": 0.679, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7312408089637756, |
|
"rewards/margins": 0.1112150177359581, |
|
"rewards/rejected": -0.8424558639526367, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.854764115449411e-05, |
|
"logits/chosen": -2.7188682556152344, |
|
"logits/rejected": -2.691462993621826, |
|
"logps/chosen": -143.06845092773438, |
|
"logps/rejected": -142.25157165527344, |
|
"loss": 0.6675, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.675825297832489, |
|
"rewards/margins": 0.130048006772995, |
|
"rewards/rejected": -0.8058732748031616, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.853558241029723e-05, |
|
"logits/chosen": -2.629163980484009, |
|
"logits/rejected": -2.5702826976776123, |
|
"logps/chosen": -224.11196899414062, |
|
"logps/rejected": -171.19097900390625, |
|
"loss": 0.718, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.6522189378738403, |
|
"rewards/margins": 0.046554647386074066, |
|
"rewards/rejected": -0.6987735629081726, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.8523475321995715e-05, |
|
"logits/chosen": -2.7033562660217285, |
|
"logits/rejected": -2.5383901596069336, |
|
"logps/chosen": -181.98219299316406, |
|
"logps/rejected": -172.13865661621094, |
|
"loss": 0.719, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5484719276428223, |
|
"rewards/margins": 0.0074146464467048645, |
|
"rewards/rejected": -0.5558865666389465, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.8511319914458555e-05, |
|
"logits/chosen": -2.5836706161499023, |
|
"logits/rejected": -2.5528974533081055, |
|
"logps/chosen": -223.9678497314453, |
|
"logps/rejected": -211.67184448242188, |
|
"loss": 0.7904, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7669371366500854, |
|
"rewards/margins": -0.11547183990478516, |
|
"rewards/rejected": -0.6514652967453003, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.849911621265401e-05, |
|
"logits/chosen": -2.652245044708252, |
|
"logits/rejected": -2.6907477378845215, |
|
"logps/chosen": -172.45669555664062, |
|
"logps/rejected": -173.8350067138672, |
|
"loss": 0.7916, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.713798999786377, |
|
"rewards/margins": -0.09893038123846054, |
|
"rewards/rejected": -0.6148686408996582, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.848686424164953e-05, |
|
"logits/chosen": -2.703127384185791, |
|
"logits/rejected": -2.704859733581543, |
|
"logps/chosen": -214.36392211914062, |
|
"logps/rejected": -185.55931091308594, |
|
"loss": 0.7281, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7356032133102417, |
|
"rewards/margins": -0.018105890601873398, |
|
"rewards/rejected": -0.7174972891807556, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.84745640266117e-05, |
|
"logits/chosen": -2.690885066986084, |
|
"logits/rejected": -2.740234851837158, |
|
"logps/chosen": -176.29266357421875, |
|
"logps/rejected": -206.47596740722656, |
|
"loss": 0.5979, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6022865176200867, |
|
"rewards/margins": 0.31845831871032715, |
|
"rewards/rejected": -0.9207448363304138, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.846221559280624e-05, |
|
"logits/chosen": -2.6713180541992188, |
|
"logits/rejected": -2.7118892669677734, |
|
"logps/chosen": -144.1902313232422, |
|
"logps/rejected": -170.08709716796875, |
|
"loss": 0.7913, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.737549364566803, |
|
"rewards/margins": -0.0928279459476471, |
|
"rewards/rejected": -0.6447213888168335, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.844981896559787e-05, |
|
"logits/chosen": -2.7589731216430664, |
|
"logits/rejected": -2.6973843574523926, |
|
"logps/chosen": -185.05255126953125, |
|
"logps/rejected": -177.13946533203125, |
|
"loss": 0.7678, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.8261479735374451, |
|
"rewards/margins": -0.09880837798118591, |
|
"rewards/rejected": -0.7273396253585815, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.8437374170450344e-05, |
|
"logits/chosen": -2.659578323364258, |
|
"logits/rejected": -2.617631196975708, |
|
"logps/chosen": -175.1558074951172, |
|
"logps/rejected": -198.41343688964844, |
|
"loss": 0.6794, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.8928545117378235, |
|
"rewards/margins": 0.10537480562925339, |
|
"rewards/rejected": -0.9982293844223022, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.842488123292632e-05, |
|
"logits/chosen": -2.6015379428863525, |
|
"logits/rejected": -2.5835628509521484, |
|
"logps/chosen": -188.88092041015625, |
|
"logps/rejected": -183.85733032226562, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.7475460171699524, |
|
"rewards/margins": 0.20796708762645721, |
|
"rewards/rejected": -0.9555131196975708, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.8412340178687374e-05, |
|
"logits/chosen": -2.652589797973633, |
|
"logits/rejected": -2.7427022457122803, |
|
"logps/chosen": -198.62112426757812, |
|
"logps/rejected": -206.33192443847656, |
|
"loss": 0.5545, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6447871923446655, |
|
"rewards/margins": 0.38781607151031494, |
|
"rewards/rejected": -1.0326032638549805, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.839975103349391e-05, |
|
"logits/chosen": -2.79940128326416, |
|
"logits/rejected": -2.8857078552246094, |
|
"logps/chosen": -206.7621307373047, |
|
"logps/rejected": -231.99472045898438, |
|
"loss": 0.5811, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5604078769683838, |
|
"rewards/margins": 0.5052842497825623, |
|
"rewards/rejected": -1.0656920671463013, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.8387113823205096e-05, |
|
"logits/chosen": -2.562626361846924, |
|
"logits/rejected": -2.5439820289611816, |
|
"logps/chosen": -198.9171142578125, |
|
"logps/rejected": -184.92816162109375, |
|
"loss": 0.7251, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6547099351882935, |
|
"rewards/margins": 0.07420587539672852, |
|
"rewards/rejected": -0.7289157509803772, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.8374428573778864e-05, |
|
"logits/chosen": -2.6110103130340576, |
|
"logits/rejected": -2.612607479095459, |
|
"logps/chosen": -176.07400512695312, |
|
"logps/rejected": -181.38418579101562, |
|
"loss": 0.8065, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.758232593536377, |
|
"rewards/margins": -0.0620691180229187, |
|
"rewards/rejected": -0.6961634755134583, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.8361695311271795e-05, |
|
"logits/chosen": -2.8273181915283203, |
|
"logits/rejected": -2.830404758453369, |
|
"logps/chosen": -205.66763305664062, |
|
"logps/rejected": -222.3781280517578, |
|
"loss": 0.671, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.8250411152839661, |
|
"rewards/margins": 0.13250316679477692, |
|
"rewards/rejected": -0.9575443267822266, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.83489140618391e-05, |
|
"logits/chosen": -2.666761875152588, |
|
"logits/rejected": -2.757248878479004, |
|
"logps/chosen": -215.3235626220703, |
|
"logps/rejected": -216.06568908691406, |
|
"loss": 0.6942, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7060136795043945, |
|
"rewards/margins": 0.16570770740509033, |
|
"rewards/rejected": -0.8717214465141296, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.833608485173457e-05, |
|
"logits/chosen": -2.3725719451904297, |
|
"logits/rejected": -2.4071693420410156, |
|
"logps/chosen": -146.78704833984375, |
|
"logps/rejected": -186.87950134277344, |
|
"loss": 0.6066, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4698937237262726, |
|
"rewards/margins": 0.2167559713125229, |
|
"rewards/rejected": -0.6866496801376343, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.8323207707310496e-05, |
|
"logits/chosen": -2.6677677631378174, |
|
"logits/rejected": -2.5784990787506104, |
|
"logps/chosen": -204.05075073242188, |
|
"logps/rejected": -224.5699920654297, |
|
"loss": 0.5351, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4006405174732208, |
|
"rewards/margins": 0.5226905941963196, |
|
"rewards/rejected": -0.923331081867218, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.831028265501764e-05, |
|
"logits/chosen": -2.6300296783447266, |
|
"logits/rejected": -2.810920238494873, |
|
"logps/chosen": -177.45635986328125, |
|
"logps/rejected": -234.4469451904297, |
|
"loss": 0.5034, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.4587719440460205, |
|
"rewards/margins": 0.5265656113624573, |
|
"rewards/rejected": -0.9853376150131226, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.829730972140517e-05, |
|
"logits/chosen": -2.5978078842163086, |
|
"logits/rejected": -2.570225954055786, |
|
"logps/chosen": -133.3565673828125, |
|
"logps/rejected": -136.15902709960938, |
|
"loss": 0.632, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.3776158094406128, |
|
"rewards/margins": 0.1921352744102478, |
|
"rewards/rejected": -0.5697510242462158, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.8284288933120594e-05, |
|
"logits/chosen": -2.5982682704925537, |
|
"logits/rejected": -2.6651642322540283, |
|
"logps/chosen": -181.2775115966797, |
|
"logps/rejected": -203.33876037597656, |
|
"loss": 0.5832, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4960383474826813, |
|
"rewards/margins": 0.31738704442977905, |
|
"rewards/rejected": -0.8134254217147827, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.8271220316909735e-05, |
|
"logits/chosen": -2.761739730834961, |
|
"logits/rejected": -2.715710401535034, |
|
"logps/chosen": -191.56158447265625, |
|
"logps/rejected": -175.9281463623047, |
|
"loss": 0.7011, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.62202388048172, |
|
"rewards/margins": 0.03054755926132202, |
|
"rewards/rejected": -0.652571439743042, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.825810389961666e-05, |
|
"logits/chosen": -2.614866018295288, |
|
"logits/rejected": -2.6899471282958984, |
|
"logps/chosen": -135.42494201660156, |
|
"logps/rejected": -168.2887725830078, |
|
"loss": 0.6542, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5677900910377502, |
|
"rewards/margins": 0.13706976175308228, |
|
"rewards/rejected": -0.7048598527908325, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.8244939708183596e-05, |
|
"logits/chosen": -2.643815040588379, |
|
"logits/rejected": -2.6820266246795654, |
|
"logps/chosen": -178.99908447265625, |
|
"logps/rejected": -212.68692016601562, |
|
"loss": 0.5719, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5018581748008728, |
|
"rewards/margins": 0.3394116163253784, |
|
"rewards/rejected": -0.841269850730896, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.823172776965094e-05, |
|
"logits/chosen": -2.702993869781494, |
|
"logits/rejected": -2.7458367347717285, |
|
"logps/chosen": -239.1004638671875, |
|
"logps/rejected": -227.72450256347656, |
|
"loss": 0.6493, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4328669309616089, |
|
"rewards/margins": 0.1465311497449875, |
|
"rewards/rejected": -0.5793980360031128, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.821846811115713e-05, |
|
"logits/chosen": -2.658914804458618, |
|
"logits/rejected": -2.728074073791504, |
|
"logps/chosen": -207.8568115234375, |
|
"logps/rejected": -194.11788940429688, |
|
"loss": 0.643, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5129541754722595, |
|
"rewards/margins": 0.19354072213172913, |
|
"rewards/rejected": -0.706494927406311, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.820516075993865e-05, |
|
"logits/chosen": -2.741415023803711, |
|
"logits/rejected": -2.6675500869750977, |
|
"logps/chosen": -156.90858459472656, |
|
"logps/rejected": -166.85830688476562, |
|
"loss": 0.8793, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.9281965494155884, |
|
"rewards/margins": -0.2124374508857727, |
|
"rewards/rejected": -0.7157591581344604, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.819180574332994e-05, |
|
"logits/chosen": -2.7196927070617676, |
|
"logits/rejected": -2.748945713043213, |
|
"logps/chosen": -202.15493774414062, |
|
"logps/rejected": -194.124755859375, |
|
"loss": 0.7879, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.8033719062805176, |
|
"rewards/margins": -0.02236950770020485, |
|
"rewards/rejected": -0.7810022830963135, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.8178403088763355e-05, |
|
"logits/chosen": -2.678246259689331, |
|
"logits/rejected": -2.729729652404785, |
|
"logps/chosen": -160.1791534423828, |
|
"logps/rejected": -185.8133544921875, |
|
"loss": 0.7682, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5224413871765137, |
|
"rewards/margins": -0.03584878519177437, |
|
"rewards/rejected": -0.4865925908088684, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.8164952823769085e-05, |
|
"logits/chosen": -2.649317979812622, |
|
"logits/rejected": -2.655850410461426, |
|
"logps/chosen": -241.83953857421875, |
|
"logps/rejected": -226.36309814453125, |
|
"loss": 0.6643, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6058496236801147, |
|
"rewards/margins": 0.16187947988510132, |
|
"rewards/rejected": -0.7677291035652161, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.815145497597514e-05, |
|
"logits/chosen": -2.711923837661743, |
|
"logits/rejected": -2.7140746116638184, |
|
"logps/chosen": -157.05551147460938, |
|
"logps/rejected": -153.10430908203125, |
|
"loss": 0.7461, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.6182957291603088, |
|
"rewards/margins": -0.003988802433013916, |
|
"rewards/rejected": -0.6143069267272949, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.8137909573107246e-05, |
|
"logits/chosen": -2.632993459701538, |
|
"logits/rejected": -2.7051479816436768, |
|
"logps/chosen": -193.94879150390625, |
|
"logps/rejected": -189.07920837402344, |
|
"loss": 0.6392, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6661685705184937, |
|
"rewards/margins": 0.24933494627475739, |
|
"rewards/rejected": -0.9155035614967346, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.812431664298883e-05, |
|
"logits/chosen": -2.8046534061431885, |
|
"logits/rejected": -2.7321465015411377, |
|
"logps/chosen": -205.9781951904297, |
|
"logps/rejected": -212.83636474609375, |
|
"loss": 0.6998, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.49066948890686035, |
|
"rewards/margins": 0.02486901544034481, |
|
"rewards/rejected": -0.5155385136604309, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.811067621354094e-05, |
|
"logits/chosen": -2.471921920776367, |
|
"logits/rejected": -2.464104175567627, |
|
"logps/chosen": -146.0660400390625, |
|
"logps/rejected": -147.8424835205078, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5911726951599121, |
|
"rewards/margins": 0.050756677985191345, |
|
"rewards/rejected": -0.6419293284416199, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.8096988312782174e-05, |
|
"logits/chosen": -2.733851671218872, |
|
"logits/rejected": -2.734400510787964, |
|
"logps/chosen": -201.3523712158203, |
|
"logps/rejected": -213.01361083984375, |
|
"loss": 0.6494, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4939979016780853, |
|
"rewards/margins": 0.25225183367729187, |
|
"rewards/rejected": -0.7462497353553772, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.8083252968828665e-05, |
|
"logits/chosen": -2.5853540897369385, |
|
"logits/rejected": -2.5439412593841553, |
|
"logps/chosen": -240.09124755859375, |
|
"logps/rejected": -200.9342803955078, |
|
"loss": 0.7954, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7007974982261658, |
|
"rewards/margins": -0.10030128061771393, |
|
"rewards/rejected": -0.6004961133003235, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.8069470209893974e-05, |
|
"logits/chosen": -2.4914681911468506, |
|
"logits/rejected": -2.5363824367523193, |
|
"logps/chosen": -176.0529327392578, |
|
"logps/rejected": -212.58383178710938, |
|
"loss": 0.59, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4798307418823242, |
|
"rewards/margins": 0.2897476851940155, |
|
"rewards/rejected": -0.7695784568786621, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.8055640064289086e-05, |
|
"logits/chosen": -2.627121686935425, |
|
"logits/rejected": -2.6512184143066406, |
|
"logps/chosen": -174.460205078125, |
|
"logps/rejected": -205.20346069335938, |
|
"loss": 0.6553, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5525593757629395, |
|
"rewards/margins": 0.1847618669271469, |
|
"rewards/rejected": -0.7373212575912476, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.80417625604223e-05, |
|
"logits/chosen": -2.405069589614868, |
|
"logits/rejected": -2.5229604244232178, |
|
"logps/chosen": -179.3883819580078, |
|
"logps/rejected": -195.5463409423828, |
|
"loss": 0.5946, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7376325726509094, |
|
"rewards/margins": 0.28595036268234253, |
|
"rewards/rejected": -1.023582935333252, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.8027837726799205e-05, |
|
"logits/chosen": -2.6785571575164795, |
|
"logits/rejected": -2.725235939025879, |
|
"logps/chosen": -177.39048767089844, |
|
"logps/rejected": -180.63433837890625, |
|
"loss": 0.7008, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.4321979880332947, |
|
"rewards/margins": 0.021436618641018867, |
|
"rewards/rejected": -0.4536346197128296, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.801386559202259e-05, |
|
"logits/chosen": -2.338463544845581, |
|
"logits/rejected": -2.3258635997772217, |
|
"logps/chosen": -136.97865295410156, |
|
"logps/rejected": -175.3179168701172, |
|
"loss": 0.6352, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3434058427810669, |
|
"rewards/margins": 0.15200775861740112, |
|
"rewards/rejected": -0.495413601398468, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.799984618479242e-05, |
|
"logits/chosen": -2.6278395652770996, |
|
"logits/rejected": -2.508350372314453, |
|
"logps/chosen": -154.35214233398438, |
|
"logps/rejected": -188.7537841796875, |
|
"loss": 0.6372, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.4511483311653137, |
|
"rewards/margins": 0.18060770630836487, |
|
"rewards/rejected": -0.6317560076713562, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.798577953390577e-05, |
|
"logits/chosen": -2.561044931411743, |
|
"logits/rejected": -2.6394059658050537, |
|
"logps/chosen": -197.1724395751953, |
|
"logps/rejected": -189.59738159179688, |
|
"loss": 0.6657, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5811476111412048, |
|
"rewards/margins": 0.09012848883867264, |
|
"rewards/rejected": -0.6712760329246521, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.797166566825675e-05, |
|
"logits/chosen": -2.4525938034057617, |
|
"logits/rejected": -2.509936809539795, |
|
"logps/chosen": -134.09764099121094, |
|
"logps/rejected": -151.4327850341797, |
|
"loss": 0.6024, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.24274428188800812, |
|
"rewards/margins": 0.2151957005262375, |
|
"rewards/rejected": -0.4579399824142456, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.795750461683644e-05, |
|
"logits/chosen": -2.5733039379119873, |
|
"logits/rejected": -2.624908924102783, |
|
"logps/chosen": -152.46946716308594, |
|
"logps/rejected": -176.73748779296875, |
|
"loss": 0.7849, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5938777923583984, |
|
"rewards/margins": -0.11171096563339233, |
|
"rewards/rejected": -0.4821667969226837, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.794329640873285e-05, |
|
"logits/chosen": -2.552907705307007, |
|
"logits/rejected": -2.564852237701416, |
|
"logps/chosen": -215.24732971191406, |
|
"logps/rejected": -214.79995727539062, |
|
"loss": 0.725, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.522678017616272, |
|
"rewards/margins": 0.005169212818145752, |
|
"rewards/rejected": -0.527847170829773, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.7929041073130867e-05, |
|
"logits/chosen": -2.681658983230591, |
|
"logits/rejected": -2.733393669128418, |
|
"logps/chosen": -154.22958374023438, |
|
"logps/rejected": -182.50245666503906, |
|
"loss": 0.6893, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.47931969165802, |
|
"rewards/margins": 0.07224328815937042, |
|
"rewards/rejected": -0.5515629649162292, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.7914738639312165e-05, |
|
"logits/chosen": -2.462939739227295, |
|
"logits/rejected": -2.5003418922424316, |
|
"logps/chosen": -152.27236938476562, |
|
"logps/rejected": -162.45211791992188, |
|
"loss": 0.6494, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.47114118933677673, |
|
"rewards/margins": 0.16562382876873016, |
|
"rewards/rejected": -0.6367650628089905, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.790038913665519e-05, |
|
"logits/chosen": -2.4999754428863525, |
|
"logits/rejected": -2.5065388679504395, |
|
"logps/chosen": -213.994873046875, |
|
"logps/rejected": -214.79046630859375, |
|
"loss": 0.6382, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5417348146438599, |
|
"rewards/margins": 0.19294968247413635, |
|
"rewards/rejected": -0.7346844673156738, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.788599259463502e-05, |
|
"logits/chosen": -2.5474274158477783, |
|
"logits/rejected": -2.5991437435150146, |
|
"logps/chosen": -180.50039672851562, |
|
"logps/rejected": -225.59510803222656, |
|
"loss": 0.6276, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7096745371818542, |
|
"rewards/margins": 0.2028300166130066, |
|
"rewards/rejected": -0.9125044941902161, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.787154904282341e-05, |
|
"logits/chosen": -2.5795090198516846, |
|
"logits/rejected": -2.577873468399048, |
|
"logps/chosen": -163.51168823242188, |
|
"logps/rejected": -140.55410766601562, |
|
"loss": 0.794, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.5473388433456421, |
|
"rewards/margins": -0.11595198512077332, |
|
"rewards/rejected": -0.4313868582248688, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.7857058510888645e-05, |
|
"logits/chosen": -2.516535758972168, |
|
"logits/rejected": -2.5428926944732666, |
|
"logps/chosen": -208.22772216796875, |
|
"logps/rejected": -231.32977294921875, |
|
"loss": 0.6828, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5902161598205566, |
|
"rewards/margins": 0.16028547286987305, |
|
"rewards/rejected": -0.7505015730857849, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.7842521028595526e-05, |
|
"logits/chosen": -2.379845380783081, |
|
"logits/rejected": -2.3832809925079346, |
|
"logps/chosen": -200.7274169921875, |
|
"logps/rejected": -164.21742248535156, |
|
"loss": 0.6737, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5952718257904053, |
|
"rewards/margins": 0.09693758189678192, |
|
"rewards/rejected": -0.6922094225883484, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.7827936625805284e-05, |
|
"logits/chosen": -2.6402270793914795, |
|
"logits/rejected": -2.7206149101257324, |
|
"logps/chosen": -166.23167419433594, |
|
"logps/rejected": -214.43209838867188, |
|
"loss": 0.6564, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5481399893760681, |
|
"rewards/margins": 0.19679725170135498, |
|
"rewards/rejected": -0.7449373006820679, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.7813305332475535e-05, |
|
"logits/chosen": -2.5474841594696045, |
|
"logits/rejected": -2.398405075073242, |
|
"logps/chosen": -184.8474884033203, |
|
"logps/rejected": -174.7438507080078, |
|
"loss": 0.814, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.6769263744354248, |
|
"rewards/margins": -0.10798013210296631, |
|
"rewards/rejected": -0.5689462423324585, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.77986271786602e-05, |
|
"logits/chosen": -2.4804301261901855, |
|
"logits/rejected": -2.41290545463562, |
|
"logps/chosen": -196.86683654785156, |
|
"logps/rejected": -196.78880310058594, |
|
"loss": 0.7094, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5920239686965942, |
|
"rewards/margins": 0.030932970345020294, |
|
"rewards/rejected": -0.6229569315910339, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.778390219450949e-05, |
|
"logits/chosen": -2.37176775932312, |
|
"logits/rejected": -2.5298891067504883, |
|
"logps/chosen": -161.76962280273438, |
|
"logps/rejected": -179.2218475341797, |
|
"loss": 0.6622, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6016180515289307, |
|
"rewards/margins": 0.17984583973884583, |
|
"rewards/rejected": -0.7814638614654541, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.776913041026976e-05, |
|
"logits/chosen": -2.4908242225646973, |
|
"logits/rejected": -2.6029117107391357, |
|
"logps/chosen": -147.92144775390625, |
|
"logps/rejected": -160.30494689941406, |
|
"loss": 0.7192, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.4882547855377197, |
|
"rewards/margins": 0.03175659850239754, |
|
"rewards/rejected": -0.5200113654136658, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.775431185628353e-05, |
|
"logits/chosen": -2.4055118560791016, |
|
"logits/rejected": -2.442111015319824, |
|
"logps/chosen": -168.19540405273438, |
|
"logps/rejected": -166.55137634277344, |
|
"loss": 0.7503, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6233083605766296, |
|
"rewards/margins": -0.03455538675189018, |
|
"rewards/rejected": -0.5887529850006104, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.7739446562989384e-05, |
|
"logits/chosen": -2.4722681045532227, |
|
"logits/rejected": -2.572155237197876, |
|
"logps/chosen": -207.08755493164062, |
|
"logps/rejected": -210.96417236328125, |
|
"loss": 0.6213, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5305930376052856, |
|
"rewards/margins": 0.21656833589076996, |
|
"rewards/rejected": -0.7471613883972168, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.772453456092191e-05, |
|
"logits/chosen": -2.158127546310425, |
|
"logits/rejected": -2.197726249694824, |
|
"logps/chosen": -188.421142578125, |
|
"logps/rejected": -190.73631286621094, |
|
"loss": 0.6757, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.4641679525375366, |
|
"rewards/margins": 0.13141167163848877, |
|
"rewards/rejected": -0.5955795645713806, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.7709575880711634e-05, |
|
"logits/chosen": -2.5840139389038086, |
|
"logits/rejected": -2.645707130432129, |
|
"logps/chosen": -188.22262573242188, |
|
"logps/rejected": -196.6393585205078, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7071235775947571, |
|
"rewards/margins": 0.0883268266916275, |
|
"rewards/rejected": -0.7954504489898682, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.769457055308497e-05, |
|
"logits/chosen": -2.4420454502105713, |
|
"logits/rejected": -2.4484505653381348, |
|
"logps/chosen": -169.99232482910156, |
|
"logps/rejected": -169.7140350341797, |
|
"loss": 0.7436, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5594102144241333, |
|
"rewards/margins": -0.01718483492732048, |
|
"rewards/rejected": -0.5422253608703613, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.767951860886415e-05, |
|
"logits/chosen": -2.530424118041992, |
|
"logits/rejected": -2.601032257080078, |
|
"logps/chosen": -208.61627197265625, |
|
"logps/rejected": -235.3765411376953, |
|
"loss": 0.671, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7119523882865906, |
|
"rewards/margins": 0.13131096959114075, |
|
"rewards/rejected": -0.8432632684707642, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.766442007896715e-05, |
|
"logits/chosen": -2.662515640258789, |
|
"logits/rejected": -2.574281930923462, |
|
"logps/chosen": -206.23391723632812, |
|
"logps/rejected": -185.04434204101562, |
|
"loss": 0.6698, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6086325645446777, |
|
"rewards/margins": 0.1881732940673828, |
|
"rewards/rejected": -0.7968058586120605, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.764927499440767e-05, |
|
"logits/chosen": -2.4306862354278564, |
|
"logits/rejected": -2.48148512840271, |
|
"logps/chosen": -149.6880340576172, |
|
"logps/rejected": -164.9370574951172, |
|
"loss": 0.6242, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.5055115222930908, |
|
"rewards/margins": 0.18054383993148804, |
|
"rewards/rejected": -0.6860553026199341, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.763408338629498e-05, |
|
"logits/chosen": -2.6945927143096924, |
|
"logits/rejected": -2.712038278579712, |
|
"logps/chosen": -195.50482177734375, |
|
"logps/rejected": -207.51431274414062, |
|
"loss": 0.7339, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7351135015487671, |
|
"rewards/margins": 0.00292108952999115, |
|
"rewards/rejected": -0.7380346059799194, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.761884528583396e-05, |
|
"logits/chosen": -2.4739484786987305, |
|
"logits/rejected": -2.5098178386688232, |
|
"logps/chosen": -199.3061065673828, |
|
"logps/rejected": -180.8711395263672, |
|
"loss": 0.7055, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6267393827438354, |
|
"rewards/margins": 0.034921929240226746, |
|
"rewards/rejected": -0.6616613268852234, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.760356072432498e-05, |
|
"logits/chosen": -2.5606906414031982, |
|
"logits/rejected": -2.6224427223205566, |
|
"logps/chosen": -147.893798828125, |
|
"logps/rejected": -156.9450225830078, |
|
"loss": 0.7825, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.8380388021469116, |
|
"rewards/margins": -0.05697247013449669, |
|
"rewards/rejected": -0.7810662388801575, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.7588229733163834e-05, |
|
"logits/chosen": -2.479783058166504, |
|
"logits/rejected": -2.5184426307678223, |
|
"logps/chosen": -214.60830688476562, |
|
"logps/rejected": -204.91534423828125, |
|
"loss": 0.6624, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.745360255241394, |
|
"rewards/margins": 0.17358280718326569, |
|
"rewards/rejected": -0.9189431071281433, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.757285234384169e-05, |
|
"logits/chosen": -2.5682218074798584, |
|
"logits/rejected": -2.5506396293640137, |
|
"logps/chosen": -158.36090087890625, |
|
"logps/rejected": -163.76829528808594, |
|
"loss": 0.7196, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7472302913665771, |
|
"rewards/margins": 0.03290612995624542, |
|
"rewards/rejected": -0.7801364064216614, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.755742858794503e-05, |
|
"logits/chosen": -2.476301908493042, |
|
"logits/rejected": -2.4283623695373535, |
|
"logps/chosen": -180.48712158203125, |
|
"logps/rejected": -190.4640655517578, |
|
"loss": 0.7285, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5941387414932251, |
|
"rewards/margins": -0.008222660049796104, |
|
"rewards/rejected": -0.5859161019325256, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.754195849715557e-05, |
|
"logits/chosen": -2.4996767044067383, |
|
"logits/rejected": -2.5315232276916504, |
|
"logps/chosen": -211.7996826171875, |
|
"logps/rejected": -221.70632934570312, |
|
"loss": 0.6269, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6848435997962952, |
|
"rewards/margins": 0.2251822054386139, |
|
"rewards/rejected": -0.9100258350372314, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.75264421032502e-05, |
|
"logits/chosen": -2.3915770053863525, |
|
"logits/rejected": -2.443027973175049, |
|
"logps/chosen": -189.36318969726562, |
|
"logps/rejected": -234.4507293701172, |
|
"loss": 0.59, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6983102560043335, |
|
"rewards/margins": 0.33968544006347656, |
|
"rewards/rejected": -1.03799569606781, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.751087943810093e-05, |
|
"logits/chosen": -2.4122979640960693, |
|
"logits/rejected": -2.4623374938964844, |
|
"logps/chosen": -150.90496826171875, |
|
"logps/rejected": -155.33792114257812, |
|
"loss": 0.5973, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6139358282089233, |
|
"rewards/margins": 0.2593153715133667, |
|
"rewards/rejected": -0.87325119972229, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.749527053367481e-05, |
|
"logits/chosen": -2.4751663208007812, |
|
"logits/rejected": -2.5102648735046387, |
|
"logps/chosen": -199.7110595703125, |
|
"logps/rejected": -211.5991973876953, |
|
"loss": 0.6728, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5334303379058838, |
|
"rewards/margins": 0.10008269548416138, |
|
"rewards/rejected": -0.6335129737854004, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.747961542203386e-05, |
|
"logits/chosen": -2.458789110183716, |
|
"logits/rejected": -2.4540915489196777, |
|
"logps/chosen": -202.07606506347656, |
|
"logps/rejected": -233.15286254882812, |
|
"loss": 0.7338, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7183946371078491, |
|
"rewards/margins": -0.020335379987955093, |
|
"rewards/rejected": -0.6980592608451843, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.746391413533503e-05, |
|
"logits/chosen": -2.577547788619995, |
|
"logits/rejected": -2.6487934589385986, |
|
"logps/chosen": -200.61962890625, |
|
"logps/rejected": -197.4369354248047, |
|
"loss": 0.6989, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6610112190246582, |
|
"rewards/margins": 0.05467906594276428, |
|
"rewards/rejected": -0.7156902551651001, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.74481667058301e-05, |
|
"logits/chosen": -2.460482120513916, |
|
"logits/rejected": -2.428621768951416, |
|
"logps/chosen": -173.61727905273438, |
|
"logps/rejected": -174.00869750976562, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6227931380271912, |
|
"rewards/margins": 0.061159878969192505, |
|
"rewards/rejected": -0.6839529275894165, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.743237316586564e-05, |
|
"logits/chosen": -2.7055728435516357, |
|
"logits/rejected": -2.694801092147827, |
|
"logps/chosen": -233.16864013671875, |
|
"logps/rejected": -219.25518798828125, |
|
"loss": 0.5653, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.7030515670776367, |
|
"rewards/margins": 0.3454846143722534, |
|
"rewards/rejected": -1.0485361814498901, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.741653354788295e-05, |
|
"logits/chosen": -2.5690486431121826, |
|
"logits/rejected": -2.5955686569213867, |
|
"logps/chosen": -219.93161010742188, |
|
"logps/rejected": -219.0428466796875, |
|
"loss": 0.6281, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6727169156074524, |
|
"rewards/margins": 0.190028116106987, |
|
"rewards/rejected": -0.8627450466156006, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.7400647884417956e-05, |
|
"logits/chosen": -2.4904708862304688, |
|
"logits/rejected": -2.62727689743042, |
|
"logps/chosen": -178.09906005859375, |
|
"logps/rejected": -193.81495666503906, |
|
"loss": 0.9252, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.8763137459754944, |
|
"rewards/margins": -0.29989194869995117, |
|
"rewards/rejected": -0.5764217376708984, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.7384716208101166e-05, |
|
"logits/chosen": -2.443995952606201, |
|
"logits/rejected": -2.4667203426361084, |
|
"logps/chosen": -187.14244079589844, |
|
"logps/rejected": -178.54718017578125, |
|
"loss": 0.6852, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5054149031639099, |
|
"rewards/margins": 0.07299378514289856, |
|
"rewards/rejected": -0.5784087777137756, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.736873855165762e-05, |
|
"logits/chosen": -2.535909652709961, |
|
"logits/rejected": -2.5671615600585938, |
|
"logps/chosen": -188.70001220703125, |
|
"logps/rejected": -204.35406494140625, |
|
"loss": 0.5707, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.46517521142959595, |
|
"rewards/margins": 0.3614599108695984, |
|
"rewards/rejected": -0.8266351222991943, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.735271494790678e-05, |
|
"logits/chosen": -2.528953790664673, |
|
"logits/rejected": -2.6590449810028076, |
|
"logps/chosen": -187.15150451660156, |
|
"logps/rejected": -224.23590087890625, |
|
"loss": 0.5801, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5339723229408264, |
|
"rewards/margins": 0.28138864040374756, |
|
"rewards/rejected": -0.8153610229492188, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.733664542976253e-05, |
|
"logits/chosen": -2.4930217266082764, |
|
"logits/rejected": -2.5632638931274414, |
|
"logps/chosen": -190.24932861328125, |
|
"logps/rejected": -229.89834594726562, |
|
"loss": 0.6699, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.7542927265167236, |
|
"rewards/margins": 0.1013142317533493, |
|
"rewards/rejected": -0.8556069731712341, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.732053003023301e-05, |
|
"logits/chosen": -2.4712095260620117, |
|
"logits/rejected": -2.5324177742004395, |
|
"logps/chosen": -174.3723907470703, |
|
"logps/rejected": -206.7808837890625, |
|
"loss": 0.6939, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6538185477256775, |
|
"rewards/margins": 0.04219439998269081, |
|
"rewards/rejected": -0.6960129141807556, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.730436878242064e-05, |
|
"logits/chosen": -2.5648436546325684, |
|
"logits/rejected": -2.6675539016723633, |
|
"logps/chosen": -201.86000061035156, |
|
"logps/rejected": -222.20242309570312, |
|
"loss": 0.5946, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.752447783946991, |
|
"rewards/margins": 0.23879489302635193, |
|
"rewards/rejected": -0.9912427067756653, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.7288161719522016e-05, |
|
"logits/chosen": -2.55771541595459, |
|
"logits/rejected": -2.597944974899292, |
|
"logps/chosen": -168.26791381835938, |
|
"logps/rejected": -158.33251953125, |
|
"loss": 0.647, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.43088653683662415, |
|
"rewards/margins": 0.134020134806633, |
|
"rewards/rejected": -0.5649065971374512, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.727190887482783e-05, |
|
"logits/chosen": -2.801307201385498, |
|
"logits/rejected": -2.7668023109436035, |
|
"logps/chosen": -221.6161346435547, |
|
"logps/rejected": -212.2379913330078, |
|
"loss": 0.6348, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5888996124267578, |
|
"rewards/margins": 0.16605983674526215, |
|
"rewards/rejected": -0.7549594044685364, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.725561028172282e-05, |
|
"logits/chosen": -2.459953784942627, |
|
"logits/rejected": -2.4980850219726562, |
|
"logps/chosen": -201.91897583007812, |
|
"logps/rejected": -208.00759887695312, |
|
"loss": 0.6881, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.6136542558670044, |
|
"rewards/margins": 0.06774169206619263, |
|
"rewards/rejected": -0.6813960075378418, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.7239265973685696e-05, |
|
"logits/chosen": -2.551776647567749, |
|
"logits/rejected": -2.5754733085632324, |
|
"logps/chosen": -185.41299438476562, |
|
"logps/rejected": -187.89947509765625, |
|
"loss": 0.6601, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5381264090538025, |
|
"rewards/margins": 0.11568471044301987, |
|
"rewards/rejected": -0.6538110971450806, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.722287598428907e-05, |
|
"logits/chosen": -2.588721990585327, |
|
"logits/rejected": -2.629152774810791, |
|
"logps/chosen": -197.33180236816406, |
|
"logps/rejected": -211.42718505859375, |
|
"loss": 0.5501, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6033443808555603, |
|
"rewards/margins": 0.405744731426239, |
|
"rewards/rejected": -1.0090891122817993, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.720644034719938e-05, |
|
"logits/chosen": -2.5057499408721924, |
|
"logits/rejected": -2.6138525009155273, |
|
"logps/chosen": -175.42535400390625, |
|
"logps/rejected": -199.4925079345703, |
|
"loss": 0.723, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.8974592685699463, |
|
"rewards/margins": 0.013055291026830673, |
|
"rewards/rejected": -0.9105146527290344, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.7189959096176825e-05, |
|
"logits/chosen": -2.4195330142974854, |
|
"logits/rejected": -2.467151165008545, |
|
"logps/chosen": -168.0084686279297, |
|
"logps/rejected": -181.73574829101562, |
|
"loss": 0.8057, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.7599891424179077, |
|
"rewards/margins": -0.08929312229156494, |
|
"rewards/rejected": -0.6706960201263428, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.7173432265075334e-05, |
|
"logits/chosen": -2.511300802230835, |
|
"logits/rejected": -2.510061502456665, |
|
"logps/chosen": -195.68515014648438, |
|
"logps/rejected": -173.744140625, |
|
"loss": 0.6779, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8897146582603455, |
|
"rewards/margins": 0.1359187215566635, |
|
"rewards/rejected": -1.0256333351135254, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.7156859887842416e-05, |
|
"logits/chosen": -2.5270705223083496, |
|
"logits/rejected": -2.5860514640808105, |
|
"logps/chosen": -211.92176818847656, |
|
"logps/rejected": -210.06216430664062, |
|
"loss": 0.7143, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7650929689407349, |
|
"rewards/margins": 0.06967158615589142, |
|
"rewards/rejected": -0.8347646594047546, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.714024199851915e-05, |
|
"logits/chosen": -2.6095833778381348, |
|
"logits/rejected": -2.683137893676758, |
|
"logps/chosen": -166.6543426513672, |
|
"logps/rejected": -197.75425720214844, |
|
"loss": 0.7672, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.9517545104026794, |
|
"rewards/margins": -0.0435512438416481, |
|
"rewards/rejected": -0.9082032442092896, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.712357863124013e-05, |
|
"logits/chosen": -2.583829641342163, |
|
"logits/rejected": -2.4270806312561035, |
|
"logps/chosen": -184.97288513183594, |
|
"logps/rejected": -151.17689514160156, |
|
"loss": 0.8427, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.9021130800247192, |
|
"rewards/margins": -0.22366446256637573, |
|
"rewards/rejected": -0.6784486770629883, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.710686982023332e-05, |
|
"logits/chosen": -2.6779322624206543, |
|
"logits/rejected": -2.6819846630096436, |
|
"logps/chosen": -202.7177734375, |
|
"logps/rejected": -180.97412109375, |
|
"loss": 0.6769, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7709564566612244, |
|
"rewards/margins": 0.1275014728307724, |
|
"rewards/rejected": -0.8984578251838684, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.709011559982006e-05, |
|
"logits/chosen": -2.5081942081451416, |
|
"logits/rejected": -2.457127094268799, |
|
"logps/chosen": -199.16038513183594, |
|
"logps/rejected": -227.66439819335938, |
|
"loss": 0.715, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.8098132014274597, |
|
"rewards/margins": 0.005300614982843399, |
|
"rewards/rejected": -0.8151137232780457, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.707331600441495e-05, |
|
"logits/chosen": -2.5350513458251953, |
|
"logits/rejected": -2.498892307281494, |
|
"logps/chosen": -169.93370056152344, |
|
"logps/rejected": -166.59365844726562, |
|
"loss": 0.6809, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6930581331253052, |
|
"rewards/margins": 0.06999292969703674, |
|
"rewards/rejected": -0.7630510330200195, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.705647106852581e-05, |
|
"logits/chosen": -2.5681025981903076, |
|
"logits/rejected": -2.5515594482421875, |
|
"logps/chosen": -232.0432586669922, |
|
"logps/rejected": -224.9477081298828, |
|
"loss": 0.7262, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.9011371731758118, |
|
"rewards/margins": 0.08735474199056625, |
|
"rewards/rejected": -0.988491952419281, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.7039580826753564e-05, |
|
"logits/chosen": -2.538968563079834, |
|
"logits/rejected": -2.5133233070373535, |
|
"logps/chosen": -158.14520263671875, |
|
"logps/rejected": -201.77728271484375, |
|
"loss": 0.6074, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6365475654602051, |
|
"rewards/margins": 0.2535760998725891, |
|
"rewards/rejected": -0.8901236057281494, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.7022645313792235e-05, |
|
"logits/chosen": -2.5245423316955566, |
|
"logits/rejected": -2.588785171508789, |
|
"logps/chosen": -211.5054473876953, |
|
"logps/rejected": -183.34361267089844, |
|
"loss": 0.6541, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7383630275726318, |
|
"rewards/margins": 0.1513664424419403, |
|
"rewards/rejected": -0.8897294998168945, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.700566456442882e-05, |
|
"logits/chosen": -2.426429033279419, |
|
"logits/rejected": -2.4127559661865234, |
|
"logps/chosen": -170.16091918945312, |
|
"logps/rejected": -181.26731872558594, |
|
"loss": 0.685, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.555845320224762, |
|
"rewards/margins": 0.09964090585708618, |
|
"rewards/rejected": -0.6554862260818481, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.6988638613543216e-05, |
|
"logits/chosen": -2.5047028064727783, |
|
"logits/rejected": -2.6594462394714355, |
|
"logps/chosen": -192.5770263671875, |
|
"logps/rejected": -196.17718505859375, |
|
"loss": 0.6536, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6389065384864807, |
|
"rewards/margins": 0.1103520542383194, |
|
"rewards/rejected": -0.7492585778236389, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.6971567496108206e-05, |
|
"logits/chosen": -2.4204087257385254, |
|
"logits/rejected": -2.4866108894348145, |
|
"logps/chosen": -186.6705780029297, |
|
"logps/rejected": -183.05654907226562, |
|
"loss": 0.6868, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7241983413696289, |
|
"rewards/margins": 0.062242452055215836, |
|
"rewards/rejected": -0.7864408493041992, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.695445124718931e-05, |
|
"logits/chosen": -2.599712610244751, |
|
"logits/rejected": -2.678144693374634, |
|
"logps/chosen": -171.8934326171875, |
|
"logps/rejected": -187.55401611328125, |
|
"loss": 0.7119, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8643355369567871, |
|
"rewards/margins": 0.07741285860538483, |
|
"rewards/rejected": -0.9417483806610107, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.693728990194479e-05, |
|
"logits/chosen": -2.5691065788269043, |
|
"logits/rejected": -2.515324592590332, |
|
"logps/chosen": -175.92041015625, |
|
"logps/rejected": -169.9009552001953, |
|
"loss": 0.7915, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.7048065662384033, |
|
"rewards/margins": -0.13590610027313232, |
|
"rewards/rejected": -0.5689005851745605, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.692008349562551e-05, |
|
"logits/chosen": -2.5656256675720215, |
|
"logits/rejected": -2.5296895503997803, |
|
"logps/chosen": -181.8729248046875, |
|
"logps/rejected": -166.05845642089844, |
|
"loss": 0.6528, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6643526554107666, |
|
"rewards/margins": 0.1753290742635727, |
|
"rewards/rejected": -0.8396817445755005, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.690283206357491e-05, |
|
"logits/chosen": -2.6595234870910645, |
|
"logits/rejected": -2.6016488075256348, |
|
"logps/chosen": -227.77488708496094, |
|
"logps/rejected": -197.7751007080078, |
|
"loss": 0.7501, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.9334564208984375, |
|
"rewards/margins": 0.008943833410739899, |
|
"rewards/rejected": -0.942400336265564, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.6885535641228904e-05, |
|
"logits/chosen": -2.6872832775115967, |
|
"logits/rejected": -2.6639106273651123, |
|
"logps/chosen": -198.11798095703125, |
|
"logps/rejected": -215.65846252441406, |
|
"loss": 0.6973, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.661568284034729, |
|
"rewards/margins": 0.10554268211126328, |
|
"rewards/rejected": -0.7671110033988953, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.6868194264115833e-05, |
|
"logits/chosen": -2.6474506855010986, |
|
"logits/rejected": -2.6153347492218018, |
|
"logps/chosen": -179.79026794433594, |
|
"logps/rejected": -181.140869140625, |
|
"loss": 0.668, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.7167541980743408, |
|
"rewards/margins": 0.10385677218437195, |
|
"rewards/rejected": -0.8206108808517456, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.685080796785637e-05, |
|
"logits/chosen": -2.6611721515655518, |
|
"logits/rejected": -2.5946366786956787, |
|
"logps/chosen": -175.3307647705078, |
|
"logps/rejected": -182.19512939453125, |
|
"loss": 0.6937, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5467984080314636, |
|
"rewards/margins": 0.12983733415603638, |
|
"rewards/rejected": -0.6766356825828552, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.683337678816345e-05, |
|
"logits/chosen": -2.564751148223877, |
|
"logits/rejected": -2.610701322555542, |
|
"logps/chosen": -160.96405029296875, |
|
"logps/rejected": -211.45936584472656, |
|
"loss": 0.5668, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6448365449905396, |
|
"rewards/margins": 0.34849783778190613, |
|
"rewards/rejected": -0.9933344125747681, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.6815900760842236e-05, |
|
"logits/chosen": -2.5778658390045166, |
|
"logits/rejected": -2.650463104248047, |
|
"logps/chosen": -178.8852081298828, |
|
"logps/rejected": -184.25819396972656, |
|
"loss": 0.6166, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6312435865402222, |
|
"rewards/margins": 0.21195663511753082, |
|
"rewards/rejected": -0.8432002067565918, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.679837992178996e-05, |
|
"logits/chosen": -2.6255109310150146, |
|
"logits/rejected": -2.568089246749878, |
|
"logps/chosen": -186.581787109375, |
|
"logps/rejected": -179.66636657714844, |
|
"loss": 0.7966, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.7909551858901978, |
|
"rewards/margins": -0.12891662120819092, |
|
"rewards/rejected": -0.6620385646820068, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.678081430699594e-05, |
|
"logits/chosen": -2.548609972000122, |
|
"logits/rejected": -2.5789599418640137, |
|
"logps/chosen": -149.14419555664062, |
|
"logps/rejected": -175.63099670410156, |
|
"loss": 0.672, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.6500214338302612, |
|
"rewards/margins": 0.1032710000872612, |
|
"rewards/rejected": -0.7532925009727478, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.676320395254146e-05, |
|
"logits/chosen": -2.6516225337982178, |
|
"logits/rejected": -2.5997962951660156, |
|
"logps/chosen": -179.6166229248047, |
|
"logps/rejected": -181.91600036621094, |
|
"loss": 0.6797, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5761557221412659, |
|
"rewards/margins": 0.09436798840761185, |
|
"rewards/rejected": -0.6705237627029419, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.674554889459968e-05, |
|
"logits/chosen": -2.547447681427002, |
|
"logits/rejected": -2.531512498855591, |
|
"logps/chosen": -158.489013671875, |
|
"logps/rejected": -158.9821319580078, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.551589846611023, |
|
"rewards/margins": 0.06521686166524887, |
|
"rewards/rejected": -0.61680668592453, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.672784916943562e-05, |
|
"logits/chosen": -2.6113603115081787, |
|
"logits/rejected": -2.7179596424102783, |
|
"logps/chosen": -161.98703002929688, |
|
"logps/rejected": -183.0166778564453, |
|
"loss": 0.6035, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.4798649549484253, |
|
"rewards/margins": 0.25054579973220825, |
|
"rewards/rejected": -0.7304107546806335, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.6710104813406034e-05, |
|
"logits/chosen": -2.542372703552246, |
|
"logits/rejected": -2.522775650024414, |
|
"logps/chosen": -179.12545776367188, |
|
"logps/rejected": -175.42910766601562, |
|
"loss": 0.6264, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5295111536979675, |
|
"rewards/margins": 0.1804201453924179, |
|
"rewards/rejected": -0.7099313139915466, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.669231586295934e-05, |
|
"logits/chosen": -2.604105234146118, |
|
"logits/rejected": -2.6281673908233643, |
|
"logps/chosen": -201.89901733398438, |
|
"logps/rejected": -226.806396484375, |
|
"loss": 0.7092, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.4338040053844452, |
|
"rewards/margins": 0.028335200622677803, |
|
"rewards/rejected": -0.46213918924331665, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.667448235463557e-05, |
|
"logits/chosen": -2.639315605163574, |
|
"logits/rejected": -2.572453022003174, |
|
"logps/chosen": -211.58755493164062, |
|
"logps/rejected": -191.2517547607422, |
|
"loss": 0.5992, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.4935193359851837, |
|
"rewards/margins": 0.28121161460876465, |
|
"rewards/rejected": -0.7747309803962708, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.665660432506629e-05, |
|
"logits/chosen": -2.577928304672241, |
|
"logits/rejected": -2.5669662952423096, |
|
"logps/chosen": -200.63458251953125, |
|
"logps/rejected": -192.37644958496094, |
|
"loss": 0.684, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6348070502281189, |
|
"rewards/margins": 0.1238020658493042, |
|
"rewards/rejected": -0.7586091756820679, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.6638681810974496e-05, |
|
"logits/chosen": -2.5102195739746094, |
|
"logits/rejected": -2.4570584297180176, |
|
"logps/chosen": -184.1721954345703, |
|
"logps/rejected": -186.46142578125, |
|
"loss": 0.6753, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5391435027122498, |
|
"rewards/margins": 0.12175430357456207, |
|
"rewards/rejected": -0.6608977913856506, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.6620714849174576e-05, |
|
"logits/chosen": -2.5037519931793213, |
|
"logits/rejected": -2.514277219772339, |
|
"logps/chosen": -221.79428100585938, |
|
"logps/rejected": -226.41201782226562, |
|
"loss": 0.6953, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4555547833442688, |
|
"rewards/margins": 0.06287840008735657, |
|
"rewards/rejected": -0.5184332132339478, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.660270347657219e-05, |
|
"logits/chosen": -2.4214844703674316, |
|
"logits/rejected": -2.451406717300415, |
|
"logps/chosen": -164.8655242919922, |
|
"logps/rejected": -202.42665100097656, |
|
"loss": 0.6557, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.4477751851081848, |
|
"rewards/margins": 0.17700761556625366, |
|
"rewards/rejected": -0.6247828006744385, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.658464773016428e-05, |
|
"logits/chosen": -2.6548826694488525, |
|
"logits/rejected": -2.7413270473480225, |
|
"logps/chosen": -203.27706909179688, |
|
"logps/rejected": -195.96438598632812, |
|
"loss": 0.7196, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6162900924682617, |
|
"rewards/margins": 0.04476276412606239, |
|
"rewards/rejected": -0.6610528230667114, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.6566547647038864e-05, |
|
"logits/chosen": -2.6256935596466064, |
|
"logits/rejected": -2.544764757156372, |
|
"logps/chosen": -197.11715698242188, |
|
"logps/rejected": -174.37977600097656, |
|
"loss": 0.5997, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7993966937065125, |
|
"rewards/margins": 0.2561304569244385, |
|
"rewards/rejected": -1.0555272102355957, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.6548403264375074e-05, |
|
"logits/chosen": -2.4777655601501465, |
|
"logits/rejected": -2.466064691543579, |
|
"logps/chosen": -206.86097717285156, |
|
"logps/rejected": -222.15383911132812, |
|
"loss": 0.8362, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.8698564767837524, |
|
"rewards/margins": -0.1482725888490677, |
|
"rewards/rejected": -0.721583902835846, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.6530214619443037e-05, |
|
"logits/chosen": -2.5265445709228516, |
|
"logits/rejected": -2.5728039741516113, |
|
"logps/chosen": -205.71484375, |
|
"logps/rejected": -193.68008422851562, |
|
"loss": 0.7739, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.6624203324317932, |
|
"rewards/margins": -0.09644677489995956, |
|
"rewards/rejected": -0.5659735202789307, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.6511981749603775e-05, |
|
"logits/chosen": -2.4683680534362793, |
|
"logits/rejected": -2.553187847137451, |
|
"logps/chosen": -188.4405975341797, |
|
"logps/rejected": -206.36973571777344, |
|
"loss": 0.6291, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.44957125186920166, |
|
"rewards/margins": 0.21964800357818604, |
|
"rewards/rejected": -0.6692192554473877, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.6493704692309175e-05, |
|
"logits/chosen": -2.4824113845825195, |
|
"logits/rejected": -2.4895737171173096, |
|
"logps/chosen": -173.37237548828125, |
|
"logps/rejected": -182.10430908203125, |
|
"loss": 0.6773, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5202856063842773, |
|
"rewards/margins": 0.12215665727853775, |
|
"rewards/rejected": -0.6424421668052673, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.647538348510189e-05, |
|
"logits/chosen": -2.6831271648406982, |
|
"logits/rejected": -2.7396373748779297, |
|
"logps/chosen": -175.16824340820312, |
|
"logps/rejected": -194.82957458496094, |
|
"loss": 0.653, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6846888065338135, |
|
"rewards/margins": 0.17653468251228333, |
|
"rewards/rejected": -0.861223578453064, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.645701816561523e-05, |
|
"logits/chosen": -2.55379581451416, |
|
"logits/rejected": -2.6678338050842285, |
|
"logps/chosen": -214.75527954101562, |
|
"logps/rejected": -180.83958435058594, |
|
"loss": 0.6331, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6419520974159241, |
|
"rewards/margins": 0.22850137948989868, |
|
"rewards/rejected": -0.8704534769058228, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.643860877157314e-05, |
|
"logits/chosen": -2.560502529144287, |
|
"logits/rejected": -2.6059296131134033, |
|
"logps/chosen": -225.27346801757812, |
|
"logps/rejected": -238.6981658935547, |
|
"loss": 0.7312, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.6071743369102478, |
|
"rewards/margins": 0.0013379361480474472, |
|
"rewards/rejected": -0.6085121631622314, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.642015534079012e-05, |
|
"logits/chosen": -2.6502907276153564, |
|
"logits/rejected": -2.4651901721954346, |
|
"logps/chosen": -194.60215759277344, |
|
"logps/rejected": -208.71707153320312, |
|
"loss": 0.6499, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.41506141424179077, |
|
"rewards/margins": 0.14238694310188293, |
|
"rewards/rejected": -0.5574483871459961, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.640165791117106e-05, |
|
"logits/chosen": -2.597409248352051, |
|
"logits/rejected": -2.671415090560913, |
|
"logps/chosen": -169.68240356445312, |
|
"logps/rejected": -189.24998474121094, |
|
"loss": 0.6624, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6197757124900818, |
|
"rewards/margins": 0.10906066745519638, |
|
"rewards/rejected": -0.7288363575935364, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.63831165207113e-05, |
|
"logits/chosen": -2.2800724506378174, |
|
"logits/rejected": -2.3521509170532227, |
|
"logps/chosen": -171.45944213867188, |
|
"logps/rejected": -175.93577575683594, |
|
"loss": 0.6243, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.498868465423584, |
|
"rewards/margins": 0.19707906246185303, |
|
"rewards/rejected": -0.6959475874900818, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.6364531207496426e-05, |
|
"logits/chosen": -2.549999713897705, |
|
"logits/rejected": -2.5626368522644043, |
|
"logps/chosen": -162.32720947265625, |
|
"logps/rejected": -192.87612915039062, |
|
"loss": 0.7312, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5747624635696411, |
|
"rewards/margins": 0.024262502789497375, |
|
"rewards/rejected": -0.5990250110626221, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.634590200970227e-05, |
|
"logits/chosen": -2.482072591781616, |
|
"logits/rejected": -2.6162638664245605, |
|
"logps/chosen": -176.58897399902344, |
|
"logps/rejected": -207.56654357910156, |
|
"loss": 0.6501, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7275586128234863, |
|
"rewards/margins": 0.14506082236766815, |
|
"rewards/rejected": -0.8726193904876709, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.632722896559481e-05, |
|
"logits/chosen": -2.668024778366089, |
|
"logits/rejected": -2.6363847255706787, |
|
"logps/chosen": -263.9557800292969, |
|
"logps/rejected": -249.15301513671875, |
|
"loss": 0.7493, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.0229195356369019, |
|
"rewards/margins": -0.055171869695186615, |
|
"rewards/rejected": -0.967747688293457, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.630851211353007e-05, |
|
"logits/chosen": -2.6272599697113037, |
|
"logits/rejected": -2.644105911254883, |
|
"logps/chosen": -169.70347595214844, |
|
"logps/rejected": -194.94564819335938, |
|
"loss": 0.6384, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7246192693710327, |
|
"rewards/margins": 0.25421109795570374, |
|
"rewards/rejected": -0.9788303971290588, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.628975149195407e-05, |
|
"logits/chosen": -2.530022621154785, |
|
"logits/rejected": -2.5307507514953613, |
|
"logps/chosen": -165.1283721923828, |
|
"logps/rejected": -191.00559997558594, |
|
"loss": 0.6223, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6375387907028198, |
|
"rewards/margins": 0.26712220907211304, |
|
"rewards/rejected": -0.9046609997749329, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.6270947139402744e-05, |
|
"logits/chosen": -2.54582142829895, |
|
"logits/rejected": -2.5507116317749023, |
|
"logps/chosen": -197.43624877929688, |
|
"logps/rejected": -200.06396484375, |
|
"loss": 0.695, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6543585062026978, |
|
"rewards/margins": 0.08520226180553436, |
|
"rewards/rejected": -0.7395609021186829, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.6252099094501834e-05, |
|
"logits/chosen": -2.502086639404297, |
|
"logits/rejected": -2.507930040359497, |
|
"logps/chosen": -192.9934539794922, |
|
"logps/rejected": -207.4755859375, |
|
"loss": 0.5737, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5259783864021301, |
|
"rewards/margins": 0.3163459002971649, |
|
"rewards/rejected": -0.8423242568969727, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.623320739596685e-05, |
|
"logits/chosen": -2.5075860023498535, |
|
"logits/rejected": -2.537548065185547, |
|
"logps/chosen": -180.3726348876953, |
|
"logps/rejected": -195.20867919921875, |
|
"loss": 0.6342, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6498013138771057, |
|
"rewards/margins": 0.19693784415721893, |
|
"rewards/rejected": -0.8467391729354858, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.621427208260296e-05, |
|
"logits/chosen": -2.5124759674072266, |
|
"logits/rejected": -2.5460541248321533, |
|
"logps/chosen": -170.8051300048828, |
|
"logps/rejected": -183.51568603515625, |
|
"loss": 0.6292, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7085085511207581, |
|
"rewards/margins": 0.24482092261314392, |
|
"rewards/rejected": -0.9533295035362244, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.6195293193304915e-05, |
|
"logits/chosen": -2.568577289581299, |
|
"logits/rejected": -2.730349063873291, |
|
"logps/chosen": -157.42147827148438, |
|
"logps/rejected": -204.0237274169922, |
|
"loss": 0.6416, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7542508244514465, |
|
"rewards/margins": 0.18506285548210144, |
|
"rewards/rejected": -0.9393137693405151, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.6176270767056976e-05, |
|
"logits/chosen": -2.658346652984619, |
|
"logits/rejected": -2.697397232055664, |
|
"logps/chosen": -160.69436645507812, |
|
"logps/rejected": -182.4514923095703, |
|
"loss": 0.6066, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5536147952079773, |
|
"rewards/margins": 0.21064530313014984, |
|
"rewards/rejected": -0.7642600536346436, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.615720484293286e-05, |
|
"logits/chosen": -2.621380090713501, |
|
"logits/rejected": -2.5975751876831055, |
|
"logps/chosen": -172.44583129882812, |
|
"logps/rejected": -168.9969482421875, |
|
"loss": 0.7824, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.7546152472496033, |
|
"rewards/margins": -0.09415875375270844, |
|
"rewards/rejected": -0.6604564785957336, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.613809546009558e-05, |
|
"logits/chosen": -2.5632882118225098, |
|
"logits/rejected": -2.528463840484619, |
|
"logps/chosen": -174.4139404296875, |
|
"logps/rejected": -178.07061767578125, |
|
"loss": 0.7851, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.783379077911377, |
|
"rewards/margins": -0.13342618942260742, |
|
"rewards/rejected": -0.64995276927948, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.611894265779748e-05, |
|
"logits/chosen": -2.6725411415100098, |
|
"logits/rejected": -2.6337125301361084, |
|
"logps/chosen": -158.43734741210938, |
|
"logps/rejected": -149.51394653320312, |
|
"loss": 0.6479, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7037851810455322, |
|
"rewards/margins": 0.15217718482017517, |
|
"rewards/rejected": -0.8559622764587402, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.609974647538003e-05, |
|
"logits/chosen": -2.5839614868164062, |
|
"logits/rejected": -2.5436882972717285, |
|
"logps/chosen": -186.00880432128906, |
|
"logps/rejected": -176.95623779296875, |
|
"loss": 0.6352, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7761020660400391, |
|
"rewards/margins": 0.2549971640110016, |
|
"rewards/rejected": -1.0310992002487183, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.608050695227385e-05, |
|
"logits/chosen": -2.663245439529419, |
|
"logits/rejected": -2.629906415939331, |
|
"logps/chosen": -187.883056640625, |
|
"logps/rejected": -181.8142852783203, |
|
"loss": 0.727, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.8239506483078003, |
|
"rewards/margins": 0.07172393053770065, |
|
"rewards/rejected": -0.8956745862960815, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.606122412799857e-05, |
|
"logits/chosen": -2.864988327026367, |
|
"logits/rejected": -2.8613595962524414, |
|
"logps/chosen": -179.66754150390625, |
|
"logps/rejected": -183.17547607421875, |
|
"loss": 0.7754, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.7847499847412109, |
|
"rewards/margins": -0.03732617199420929, |
|
"rewards/rejected": -0.7474238872528076, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.6041898042162764e-05, |
|
"logits/chosen": -2.67081618309021, |
|
"logits/rejected": -2.630650281906128, |
|
"logps/chosen": -184.00350952148438, |
|
"logps/rejected": -213.1297607421875, |
|
"loss": 0.7759, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.9428163170814514, |
|
"rewards/margins": -0.10311193764209747, |
|
"rewards/rejected": -0.8397043347358704, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.602252873446386e-05, |
|
"logits/chosen": -2.5779166221618652, |
|
"logits/rejected": -2.657582998275757, |
|
"logps/chosen": -255.50413513183594, |
|
"logps/rejected": -295.5150146484375, |
|
"loss": 0.6389, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6901088356971741, |
|
"rewards/margins": 0.2417818158864975, |
|
"rewards/rejected": -0.9318906664848328, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.60031162446881e-05, |
|
"logits/chosen": -2.6082730293273926, |
|
"logits/rejected": -2.7072064876556396, |
|
"logps/chosen": -186.39288330078125, |
|
"logps/rejected": -197.77581787109375, |
|
"loss": 0.7185, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.8791942596435547, |
|
"rewards/margins": -0.021152300760149956, |
|
"rewards/rejected": -0.8580418825149536, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 2292, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|