|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997382884061764, |
|
"eval_steps": 500, |
|
"global_step": 1910, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.005234231876472127, |
|
"grad_norm": 39.5, |
|
"learning_rate": 1.0416666666666667e-06, |
|
"logits/chosen": 0.6487364768981934, |
|
"logits/rejected": 0.7462336421012878, |
|
"logps/chosen": -331.4830322265625, |
|
"logps/rejected": -289.36822509765625, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.3968749940395355, |
|
"rewards/chosen": 0.0010190431494265795, |
|
"rewards/margins": 0.0010569205041974783, |
|
"rewards/rejected": -3.78774493583478e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.010468463752944255, |
|
"grad_norm": 36.25, |
|
"learning_rate": 2.0833333333333334e-06, |
|
"logits/chosen": 0.7357114553451538, |
|
"logits/rejected": 0.7462834715843201, |
|
"logps/chosen": -358.1224670410156, |
|
"logps/rejected": -318.0662536621094, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.0003305533900856972, |
|
"rewards/margins": 0.005077774170786142, |
|
"rewards/rejected": -0.00474722171202302, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.015702695629416383, |
|
"grad_norm": 39.75, |
|
"learning_rate": 3.125e-06, |
|
"logits/chosen": 0.7648854851722717, |
|
"logits/rejected": 0.8517886400222778, |
|
"logps/chosen": -351.1484375, |
|
"logps/rejected": -318.3990783691406, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.0017980957636609674, |
|
"rewards/margins": 0.005581674166023731, |
|
"rewards/rejected": -0.0037835785187780857, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02093692750588851, |
|
"grad_norm": 37.25, |
|
"learning_rate": 4.166666666666667e-06, |
|
"logits/chosen": 0.7295006513595581, |
|
"logits/rejected": 0.7714008688926697, |
|
"logps/chosen": -319.7439880371094, |
|
"logps/rejected": -278.13323974609375, |
|
"loss": 0.6823, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.010593170300126076, |
|
"rewards/margins": 0.023843348026275635, |
|
"rewards/rejected": -0.013250177726149559, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02617115938236064, |
|
"grad_norm": 32.75, |
|
"learning_rate": 5.208333333333334e-06, |
|
"logits/chosen": 0.685738205909729, |
|
"logits/rejected": 0.7062253952026367, |
|
"logps/chosen": -320.6109313964844, |
|
"logps/rejected": -284.9264831542969, |
|
"loss": 0.6737, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": 0.0017792358994483948, |
|
"rewards/margins": 0.04503456503152847, |
|
"rewards/rejected": -0.04325533285737038, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.031405391258832765, |
|
"grad_norm": 36.5, |
|
"learning_rate": 6.25e-06, |
|
"logits/chosen": 0.6452481746673584, |
|
"logits/rejected": 0.7145572304725647, |
|
"logps/chosen": -337.2304382324219, |
|
"logps/rejected": -300.0358581542969, |
|
"loss": 0.666, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.002510682214051485, |
|
"rewards/margins": 0.07026001065969467, |
|
"rewards/rejected": -0.07277069240808487, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.036639623135304895, |
|
"grad_norm": 33.25, |
|
"learning_rate": 7.291666666666667e-06, |
|
"logits/chosen": 0.8339419364929199, |
|
"logits/rejected": 0.8587329983711243, |
|
"logps/chosen": -325.0736999511719, |
|
"logps/rejected": -313.72161865234375, |
|
"loss": 0.6569, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.022171273827552795, |
|
"rewards/margins": 0.10362186282873154, |
|
"rewards/rejected": -0.12579312920570374, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04187385501177702, |
|
"grad_norm": 35.25, |
|
"learning_rate": 8.333333333333334e-06, |
|
"logits/chosen": 0.7884281873703003, |
|
"logits/rejected": 0.9599426984786987, |
|
"logps/chosen": -309.9776306152344, |
|
"logps/rejected": -280.77960205078125, |
|
"loss": 0.6485, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.03900856524705887, |
|
"rewards/margins": 0.13111677765846252, |
|
"rewards/rejected": -0.1701253354549408, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.04710808688824915, |
|
"grad_norm": 36.5, |
|
"learning_rate": 9.375000000000001e-06, |
|
"logits/chosen": 0.7847155928611755, |
|
"logits/rejected": 0.7387144565582275, |
|
"logps/chosen": -330.1953430175781, |
|
"logps/rejected": -277.9568176269531, |
|
"loss": 0.6419, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.05813335254788399, |
|
"rewards/margins": 0.1728575974702835, |
|
"rewards/rejected": -0.230990931391716, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.05234231876472128, |
|
"grad_norm": 29.25, |
|
"learning_rate": 9.999880027023295e-06, |
|
"logits/chosen": 0.4318479001522064, |
|
"logits/rejected": 0.5929848551750183, |
|
"logps/chosen": -327.99481201171875, |
|
"logps/rejected": -298.76483154296875, |
|
"loss": 0.6061, |
|
"rewards/accuracies": 0.6656249761581421, |
|
"rewards/chosen": -0.12621721625328064, |
|
"rewards/margins": 0.2853361964225769, |
|
"rewards/rejected": -0.41155338287353516, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05757655064119341, |
|
"grad_norm": 30.5, |
|
"learning_rate": 9.998530397154684e-06, |
|
"logits/chosen": 0.4563142657279968, |
|
"logits/rejected": 0.5782237648963928, |
|
"logps/chosen": -325.25836181640625, |
|
"logps/rejected": -314.2166748046875, |
|
"loss": 0.6221, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.18687985837459564, |
|
"rewards/margins": 0.2569567561149597, |
|
"rewards/rejected": -0.44383659958839417, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06281078251766553, |
|
"grad_norm": 36.0, |
|
"learning_rate": 9.995681577335256e-06, |
|
"logits/chosen": 0.3232671320438385, |
|
"logits/rejected": 0.4357617497444153, |
|
"logps/chosen": -340.3876037597656, |
|
"logps/rejected": -319.13226318359375, |
|
"loss": 0.6172, |
|
"rewards/accuracies": 0.6468750238418579, |
|
"rewards/chosen": -0.3543888330459595, |
|
"rewards/margins": 0.3290286958217621, |
|
"rewards/rejected": -0.6834174990653992, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.06804501439413765, |
|
"grad_norm": 56.5, |
|
"learning_rate": 9.99133442200056e-06, |
|
"logits/chosen": 0.2823497951030731, |
|
"logits/rejected": 0.4065135419368744, |
|
"logps/chosen": -354.84442138671875, |
|
"logps/rejected": -311.0687561035156, |
|
"loss": 0.6218, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.40330594778060913, |
|
"rewards/margins": 0.3464129567146301, |
|
"rewards/rejected": -0.749718964099884, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07327924627060979, |
|
"grad_norm": 37.5, |
|
"learning_rate": 9.985490234976132e-06, |
|
"logits/chosen": 0.4105094075202942, |
|
"logits/rejected": 0.4262070059776306, |
|
"logps/chosen": -345.6540222167969, |
|
"logps/rejected": -291.80731201171875, |
|
"loss": 0.5984, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.41038641333580017, |
|
"rewards/margins": 0.41181617975234985, |
|
"rewards/rejected": -0.8222025632858276, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.07851347814708191, |
|
"grad_norm": 33.25, |
|
"learning_rate": 9.978150769086457e-06, |
|
"logits/chosen": 0.3375703692436218, |
|
"logits/rejected": 0.4788056015968323, |
|
"logps/chosen": -340.313232421875, |
|
"logps/rejected": -304.1383361816406, |
|
"loss": 0.6335, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.42093929648399353, |
|
"rewards/margins": 0.3101397156715393, |
|
"rewards/rejected": -0.7310789823532104, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08374771002355404, |
|
"grad_norm": 36.75, |
|
"learning_rate": 9.96931822562924e-06, |
|
"logits/chosen": 0.2715611755847931, |
|
"logits/rejected": 0.3587436079978943, |
|
"logps/chosen": -352.6816101074219, |
|
"logps/rejected": -338.83575439453125, |
|
"loss": 0.6146, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.332112193107605, |
|
"rewards/margins": 0.29170310497283936, |
|
"rewards/rejected": -0.6238153576850891, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.08898194190002617, |
|
"grad_norm": 34.0, |
|
"learning_rate": 9.958995253715193e-06, |
|
"logits/chosen": 0.3311766982078552, |
|
"logits/rejected": 0.3224788010120392, |
|
"logps/chosen": -357.66912841796875, |
|
"logps/rejected": -316.62017822265625, |
|
"loss": 0.6098, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.31313422322273254, |
|
"rewards/margins": 0.3176955580711365, |
|
"rewards/rejected": -0.6308297514915466, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.0942161737764983, |
|
"grad_norm": 38.25, |
|
"learning_rate": 9.947184949473478e-06, |
|
"logits/chosen": 0.23619632422924042, |
|
"logits/rejected": 0.2766094505786896, |
|
"logps/chosen": -343.662109375, |
|
"logps/rejected": -298.932373046875, |
|
"loss": 0.5899, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.472648948431015, |
|
"rewards/margins": 0.3989109396934509, |
|
"rewards/rejected": -0.8715597987174988, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.09945040565297043, |
|
"grad_norm": 41.5, |
|
"learning_rate": 9.933890855123114e-06, |
|
"logits/chosen": 0.1247895210981369, |
|
"logits/rejected": 0.18231646716594696, |
|
"logps/chosen": -373.7274169921875, |
|
"logps/rejected": -356.18646240234375, |
|
"loss": 0.6329, |
|
"rewards/accuracies": 0.6468750238418579, |
|
"rewards/chosen": -0.8531802892684937, |
|
"rewards/margins": 0.3459845781326294, |
|
"rewards/rejected": -1.199164867401123, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.10468463752944256, |
|
"grad_norm": 43.75, |
|
"learning_rate": 9.919116957910566e-06, |
|
"logits/chosen": 0.10570982843637466, |
|
"logits/rejected": 0.11052794754505157, |
|
"logps/chosen": -349.4450988769531, |
|
"logps/rejected": -290.8612365722656, |
|
"loss": 0.5914, |
|
"rewards/accuracies": 0.659375011920929, |
|
"rewards/chosen": -0.6416983604431152, |
|
"rewards/margins": 0.45599108934402466, |
|
"rewards/rejected": -1.0976893901824951, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.10991886940591468, |
|
"grad_norm": 38.5, |
|
"learning_rate": 9.902867688913869e-06, |
|
"logits/chosen": 0.3035932779312134, |
|
"logits/rejected": 0.3483582139015198, |
|
"logps/chosen": -353.8571472167969, |
|
"logps/rejected": -306.32904052734375, |
|
"loss": 0.5826, |
|
"rewards/accuracies": 0.6781250238418579, |
|
"rewards/chosen": -0.2641884386539459, |
|
"rewards/margins": 0.4089083671569824, |
|
"rewards/rejected": -0.6730968356132507, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.11515310128238682, |
|
"grad_norm": 41.0, |
|
"learning_rate": 9.885147921713621e-06, |
|
"logits/chosen": 0.12661756575107574, |
|
"logits/rejected": 0.20708250999450684, |
|
"logps/chosen": -325.804443359375, |
|
"logps/rejected": -309.0091247558594, |
|
"loss": 0.5871, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": -0.2193823605775833, |
|
"rewards/margins": 0.4421941637992859, |
|
"rewards/rejected": -0.6615765690803528, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.12038733315885894, |
|
"grad_norm": 30.625, |
|
"learning_rate": 9.865962970931287e-06, |
|
"logits/chosen": 0.3643207848072052, |
|
"logits/rejected": 0.4040307402610779, |
|
"logps/chosen": -358.22705078125, |
|
"logps/rejected": -312.9072570800781, |
|
"loss": 0.5905, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": -0.3125769793987274, |
|
"rewards/margins": 0.4290243983268738, |
|
"rewards/rejected": -0.7416013479232788, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.12562156503533106, |
|
"grad_norm": 35.0, |
|
"learning_rate": 9.845318590635186e-06, |
|
"logits/chosen": 0.44897300004959106, |
|
"logits/rejected": 0.6014218330383301, |
|
"logps/chosen": -352.2749328613281, |
|
"logps/rejected": -309.62176513671875, |
|
"loss": 0.5765, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": -0.27151674032211304, |
|
"rewards/margins": 0.4776983857154846, |
|
"rewards/rejected": -0.7492151260375977, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.13085579691180318, |
|
"grad_norm": 35.25, |
|
"learning_rate": 9.823220972614712e-06, |
|
"logits/chosen": 0.3287668228149414, |
|
"logits/rejected": 0.40800076723098755, |
|
"logps/chosen": -367.3862609863281, |
|
"logps/rejected": -299.0010986328125, |
|
"loss": 0.5782, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.4212459921836853, |
|
"rewards/margins": 0.4778348505496979, |
|
"rewards/rejected": -0.8990808725357056, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1360900287882753, |
|
"grad_norm": 39.75, |
|
"learning_rate": 9.79967674452324e-06, |
|
"logits/chosen": 0.2960013747215271, |
|
"logits/rejected": 0.3838788866996765, |
|
"logps/chosen": -350.2323303222656, |
|
"logps/rejected": -340.3704528808594, |
|
"loss": 0.5902, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5522688627243042, |
|
"rewards/margins": 0.5364810228347778, |
|
"rewards/rejected": -1.0887497663497925, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.14132426066474746, |
|
"grad_norm": 36.25, |
|
"learning_rate": 9.774692967890332e-06, |
|
"logits/chosen": 0.13149850070476532, |
|
"logits/rejected": 0.1432052105665207, |
|
"logps/chosen": -356.21356201171875, |
|
"logps/rejected": -323.2649230957031, |
|
"loss": 0.5835, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.3824268877506256, |
|
"rewards/margins": 0.4610356390476227, |
|
"rewards/rejected": -0.8434625864028931, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.14655849254121958, |
|
"grad_norm": 35.75, |
|
"learning_rate": 9.74827713600379e-06, |
|
"logits/chosen": 0.245396226644516, |
|
"logits/rejected": 0.37196552753448486, |
|
"logps/chosen": -308.9268798828125, |
|
"logps/rejected": -277.8331298828125, |
|
"loss": 0.6087, |
|
"rewards/accuracies": 0.6781250238418579, |
|
"rewards/chosen": -0.23228350281715393, |
|
"rewards/margins": 0.37543928623199463, |
|
"rewards/rejected": -0.6077227592468262, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.1517927244176917, |
|
"grad_norm": 30.875, |
|
"learning_rate": 9.720437171662232e-06, |
|
"logits/chosen": 0.3768264651298523, |
|
"logits/rejected": 0.4466184675693512, |
|
"logps/chosen": -336.4764099121094, |
|
"logps/rejected": -312.9964294433594, |
|
"loss": 0.5865, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.21132774651050568, |
|
"rewards/margins": 0.4387349486351013, |
|
"rewards/rejected": -0.6500626802444458, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.15702695629416383, |
|
"grad_norm": 33.5, |
|
"learning_rate": 9.691181424798825e-06, |
|
"logits/chosen": 0.27258244156837463, |
|
"logits/rejected": 0.24488012492656708, |
|
"logps/chosen": -327.12249755859375, |
|
"logps/rejected": -303.77685546875, |
|
"loss": 0.5972, |
|
"rewards/accuracies": 0.6656249761581421, |
|
"rewards/chosen": -0.48688727617263794, |
|
"rewards/margins": 0.43369269371032715, |
|
"rewards/rejected": -0.9205799102783203, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16226118817063595, |
|
"grad_norm": 38.25, |
|
"learning_rate": 9.660518669976936e-06, |
|
"logits/chosen": 0.22490909695625305, |
|
"logits/rejected": 0.3626439869403839, |
|
"logps/chosen": -359.6585998535156, |
|
"logps/rejected": -312.7129821777344, |
|
"loss": 0.6088, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.6881123781204224, |
|
"rewards/margins": 0.4073651432991028, |
|
"rewards/rejected": -1.0954773426055908, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.16749542004710807, |
|
"grad_norm": 33.75, |
|
"learning_rate": 9.628458103758403e-06, |
|
"logits/chosen": 0.18019357323646545, |
|
"logits/rejected": 0.30855241417884827, |
|
"logps/chosen": -371.63665771484375, |
|
"logps/rejected": -335.8221435546875, |
|
"loss": 0.5503, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6700717210769653, |
|
"rewards/margins": 0.5360826253890991, |
|
"rewards/rejected": -1.2061543464660645, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.17272965192358022, |
|
"grad_norm": 39.25, |
|
"learning_rate": 9.595009341945246e-06, |
|
"logits/chosen": 0.13094857335090637, |
|
"logits/rejected": 0.19792452454566956, |
|
"logps/chosen": -334.1517639160156, |
|
"logps/rejected": -320.565673828125, |
|
"loss": 0.6307, |
|
"rewards/accuracies": 0.6656249761581421, |
|
"rewards/chosen": -0.8788414001464844, |
|
"rewards/margins": 0.45465344190597534, |
|
"rewards/rejected": -1.3334949016571045, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.17796388380005235, |
|
"grad_norm": 34.5, |
|
"learning_rate": 9.560182416695639e-06, |
|
"logits/chosen": 0.21438567340373993, |
|
"logits/rejected": 0.22045159339904785, |
|
"logps/chosen": -331.2416687011719, |
|
"logps/rejected": -329.4930419921875, |
|
"loss": 0.5683, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.6694966554641724, |
|
"rewards/margins": 0.5601910352706909, |
|
"rewards/rejected": -1.2296875715255737, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.18319811567652447, |
|
"grad_norm": 39.0, |
|
"learning_rate": 9.523987773514999e-06, |
|
"logits/chosen": 0.14380326867103577, |
|
"logits/rejected": 0.2659078538417816, |
|
"logps/chosen": -337.06488037109375, |
|
"logps/rejected": -299.07940673828125, |
|
"loss": 0.6124, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.4915216565132141, |
|
"rewards/margins": 0.37540850043296814, |
|
"rewards/rejected": -0.8669301271438599, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.1884323475529966, |
|
"grad_norm": 40.5, |
|
"learning_rate": 9.486436268123112e-06, |
|
"logits/chosen": 0.1830969899892807, |
|
"logits/rejected": 0.2517342269420624, |
|
"logps/chosen": -365.96990966796875, |
|
"logps/rejected": -339.3229675292969, |
|
"loss": 0.5989, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.35854586958885193, |
|
"rewards/margins": 0.44347500801086426, |
|
"rewards/rejected": -0.8020208477973938, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.19366657942946872, |
|
"grad_norm": 40.5, |
|
"learning_rate": 9.447539163198218e-06, |
|
"logits/chosen": 0.34863370656967163, |
|
"logits/rejected": 0.34325408935546875, |
|
"logps/chosen": -344.86480712890625, |
|
"logps/rejected": -307.914306640625, |
|
"loss": 0.599, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.4770238399505615, |
|
"rewards/margins": 0.5033739805221558, |
|
"rewards/rejected": -0.9803977012634277, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.19890081130594087, |
|
"grad_norm": 24.75, |
|
"learning_rate": 9.407308124999031e-06, |
|
"logits/chosen": 0.5095074772834778, |
|
"logits/rejected": 0.5166248083114624, |
|
"logps/chosen": -355.6305847167969, |
|
"logps/rejected": -334.64788818359375, |
|
"loss": 0.6052, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.5633832812309265, |
|
"rewards/margins": 0.515869677066803, |
|
"rewards/rejected": -1.0792529582977295, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.204135043182413, |
|
"grad_norm": 30.5, |
|
"learning_rate": 9.365755219865733e-06, |
|
"logits/chosen": 0.5360152125358582, |
|
"logits/rejected": 0.6312729716300964, |
|
"logps/chosen": -351.32574462890625, |
|
"logps/rejected": -332.18890380859375, |
|
"loss": 0.5507, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.3784104585647583, |
|
"rewards/margins": 0.6517626643180847, |
|
"rewards/rejected": -1.0301730632781982, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.2093692750588851, |
|
"grad_norm": 40.0, |
|
"learning_rate": 9.322892910600959e-06, |
|
"logits/chosen": 0.5068603754043579, |
|
"logits/rejected": 0.7184394598007202, |
|
"logps/chosen": -324.0625, |
|
"logps/rejected": -290.60235595703125, |
|
"loss": 0.6159, |
|
"rewards/accuracies": 0.659375011920929, |
|
"rewards/chosen": -0.39290112257003784, |
|
"rewards/margins": 0.38739123940467834, |
|
"rewards/rejected": -0.7802923917770386, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21460350693535724, |
|
"grad_norm": 55.75, |
|
"learning_rate": 9.278734052731876e-06, |
|
"logits/chosen": 0.47780805826187134, |
|
"logits/rejected": 0.48742303252220154, |
|
"logps/chosen": -347.030517578125, |
|
"logps/rejected": -322.89068603515625, |
|
"loss": 0.5722, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.47796815633773804, |
|
"rewards/margins": 0.547213613986969, |
|
"rewards/rejected": -1.025181770324707, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.21983773881182936, |
|
"grad_norm": 42.0, |
|
"learning_rate": 9.233291890654477e-06, |
|
"logits/chosen": 0.08945097029209137, |
|
"logits/rejected": 0.17587144672870636, |
|
"logps/chosen": -353.0492248535156, |
|
"logps/rejected": -310.22900390625, |
|
"loss": 0.5454, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -0.6215865612030029, |
|
"rewards/margins": 0.6318700313568115, |
|
"rewards/rejected": -1.253456711769104, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.22507197068830148, |
|
"grad_norm": 39.5, |
|
"learning_rate": 9.186580053661238e-06, |
|
"logits/chosen": 0.19737955927848816, |
|
"logits/rejected": 0.20681536197662354, |
|
"logps/chosen": -342.05816650390625, |
|
"logps/rejected": -354.335693359375, |
|
"loss": 0.6142, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.4559093117713928, |
|
"rewards/margins": 0.5013066530227661, |
|
"rewards/rejected": -0.9572159051895142, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.23030620256477363, |
|
"grad_norm": 32.0, |
|
"learning_rate": 9.138612551853334e-06, |
|
"logits/chosen": 0.1024736613035202, |
|
"logits/rejected": 0.2752048373222351, |
|
"logps/chosen": -355.54364013671875, |
|
"logps/rejected": -307.53631591796875, |
|
"loss": 0.5634, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.20539434254169464, |
|
"rewards/margins": 0.5175925493240356, |
|
"rewards/rejected": -0.7229868769645691, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.23554043444124576, |
|
"grad_norm": 35.5, |
|
"learning_rate": 9.089403771938651e-06, |
|
"logits/chosen": 0.1647811233997345, |
|
"logits/rejected": 0.2729242742061615, |
|
"logps/chosen": -343.55419921875, |
|
"logps/rejected": -314.5325012207031, |
|
"loss": 0.5835, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.2670280635356903, |
|
"rewards/margins": 0.5155874490737915, |
|
"rewards/rejected": -0.7826155424118042, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.24077466631771788, |
|
"grad_norm": 36.75, |
|
"learning_rate": 9.038968472916831e-06, |
|
"logits/chosen": 0.14953358471393585, |
|
"logits/rejected": 0.24258370697498322, |
|
"logps/chosen": -373.007080078125, |
|
"logps/rejected": -365.58245849609375, |
|
"loss": 0.5612, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.530977725982666, |
|
"rewards/margins": 0.6304374933242798, |
|
"rewards/rejected": -1.1614153385162354, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.24600889819419, |
|
"grad_norm": 35.75, |
|
"learning_rate": 8.987321781652663e-06, |
|
"logits/chosen": 0.20829911530017853, |
|
"logits/rejected": 0.25458788871765137, |
|
"logps/chosen": -337.02142333984375, |
|
"logps/rejected": -305.6877746582031, |
|
"loss": 0.5678, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -0.9067142605781555, |
|
"rewards/margins": 0.6314893960952759, |
|
"rewards/rejected": -1.538203477859497, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.2512431300706621, |
|
"grad_norm": 41.75, |
|
"learning_rate": 8.93447918833914e-06, |
|
"logits/chosen": 0.18115252256393433, |
|
"logits/rejected": 0.17303326725959778, |
|
"logps/chosen": -371.98431396484375, |
|
"logps/rejected": -319.89947509765625, |
|
"loss": 0.5904, |
|
"rewards/accuracies": 0.6781250238418579, |
|
"rewards/chosen": -0.8123010396957397, |
|
"rewards/margins": 0.5886048078536987, |
|
"rewards/rejected": -1.400905966758728, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.2564773619471343, |
|
"grad_norm": 36.0, |
|
"learning_rate": 8.880456541851544e-06, |
|
"logits/chosen": 0.12780669331550598, |
|
"logits/rejected": 0.21940919756889343, |
|
"logps/chosen": -397.94189453125, |
|
"logps/rejected": -341.4785461425781, |
|
"loss": 0.54, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -0.6672419309616089, |
|
"rewards/margins": 0.6856431365013123, |
|
"rewards/rejected": -1.3528850078582764, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.26171159382360637, |
|
"grad_norm": 45.75, |
|
"learning_rate": 8.825270044993963e-06, |
|
"logits/chosen": 0.20649901032447815, |
|
"logits/rejected": 0.31849536299705505, |
|
"logps/chosen": -320.14593505859375, |
|
"logps/rejected": -326.33624267578125, |
|
"loss": 0.5853, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.5974608659744263, |
|
"rewards/margins": 0.5049938559532166, |
|
"rewards/rejected": -1.1024547815322876, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2669458257000785, |
|
"grad_norm": 42.75, |
|
"learning_rate": 8.768936249639632e-06, |
|
"logits/chosen": 0.046025604009628296, |
|
"logits/rejected": 0.18477210402488708, |
|
"logps/chosen": -335.65899658203125, |
|
"logps/rejected": -325.29730224609375, |
|
"loss": 0.5911, |
|
"rewards/accuracies": 0.6781250238418579, |
|
"rewards/chosen": -0.6880438923835754, |
|
"rewards/margins": 0.4891757071018219, |
|
"rewards/rejected": -1.1772195100784302, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.2721800575765506, |
|
"grad_norm": 38.75, |
|
"learning_rate": 8.711472051766606e-06, |
|
"logits/chosen": 0.12576794624328613, |
|
"logits/rejected": 0.19818930327892303, |
|
"logps/chosen": -358.3614196777344, |
|
"logps/rejected": -336.26629638671875, |
|
"loss": 0.536, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.5878896713256836, |
|
"rewards/margins": 0.6127403974533081, |
|
"rewards/rejected": -1.2006300687789917, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.27741428945302277, |
|
"grad_norm": 38.5, |
|
"learning_rate": 8.652894686390205e-06, |
|
"logits/chosen": 0.18315266072750092, |
|
"logits/rejected": 0.24707746505737305, |
|
"logps/chosen": -356.6893005371094, |
|
"logps/rejected": -326.3449401855469, |
|
"loss": 0.5773, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": -0.5467976331710815, |
|
"rewards/margins": 0.6014063954353333, |
|
"rewards/rejected": -1.1482040882110596, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.2826485213294949, |
|
"grad_norm": 45.25, |
|
"learning_rate": 8.593221722393789e-06, |
|
"logits/chosen": 0.1029457077383995, |
|
"logits/rejected": 0.20355193316936493, |
|
"logps/chosen": -357.2725524902344, |
|
"logps/rejected": -325.27288818359375, |
|
"loss": 0.5543, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.8227931261062622, |
|
"rewards/margins": 0.6153770685195923, |
|
"rewards/rejected": -1.4381701946258545, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.287882753205967, |
|
"grad_norm": 41.5, |
|
"learning_rate": 8.53247105725939e-06, |
|
"logits/chosen": 0.20170524716377258, |
|
"logits/rejected": 0.1971486508846283, |
|
"logps/chosen": -320.24566650390625, |
|
"logps/rejected": -294.46917724609375, |
|
"loss": 0.5441, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.39367663860321045, |
|
"rewards/margins": 0.6428011059761047, |
|
"rewards/rejected": -1.0364776849746704, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.29311698508243916, |
|
"grad_norm": 42.0, |
|
"learning_rate": 8.470660911699783e-06, |
|
"logits/chosen": 0.049773894250392914, |
|
"logits/rejected": 0.12189406156539917, |
|
"logps/chosen": -332.68487548828125, |
|
"logps/rejected": -287.7740173339844, |
|
"loss": 0.5803, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.35799601674079895, |
|
"rewards/margins": 0.5440070629119873, |
|
"rewards/rejected": -0.9020029902458191, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.29835121695891126, |
|
"grad_norm": 36.5, |
|
"learning_rate": 8.407809824193624e-06, |
|
"logits/chosen": 0.03806518390774727, |
|
"logits/rejected": 0.16943983733654022, |
|
"logps/chosen": -371.38580322265625, |
|
"logps/rejected": -338.66558837890625, |
|
"loss": 0.5838, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.5821398496627808, |
|
"rewards/margins": 0.6142541170120239, |
|
"rewards/rejected": -1.1963939666748047, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.3035854488353834, |
|
"grad_norm": 35.25, |
|
"learning_rate": 8.343936645425277e-06, |
|
"logits/chosen": 0.2618806064128876, |
|
"logits/rejected": 0.3286944329738617, |
|
"logps/chosen": -325.019287109375, |
|
"logps/rejected": -308.45611572265625, |
|
"loss": 0.5144, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -0.5636202692985535, |
|
"rewards/margins": 0.7985345721244812, |
|
"rewards/rejected": -1.3621549606323242, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.30881968071185556, |
|
"grad_norm": 34.75, |
|
"learning_rate": 8.279060532630991e-06, |
|
"logits/chosen": 0.21167007088661194, |
|
"logits/rejected": 0.33110693097114563, |
|
"logps/chosen": -361.67559814453125, |
|
"logps/rejected": -336.69476318359375, |
|
"loss": 0.5956, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": -0.8580425977706909, |
|
"rewards/margins": 0.647322416305542, |
|
"rewards/rejected": -1.5053651332855225, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.31405391258832765, |
|
"grad_norm": 43.0, |
|
"learning_rate": 8.21320094385316e-06, |
|
"logits/chosen": 0.257051020860672, |
|
"logits/rejected": 0.3034771680831909, |
|
"logps/chosen": -373.9206848144531, |
|
"logps/rejected": -343.5843200683594, |
|
"loss": 0.6014, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.7790796756744385, |
|
"rewards/margins": 0.5478149056434631, |
|
"rewards/rejected": -1.3268945217132568, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.3192881444647998, |
|
"grad_norm": 41.25, |
|
"learning_rate": 8.146377632104328e-06, |
|
"logits/chosen": 0.17596812546253204, |
|
"logits/rejected": 0.34751996397972107, |
|
"logps/chosen": -384.0326843261719, |
|
"logps/rejected": -325.1558532714844, |
|
"loss": 0.5376, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.5697289705276489, |
|
"rewards/margins": 0.7126566171646118, |
|
"rewards/rejected": -1.2823854684829712, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.3245223763412719, |
|
"grad_norm": 38.75, |
|
"learning_rate": 8.078610639442761e-06, |
|
"logits/chosen": 0.187199205160141, |
|
"logits/rejected": 0.22407758235931396, |
|
"logps/chosen": -365.73095703125, |
|
"logps/rejected": -313.1622619628906, |
|
"loss": 0.563, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.44766363501548767, |
|
"rewards/margins": 0.5626498460769653, |
|
"rewards/rejected": -1.0103133916854858, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.32975660821774405, |
|
"grad_norm": 43.25, |
|
"learning_rate": 8.009920290961302e-06, |
|
"logits/chosen": 0.0821579247713089, |
|
"logits/rejected": 0.02954455092549324, |
|
"logps/chosen": -342.8247375488281, |
|
"logps/rejected": -329.66412353515625, |
|
"loss": 0.5461, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.33613795042037964, |
|
"rewards/margins": 0.6381479501724243, |
|
"rewards/rejected": -0.9742859601974487, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.33499084009421615, |
|
"grad_norm": 41.5, |
|
"learning_rate": 7.94032718869134e-06, |
|
"logits/chosen": 0.05233108997344971, |
|
"logits/rejected": 0.01644396223127842, |
|
"logps/chosen": -358.94122314453125, |
|
"logps/rejected": -318.9773864746094, |
|
"loss": 0.5484, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.31456658244132996, |
|
"rewards/margins": 0.5918359160423279, |
|
"rewards/rejected": -0.906402587890625, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.3402250719706883, |
|
"grad_norm": 33.75, |
|
"learning_rate": 7.869852205423738e-06, |
|
"logits/chosen": 0.009163993410766125, |
|
"logits/rejected": -0.007742050103843212, |
|
"logps/chosen": -342.1306457519531, |
|
"logps/rejected": -305.15631103515625, |
|
"loss": 0.5761, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.31523770093917847, |
|
"rewards/margins": 0.5241441130638123, |
|
"rewards/rejected": -0.8393818140029907, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.34545930384716045, |
|
"grad_norm": 40.75, |
|
"learning_rate": 7.798516478448514e-06, |
|
"logits/chosen": -0.01641426980495453, |
|
"logits/rejected": 0.01689126156270504, |
|
"logps/chosen": -364.1932067871094, |
|
"logps/rejected": -316.0845031738281, |
|
"loss": 0.5663, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.4109156131744385, |
|
"rewards/margins": 0.5688261389732361, |
|
"rewards/rejected": -0.9797417521476746, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.35069353572363254, |
|
"grad_norm": 31.0, |
|
"learning_rate": 7.726341403215237e-06, |
|
"logits/chosen": 0.09717679023742676, |
|
"logits/rejected": 0.10045663267374039, |
|
"logps/chosen": -344.9303894042969, |
|
"logps/rejected": -297.6980895996094, |
|
"loss": 0.5746, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5955493450164795, |
|
"rewards/margins": 0.6834920644760132, |
|
"rewards/rejected": -1.2790412902832031, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.3559277676001047, |
|
"grad_norm": 33.75, |
|
"learning_rate": 7.653348626915957e-06, |
|
"logits/chosen": 0.15363553166389465, |
|
"logits/rejected": 0.20421214401721954, |
|
"logps/chosen": -342.5677795410156, |
|
"logps/rejected": -321.9688415527344, |
|
"loss": 0.5366, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -0.6576627492904663, |
|
"rewards/margins": 0.7189033627510071, |
|
"rewards/rejected": -1.3765661716461182, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.3611619994765768, |
|
"grad_norm": 32.75, |
|
"learning_rate": 7.5795600419926595e-06, |
|
"logits/chosen": 0.2883613705635071, |
|
"logits/rejected": 0.2674182951450348, |
|
"logps/chosen": -353.3540344238281, |
|
"logps/rejected": -311.6170959472656, |
|
"loss": 0.561, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": -0.47711777687072754, |
|
"rewards/margins": 0.6064115762710571, |
|
"rewards/rejected": -1.0835294723510742, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.36639623135304894, |
|
"grad_norm": 31.5, |
|
"learning_rate": 7.504997779571134e-06, |
|
"logits/chosen": 0.3076362907886505, |
|
"logits/rejected": 0.3693523108959198, |
|
"logps/chosen": -341.3320007324219, |
|
"logps/rejected": -311.7213439941406, |
|
"loss": 0.5929, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.3323260247707367, |
|
"rewards/margins": 0.46539098024368286, |
|
"rewards/rejected": -0.7977169752120972, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.3716304632295211, |
|
"grad_norm": 41.75, |
|
"learning_rate": 7.429684202823284e-06, |
|
"logits/chosen": 0.24711641669273376, |
|
"logits/rejected": 0.15847380459308624, |
|
"logps/chosen": -364.9874572753906, |
|
"logps/rejected": -315.2619934082031, |
|
"loss": 0.5373, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.3125119209289551, |
|
"rewards/margins": 0.6405046582221985, |
|
"rewards/rejected": -0.9530165791511536, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.3768646951059932, |
|
"grad_norm": 38.75, |
|
"learning_rate": 7.353641900259823e-06, |
|
"logits/chosen": 0.26350438594818115, |
|
"logits/rejected": 0.20197024941444397, |
|
"logps/chosen": -346.3494567871094, |
|
"logps/rejected": -314.46673583984375, |
|
"loss": 0.5653, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.5180891752243042, |
|
"rewards/margins": 0.6086361408233643, |
|
"rewards/rejected": -1.1267253160476685, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.38209892698246534, |
|
"grad_norm": 43.25, |
|
"learning_rate": 7.276893678955387e-06, |
|
"logits/chosen": 0.06701471656560898, |
|
"logits/rejected": 0.2811892330646515, |
|
"logps/chosen": -367.32171630859375, |
|
"logps/rejected": -336.2926330566406, |
|
"loss": 0.5983, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.7037767171859741, |
|
"rewards/margins": 0.647846519947052, |
|
"rewards/rejected": -1.3516231775283813, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.38733315885893743, |
|
"grad_norm": 31.0, |
|
"learning_rate": 7.199462557708098e-06, |
|
"logits/chosen": 0.059072770178318024, |
|
"logits/rejected": 0.15490247309207916, |
|
"logps/chosen": -310.3693542480469, |
|
"logps/rejected": -300.7638244628906, |
|
"loss": 0.5609, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.6676520705223083, |
|
"rewards/margins": 0.6050979495048523, |
|
"rewards/rejected": -1.2727501392364502, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.3925673907354096, |
|
"grad_norm": 37.25, |
|
"learning_rate": 7.1213717601356245e-06, |
|
"logits/chosen": 0.04896034672856331, |
|
"logits/rejected": 0.07459446787834167, |
|
"logps/chosen": -362.3270568847656, |
|
"logps/rejected": -325.6581115722656, |
|
"loss": 0.5211, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.5440580248832703, |
|
"rewards/margins": 0.6718584299087524, |
|
"rewards/rejected": -1.2159165143966675, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.39780162261188173, |
|
"grad_norm": 35.75, |
|
"learning_rate": 7.042644707709816e-06, |
|
"logits/chosen": 0.028451938182115555, |
|
"logits/rejected": 0.1025298684835434, |
|
"logps/chosen": -352.1075134277344, |
|
"logps/rejected": -334.69586181640625, |
|
"loss": 0.575, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.5233157873153687, |
|
"rewards/margins": 0.5710444450378418, |
|
"rewards/rejected": -1.0943602323532104, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.40303585448835383, |
|
"grad_norm": 48.5, |
|
"learning_rate": 6.963305012731984e-06, |
|
"logits/chosen": 0.03199886158108711, |
|
"logits/rejected": 0.04575660824775696, |
|
"logps/chosen": -304.0777893066406, |
|
"logps/rejected": -298.5992126464844, |
|
"loss": 0.5943, |
|
"rewards/accuracies": 0.6781250238418579, |
|
"rewards/chosen": -0.4543026089668274, |
|
"rewards/margins": 0.5623122453689575, |
|
"rewards/rejected": -1.0166150331497192, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.408270086364826, |
|
"grad_norm": 44.25, |
|
"learning_rate": 6.8833764712509554e-06, |
|
"logits/chosen": 0.07288331538438797, |
|
"logits/rejected": 0.12292104959487915, |
|
"logps/chosen": -316.65643310546875, |
|
"logps/rejected": -302.11065673828125, |
|
"loss": 0.5738, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.31171008944511414, |
|
"rewards/margins": 0.5113183259963989, |
|
"rewards/rejected": -0.8230283856391907, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.4135043182412981, |
|
"grad_norm": 40.5, |
|
"learning_rate": 6.802883055926026e-06, |
|
"logits/chosen": 0.010061124339699745, |
|
"logits/rejected": 0.09118668735027313, |
|
"logps/chosen": -333.7193603515625, |
|
"logps/rejected": -297.7068786621094, |
|
"loss": 0.5423, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.29549041390419006, |
|
"rewards/margins": 0.7173663973808289, |
|
"rewards/rejected": -1.0128569602966309, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.4187385501177702, |
|
"grad_norm": 44.0, |
|
"learning_rate": 6.721848908836921e-06, |
|
"logits/chosen": 0.02005094103515148, |
|
"logits/rejected": 0.028864353895187378, |
|
"logps/chosen": -380.8367614746094, |
|
"logps/rejected": -323.69317626953125, |
|
"loss": 0.5161, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.44863682985305786, |
|
"rewards/margins": 0.7399032115936279, |
|
"rewards/rejected": -1.188539981842041, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.4239727819942423, |
|
"grad_norm": 33.5, |
|
"learning_rate": 6.640298334242959e-06, |
|
"logits/chosen": -0.040520161390304565, |
|
"logits/rejected": 0.05559789016842842, |
|
"logps/chosen": -326.5780029296875, |
|
"logps/rejected": -323.2212219238281, |
|
"loss": 0.5382, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -0.7126988172531128, |
|
"rewards/margins": 0.7181462049484253, |
|
"rewards/rejected": -1.430845022201538, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.42920701387071447, |
|
"grad_norm": 43.75, |
|
"learning_rate": 6.558255791293572e-06, |
|
"logits/chosen": -0.013416662812232971, |
|
"logits/rejected": 0.10103078186511993, |
|
"logps/chosen": -359.6937255859375, |
|
"logps/rejected": -331.66778564453125, |
|
"loss": 0.5828, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.7645487189292908, |
|
"rewards/margins": 0.6964177489280701, |
|
"rewards/rejected": -1.4609664678573608, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.4344412457471866, |
|
"grad_norm": 34.25, |
|
"learning_rate": 6.475745886692361e-06, |
|
"logits/chosen": 0.08711127936840057, |
|
"logits/rejected": 0.07562930881977081, |
|
"logps/chosen": -356.294189453125, |
|
"logps/rejected": -345.10723876953125, |
|
"loss": 0.5504, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.7678012251853943, |
|
"rewards/margins": 0.7699213027954102, |
|
"rewards/rejected": -1.5377224683761597, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.4396754776236587, |
|
"grad_norm": 43.75, |
|
"learning_rate": 6.392793367316905e-06, |
|
"logits/chosen": -0.004989707376807928, |
|
"logits/rejected": 0.02837694250047207, |
|
"logps/chosen": -347.4369812011719, |
|
"logps/rejected": -331.71673583984375, |
|
"loss": 0.5274, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.7738672494888306, |
|
"rewards/margins": 0.6914864778518677, |
|
"rewards/rejected": -1.4653537273406982, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.44490970950013087, |
|
"grad_norm": 38.5, |
|
"learning_rate": 6.309423112796529e-06, |
|
"logits/chosen": 0.01806572452187538, |
|
"logits/rejected": 0.247653529047966, |
|
"logps/chosen": -328.3951110839844, |
|
"logps/rejected": -334.0901794433594, |
|
"loss": 0.5665, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.7129162549972534, |
|
"rewards/margins": 0.6553431749343872, |
|
"rewards/rejected": -1.3682594299316406, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.45014394137660296, |
|
"grad_norm": 50.25, |
|
"learning_rate": 6.225660128050248e-06, |
|
"logits/chosen": 0.07560832798480988, |
|
"logits/rejected": 0.13501077890396118, |
|
"logps/chosen": -341.61297607421875, |
|
"logps/rejected": -323.997802734375, |
|
"loss": 0.5584, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.7762230038642883, |
|
"rewards/margins": 0.6634593605995178, |
|
"rewards/rejected": -1.4396824836730957, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.4553781732530751, |
|
"grad_norm": 42.5, |
|
"learning_rate": 6.141529535787139e-06, |
|
"logits/chosen": 0.18502117693424225, |
|
"logits/rejected": 0.2866303622722626, |
|
"logps/chosen": -381.30023193359375, |
|
"logps/rejected": -345.3155517578125, |
|
"loss": 0.5162, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.7158933877944946, |
|
"rewards/margins": 0.7875123023986816, |
|
"rewards/rejected": -1.5034056901931763, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.46061240512954726, |
|
"grad_norm": 39.25, |
|
"learning_rate": 6.057056568971383e-06, |
|
"logits/chosen": 0.06999083608388901, |
|
"logits/rejected": 0.14929188787937164, |
|
"logps/chosen": -363.5868225097656, |
|
"logps/rejected": -335.22454833984375, |
|
"loss": 0.5283, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.768639326095581, |
|
"rewards/margins": 0.8374959230422974, |
|
"rewards/rejected": -1.6061351299285889, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.46584663700601936, |
|
"grad_norm": 36.5, |
|
"learning_rate": 5.972266563254246e-06, |
|
"logits/chosen": 0.27822867035865784, |
|
"logits/rejected": 0.2106626331806183, |
|
"logps/chosen": -391.5980224609375, |
|
"logps/rejected": -348.16497802734375, |
|
"loss": 0.5582, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.8508477210998535, |
|
"rewards/margins": 0.7439194917678833, |
|
"rewards/rejected": -1.5947673320770264, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.4710808688824915, |
|
"grad_norm": 33.25, |
|
"learning_rate": 5.887184949375242e-06, |
|
"logits/chosen": 0.13532599806785583, |
|
"logits/rejected": 0.22985470294952393, |
|
"logps/chosen": -346.69805908203125, |
|
"logps/rejected": -307.9049072265625, |
|
"loss": 0.5271, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -0.6407856345176697, |
|
"rewards/margins": 0.7918336391448975, |
|
"rewards/rejected": -1.432619333267212, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.4763151007589636, |
|
"grad_norm": 36.75, |
|
"learning_rate": 5.8018372455348e-06, |
|
"logits/chosen": 0.20308074355125427, |
|
"logits/rejected": 0.22425875067710876, |
|
"logps/chosen": -364.8111572265625, |
|
"logps/rejected": -323.373291015625, |
|
"loss": 0.5568, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.7124744057655334, |
|
"rewards/margins": 0.7094903588294983, |
|
"rewards/rejected": -1.4219647645950317, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.48154933263543576, |
|
"grad_norm": 33.0, |
|
"learning_rate": 5.71624904974069e-06, |
|
"logits/chosen": 0.08423123508691788, |
|
"logits/rejected": 0.2612914443016052, |
|
"logps/chosen": -365.93475341796875, |
|
"logps/rejected": -344.0113830566406, |
|
"loss": 0.5509, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.7027541399002075, |
|
"rewards/margins": 0.7311244606971741, |
|
"rewards/rejected": -1.4338784217834473, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.48678356451190785, |
|
"grad_norm": 28.75, |
|
"learning_rate": 5.630446032130498e-06, |
|
"logits/chosen": 0.08106087893247604, |
|
"logits/rejected": 0.20746131241321564, |
|
"logps/chosen": -348.0682678222656, |
|
"logps/rejected": -333.0579833984375, |
|
"loss": 0.5412, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.5800120234489441, |
|
"rewards/margins": 0.6838679313659668, |
|
"rewards/rejected": -1.2638797760009766, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.49201779638838, |
|
"grad_norm": 41.25, |
|
"learning_rate": 5.5444539272724925e-06, |
|
"logits/chosen": 0.10578608512878418, |
|
"logits/rejected": 0.28211677074432373, |
|
"logps/chosen": -348.66021728515625, |
|
"logps/rejected": -336.518798828125, |
|
"loss": 0.5391, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6300241351127625, |
|
"rewards/margins": 0.74030601978302, |
|
"rewards/rejected": -1.3703300952911377, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.49725202826485215, |
|
"grad_norm": 46.5, |
|
"learning_rate": 5.458298526447155e-06, |
|
"logits/chosen": 0.2096971720457077, |
|
"logits/rejected": 0.2496938705444336, |
|
"logps/chosen": -347.2626037597656, |
|
"logps/rejected": -311.06072998046875, |
|
"loss": 0.6027, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -0.5817705392837524, |
|
"rewards/margins": 0.5896257162094116, |
|
"rewards/rejected": -1.171396255493164, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.5024862601413242, |
|
"grad_norm": 46.0, |
|
"learning_rate": 5.372005669911694e-06, |
|
"logits/chosen": 0.0424225851893425, |
|
"logits/rejected": 0.15929560363292694, |
|
"logps/chosen": -323.9616394042969, |
|
"logps/rejected": -302.2296142578125, |
|
"loss": 0.6337, |
|
"rewards/accuracies": 0.621874988079071, |
|
"rewards/chosen": -0.5780489444732666, |
|
"rewards/margins": 0.45929765701293945, |
|
"rewards/rejected": -1.037346601486206, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.5077204920177963, |
|
"grad_norm": 37.75, |
|
"learning_rate": 5.285601239149875e-06, |
|
"logits/chosen": 0.13957878947257996, |
|
"logits/rejected": 0.06276361644268036, |
|
"logps/chosen": -363.7620849609375, |
|
"logps/rejected": -332.8384704589844, |
|
"loss": 0.5881, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4892084002494812, |
|
"rewards/margins": 0.5942028760910034, |
|
"rewards/rejected": -1.0834112167358398, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.5129547238942685, |
|
"grad_norm": 43.0, |
|
"learning_rate": 5.199111149109498e-06, |
|
"logits/chosen": 0.02179548144340515, |
|
"logits/rejected": 0.19822855293750763, |
|
"logps/chosen": -311.2777404785156, |
|
"logps/rejected": -302.4115295410156, |
|
"loss": 0.5712, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.624075174331665, |
|
"rewards/margins": 0.6447365880012512, |
|
"rewards/rejected": -1.2688117027282715, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.5181889557707406, |
|
"grad_norm": 41.25, |
|
"learning_rate": 5.112561340429817e-06, |
|
"logits/chosen": 0.18798553943634033, |
|
"logits/rejected": 0.09373607486486435, |
|
"logps/chosen": -341.14532470703125, |
|
"logps/rejected": -302.0753479003906, |
|
"loss": 0.5605, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -0.6896688342094421, |
|
"rewards/margins": 0.6189961433410645, |
|
"rewards/rejected": -1.3086649179458618, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.5234231876472127, |
|
"grad_norm": 38.5, |
|
"learning_rate": 5.0259777716612665e-06, |
|
"logits/chosen": 0.12692488729953766, |
|
"logits/rejected": 0.19669848680496216, |
|
"logps/chosen": -368.73553466796875, |
|
"logps/rejected": -342.2942810058594, |
|
"loss": 0.5928, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.6040617823600769, |
|
"rewards/margins": 0.5547485947608948, |
|
"rewards/rejected": -1.1588103771209717, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.528657419523685, |
|
"grad_norm": 44.5, |
|
"learning_rate": 4.939386411479814e-06, |
|
"logits/chosen": 0.17332792282104492, |
|
"logits/rejected": 0.23241786658763885, |
|
"logps/chosen": -363.03436279296875, |
|
"logps/rejected": -353.3703918457031, |
|
"loss": 0.566, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.49267879128456116, |
|
"rewards/margins": 0.6268835067749023, |
|
"rewards/rejected": -1.1195623874664307, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.533891651400157, |
|
"grad_norm": 45.25, |
|
"learning_rate": 4.85281323089828e-06, |
|
"logits/chosen": 0.21631035208702087, |
|
"logits/rejected": 0.18023517727851868, |
|
"logps/chosen": -378.2434387207031, |
|
"logps/rejected": -343.40557861328125, |
|
"loss": 0.6151, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.60394287109375, |
|
"rewards/margins": 0.5025305151939392, |
|
"rewards/rejected": -1.106473445892334, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.5391258832766291, |
|
"grad_norm": 39.25, |
|
"learning_rate": 4.766284195476943e-06, |
|
"logits/chosen": 0.26781684160232544, |
|
"logits/rejected": 0.3123559355735779, |
|
"logps/chosen": -358.23638916015625, |
|
"logps/rejected": -327.5035705566406, |
|
"loss": 0.5133, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.4111200273036957, |
|
"rewards/margins": 0.7634793519973755, |
|
"rewards/rejected": -1.1745994091033936, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.5443601151531012, |
|
"grad_norm": 35.5, |
|
"learning_rate": 4.679825257535795e-06, |
|
"logits/chosen": 0.20860572159290314, |
|
"logits/rejected": 0.17548814415931702, |
|
"logps/chosen": -352.4187316894531, |
|
"logps/rejected": -312.301513671875, |
|
"loss": 0.5343, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.4872250556945801, |
|
"rewards/margins": 0.6681305170059204, |
|
"rewards/rejected": -1.1553555727005005, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.5495943470295734, |
|
"grad_norm": 39.75, |
|
"learning_rate": 4.593462348370759e-06, |
|
"logits/chosen": 0.18573978543281555, |
|
"logits/rejected": 0.301888644695282, |
|
"logps/chosen": -346.60955810546875, |
|
"logps/rejected": -321.0350036621094, |
|
"loss": 0.5507, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -0.5732883214950562, |
|
"rewards/margins": 0.6586001515388489, |
|
"rewards/rejected": -1.2318884134292603, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.5548285789060455, |
|
"grad_norm": 34.5, |
|
"learning_rate": 4.507221370476223e-06, |
|
"logits/chosen": 0.27575668692588806, |
|
"logits/rejected": 0.2642131447792053, |
|
"logps/chosen": -355.8514099121094, |
|
"logps/rejected": -338.43194580078125, |
|
"loss": 0.5667, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.603151798248291, |
|
"rewards/margins": 0.6535589694976807, |
|
"rewards/rejected": -1.2567107677459717, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.5600628107825176, |
|
"grad_norm": 27.625, |
|
"learning_rate": 4.421128189776195e-06, |
|
"logits/chosen": 0.17187847197055817, |
|
"logits/rejected": 0.30097633600234985, |
|
"logps/chosen": -317.4275817871094, |
|
"logps/rejected": -276.91058349609375, |
|
"loss": 0.5341, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.6077526807785034, |
|
"rewards/margins": 0.6645117402076721, |
|
"rewards/rejected": -1.2722644805908203, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.5652970426589898, |
|
"grad_norm": 29.625, |
|
"learning_rate": 4.335208627866438e-06, |
|
"logits/chosen": 0.3107313811779022, |
|
"logits/rejected": 0.3381400406360626, |
|
"logps/chosen": -345.51873779296875, |
|
"logps/rejected": -302.8568420410156, |
|
"loss": 0.5109, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.5785449743270874, |
|
"rewards/margins": 0.72198086977005, |
|
"rewards/rejected": -1.3005257844924927, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.5705312745354619, |
|
"grad_norm": 41.0, |
|
"learning_rate": 4.249488454269908e-06, |
|
"logits/chosen": 0.2692057490348816, |
|
"logits/rejected": 0.36852699518203735, |
|
"logps/chosen": -359.2057189941406, |
|
"logps/rejected": -338.135986328125, |
|
"loss": 0.5348, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.66764897108078, |
|
"rewards/margins": 0.6854020953178406, |
|
"rewards/rejected": -1.3530510663986206, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.575765506411934, |
|
"grad_norm": 38.75, |
|
"learning_rate": 4.163993378707786e-06, |
|
"logits/chosen": 0.23850250244140625, |
|
"logits/rejected": 0.32512611150741577, |
|
"logps/chosen": -323.40374755859375, |
|
"logps/rejected": -301.8957214355469, |
|
"loss": 0.5664, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.5826855897903442, |
|
"rewards/margins": 0.6959084272384644, |
|
"rewards/rejected": -1.278593897819519, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.5809997382884062, |
|
"grad_norm": 45.0, |
|
"learning_rate": 4.0787490433884685e-06, |
|
"logits/chosen": 0.25609683990478516, |
|
"logits/rejected": 0.2937456965446472, |
|
"logps/chosen": -329.973388671875, |
|
"logps/rejected": -304.82904052734375, |
|
"loss": 0.5641, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.6114211082458496, |
|
"rewards/margins": 0.586783766746521, |
|
"rewards/rejected": -1.1982048749923706, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.5862339701648783, |
|
"grad_norm": 41.25, |
|
"learning_rate": 3.993781015316802e-06, |
|
"logits/chosen": 0.2453668862581253, |
|
"logits/rejected": 0.2688624858856201, |
|
"logps/chosen": -379.10552978515625, |
|
"logps/rejected": -333.52215576171875, |
|
"loss": 0.5727, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6501410007476807, |
|
"rewards/margins": 0.6753184199333191, |
|
"rewards/rejected": -1.3254594802856445, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.5914682020413504, |
|
"grad_norm": 45.25, |
|
"learning_rate": 3.909114778625861e-06, |
|
"logits/chosen": 0.22569675743579865, |
|
"logits/rejected": 0.20706626772880554, |
|
"logps/chosen": -384.211181640625, |
|
"logps/rejected": -316.38995361328125, |
|
"loss": 0.4917, |
|
"rewards/accuracies": 0.7593749761581421, |
|
"rewards/chosen": -0.5756205320358276, |
|
"rewards/margins": 0.8201796412467957, |
|
"rewards/rejected": -1.3958003520965576, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.5967024339178225, |
|
"grad_norm": 41.25, |
|
"learning_rate": 3.824775726933596e-06, |
|
"logits/chosen": 0.28916609287261963, |
|
"logits/rejected": 0.35677972435951233, |
|
"logps/chosen": -349.084716796875, |
|
"logps/rejected": -304.26617431640625, |
|
"loss": 0.5363, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.662204384803772, |
|
"rewards/margins": 0.72789466381073, |
|
"rewards/rejected": -1.3900991678237915, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.6019366657942947, |
|
"grad_norm": 56.0, |
|
"learning_rate": 3.7407891557266242e-06, |
|
"logits/chosen": 0.2205003947019577, |
|
"logits/rejected": 0.27626657485961914, |
|
"logps/chosen": -340.46710205078125, |
|
"logps/rejected": -329.74786376953125, |
|
"loss": 0.5708, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.8766458630561829, |
|
"rewards/margins": 0.6808016896247864, |
|
"rewards/rejected": -1.5574474334716797, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.6071708976707668, |
|
"grad_norm": 43.0, |
|
"learning_rate": 3.6571802547734457e-06, |
|
"logits/chosen": 0.19416388869285583, |
|
"logits/rejected": 0.3043220043182373, |
|
"logps/chosen": -349.0753479003906, |
|
"logps/rejected": -327.81964111328125, |
|
"loss": 0.5312, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8548088073730469, |
|
"rewards/margins": 0.7565596103668213, |
|
"rewards/rejected": -1.6113685369491577, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.6124051295472389, |
|
"grad_norm": 38.75, |
|
"learning_rate": 3.5739741005693807e-06, |
|
"logits/chosen": 0.26571187376976013, |
|
"logits/rejected": 0.403964102268219, |
|
"logps/chosen": -375.9601135253906, |
|
"logps/rejected": -345.172119140625, |
|
"loss": 0.5493, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.7909923195838928, |
|
"rewards/margins": 0.7638007998466492, |
|
"rewards/rejected": -1.5547930002212524, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.6176393614237111, |
|
"grad_norm": 34.0, |
|
"learning_rate": 3.4911956488154696e-06, |
|
"logits/chosen": 0.22815632820129395, |
|
"logits/rejected": 0.1856929063796997, |
|
"logps/chosen": -343.6809997558594, |
|
"logps/rejected": -315.6387939453125, |
|
"loss": 0.6059, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.8829870223999023, |
|
"rewards/margins": 0.6338706016540527, |
|
"rewards/rejected": -1.5168575048446655, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.6228735933001832, |
|
"grad_norm": 33.5, |
|
"learning_rate": 3.4088697269336045e-06, |
|
"logits/chosen": 0.27643901109695435, |
|
"logits/rejected": 0.3212670385837555, |
|
"logps/chosen": -365.35595703125, |
|
"logps/rejected": -312.13323974609375, |
|
"loss": 0.4889, |
|
"rewards/accuracies": 0.7593749761581421, |
|
"rewards/chosen": -0.6881390810012817, |
|
"rewards/margins": 0.8743413686752319, |
|
"rewards/rejected": -1.5624804496765137, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.6281078251766553, |
|
"grad_norm": 51.0, |
|
"learning_rate": 3.3270210266201373e-06, |
|
"logits/chosen": 0.29524120688438416, |
|
"logits/rejected": 0.3692258298397064, |
|
"logps/chosen": -351.9238586425781, |
|
"logps/rejected": -327.85919189453125, |
|
"loss": 0.587, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.8268892168998718, |
|
"rewards/margins": 0.6596170663833618, |
|
"rewards/rejected": -1.4865062236785889, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.6333420570531274, |
|
"grad_norm": 32.0, |
|
"learning_rate": 3.2456740964401977e-06, |
|
"logits/chosen": 0.2849295735359192, |
|
"logits/rejected": 0.4959246516227722, |
|
"logps/chosen": -354.4574279785156, |
|
"logps/rejected": -339.2894287109375, |
|
"loss": 0.5613, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.8398796319961548, |
|
"rewards/margins": 0.6982079744338989, |
|
"rewards/rejected": -1.5380874872207642, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.6385762889295996, |
|
"grad_norm": 41.5, |
|
"learning_rate": 3.1648533344649303e-06, |
|
"logits/chosen": 0.19813226163387299, |
|
"logits/rejected": 0.44143158197402954, |
|
"logps/chosen": -340.7344970703125, |
|
"logps/rejected": -349.23114013671875, |
|
"loss": 0.5171, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.8335161209106445, |
|
"rewards/margins": 0.7508944869041443, |
|
"rewards/rejected": -1.5844106674194336, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.6438105208060717, |
|
"grad_norm": 51.5, |
|
"learning_rate": 3.084582980953881e-06, |
|
"logits/chosen": 0.2836536169052124, |
|
"logits/rejected": 0.2980581223964691, |
|
"logps/chosen": -388.7774963378906, |
|
"logps/rejected": -311.330322265625, |
|
"loss": 0.5422, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.7870951294898987, |
|
"rewards/margins": 0.7385523915290833, |
|
"rewards/rejected": -1.525647521018982, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.6490447526825438, |
|
"grad_norm": 37.0, |
|
"learning_rate": 3.0048871110847043e-06, |
|
"logits/chosen": 0.25779467821121216, |
|
"logits/rejected": 0.23647575080394745, |
|
"logps/chosen": -365.7320861816406, |
|
"logps/rejected": -324.42779541015625, |
|
"loss": 0.5212, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -0.7111884951591492, |
|
"rewards/margins": 0.8184769749641418, |
|
"rewards/rejected": -1.529665470123291, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.654278984559016, |
|
"grad_norm": 37.75, |
|
"learning_rate": 2.925789627732395e-06, |
|
"logits/chosen": 0.2080594003200531, |
|
"logits/rejected": 0.24620768427848816, |
|
"logps/chosen": -357.9142150878906, |
|
"logps/rejected": -327.7608947753906, |
|
"loss": 0.5338, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7053016424179077, |
|
"rewards/margins": 0.8197522163391113, |
|
"rewards/rejected": -1.5250537395477295, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.6595132164354881, |
|
"grad_norm": 32.75, |
|
"learning_rate": 2.8473142543001818e-06, |
|
"logits/chosen": 0.2071513682603836, |
|
"logits/rejected": 0.3076412081718445, |
|
"logps/chosen": -320.2906188964844, |
|
"logps/rejected": -300.67083740234375, |
|
"loss": 0.5539, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.808573842048645, |
|
"rewards/margins": 0.7147833108901978, |
|
"rewards/rejected": -1.5233570337295532, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.6647474483119602, |
|
"grad_norm": 40.5, |
|
"learning_rate": 2.7694845276042714e-06, |
|
"logits/chosen": 0.19621708989143372, |
|
"logits/rejected": 0.25396865606307983, |
|
"logps/chosen": -360.89154052734375, |
|
"logps/rejected": -332.4544677734375, |
|
"loss": 0.5323, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7095572352409363, |
|
"rewards/margins": 0.8451035618782043, |
|
"rewards/rejected": -1.5546607971191406, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.6699816801884323, |
|
"grad_norm": 42.0, |
|
"learning_rate": 2.6923237908145227e-06, |
|
"logits/chosen": 0.26621609926223755, |
|
"logits/rejected": 0.26851433515548706, |
|
"logps/chosen": -325.02984619140625, |
|
"logps/rejected": -332.8418884277344, |
|
"loss": 0.5279, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.6964584589004517, |
|
"rewards/margins": 0.7985239028930664, |
|
"rewards/rejected": -1.4949824810028076, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.6752159120649045, |
|
"grad_norm": 33.5, |
|
"learning_rate": 2.615855186453241e-06, |
|
"logits/chosen": 0.2269335240125656, |
|
"logits/rejected": 0.3090178966522217, |
|
"logps/chosen": -358.5539855957031, |
|
"logps/rejected": -346.3840026855469, |
|
"loss": 0.515, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -0.6508886814117432, |
|
"rewards/margins": 0.8423940539360046, |
|
"rewards/rejected": -1.4932825565338135, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.6804501439413766, |
|
"grad_norm": 46.25, |
|
"learning_rate": 2.5401016494541193e-06, |
|
"logits/chosen": 0.18353882431983948, |
|
"logits/rejected": 0.3234695494174957, |
|
"logps/chosen": -341.1846923828125, |
|
"logps/rejected": -330.22100830078125, |
|
"loss": 0.5505, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -0.7482860684394836, |
|
"rewards/margins": 0.7044668197631836, |
|
"rewards/rejected": -1.4527528285980225, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.6856843758178487, |
|
"grad_norm": 53.0, |
|
"learning_rate": 2.4650859002834465e-06, |
|
"logits/chosen": 0.20721504092216492, |
|
"logits/rejected": 0.4179702699184418, |
|
"logps/chosen": -349.2493591308594, |
|
"logps/rejected": -329.89178466796875, |
|
"loss": 0.5464, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.7340461015701294, |
|
"rewards/margins": 0.6964015960693359, |
|
"rewards/rejected": -1.4304475784301758, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.6909186076943209, |
|
"grad_norm": 41.0, |
|
"learning_rate": 2.390830438125661e-06, |
|
"logits/chosen": 0.12962877750396729, |
|
"logits/rejected": 0.1510620415210724, |
|
"logps/chosen": -353.7647399902344, |
|
"logps/rejected": -325.97857666015625, |
|
"loss": 0.5903, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.7643977403640747, |
|
"rewards/margins": 0.6256237626075745, |
|
"rewards/rejected": -1.390021562576294, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.696152839570793, |
|
"grad_norm": 35.5, |
|
"learning_rate": 2.3173575341352457e-06, |
|
"logits/chosen": 0.2343304455280304, |
|
"logits/rejected": 0.3055940568447113, |
|
"logps/chosen": -346.0968017578125, |
|
"logps/rejected": -324.8904724121094, |
|
"loss": 0.5805, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.664479672908783, |
|
"rewards/margins": 0.6432613134384155, |
|
"rewards/rejected": -1.3077409267425537, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.7013870714472651, |
|
"grad_norm": 42.5, |
|
"learning_rate": 2.2446892247570257e-06, |
|
"logits/chosen": 0.28046920895576477, |
|
"logits/rejected": 0.32017889618873596, |
|
"logps/chosen": -351.0575866699219, |
|
"logps/rejected": -331.1203918457031, |
|
"loss": 0.5721, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7322802543640137, |
|
"rewards/margins": 0.6340840458869934, |
|
"rewards/rejected": -1.3663642406463623, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.7066213033237373, |
|
"grad_norm": 33.75, |
|
"learning_rate": 2.172847305116872e-06, |
|
"logits/chosen": 0.21674051880836487, |
|
"logits/rejected": 0.2154543697834015, |
|
"logps/chosen": -347.7149353027344, |
|
"logps/rejected": -322.49041748046875, |
|
"loss": 0.5757, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.6023791432380676, |
|
"rewards/margins": 0.6135936975479126, |
|
"rewards/rejected": -1.2159727811813354, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.7118555352002094, |
|
"grad_norm": 35.25, |
|
"learning_rate": 2.1018533224847638e-06, |
|
"logits/chosen": 0.25386855006217957, |
|
"logits/rejected": 0.2589409351348877, |
|
"logps/chosen": -377.95098876953125, |
|
"logps/rejected": -333.69635009765625, |
|
"loss": 0.5638, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6216534376144409, |
|
"rewards/margins": 0.7461624145507812, |
|
"rewards/rejected": -1.3678158521652222, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.7170897670766815, |
|
"grad_norm": 34.25, |
|
"learning_rate": 2.0317285698122035e-06, |
|
"logits/chosen": 0.16483676433563232, |
|
"logits/rejected": 0.32940173149108887, |
|
"logps/chosen": -339.90887451171875, |
|
"logps/rejected": -323.4661865234375, |
|
"loss": 0.5358, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.5953302383422852, |
|
"rewards/margins": 0.7433405518531799, |
|
"rewards/rejected": -1.3386707305908203, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.7223239989531536, |
|
"grad_norm": 36.5, |
|
"learning_rate": 1.962494079345906e-06, |
|
"logits/chosen": 0.09827329963445663, |
|
"logits/rejected": 0.15172025561332703, |
|
"logps/chosen": -383.4322509765625, |
|
"logps/rejected": -332.6456604003906, |
|
"loss": 0.5587, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.6343564391136169, |
|
"rewards/margins": 0.7060499787330627, |
|
"rewards/rejected": -1.3404064178466797, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.7275582308296258, |
|
"grad_norm": 31.5, |
|
"learning_rate": 1.8941706163196676e-06, |
|
"logits/chosen": 0.2607867121696472, |
|
"logits/rejected": 0.40171027183532715, |
|
"logps/chosen": -314.8638610839844, |
|
"logps/rejected": -309.70318603515625, |
|
"loss": 0.5448, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.6349200010299683, |
|
"rewards/margins": 0.6689399480819702, |
|
"rewards/rejected": -1.3038599491119385, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.7327924627060979, |
|
"grad_norm": 32.75, |
|
"learning_rate": 1.8267786727263426e-06, |
|
"logits/chosen": 0.2444543093442917, |
|
"logits/rejected": 0.3561408519744873, |
|
"logps/chosen": -341.54486083984375, |
|
"logps/rejected": -317.6834411621094, |
|
"loss": 0.5475, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5884548425674438, |
|
"rewards/margins": 0.6964614987373352, |
|
"rewards/rejected": -1.2849162817001343, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.73802669458257, |
|
"grad_norm": 29.375, |
|
"learning_rate": 1.760338461171755e-06, |
|
"logits/chosen": 0.21381571888923645, |
|
"logits/rejected": 0.2990735173225403, |
|
"logps/chosen": -327.5777282714844, |
|
"logps/rejected": -322.8582763671875, |
|
"loss": 0.5882, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.6773046255111694, |
|
"rewards/margins": 0.6310230493545532, |
|
"rewards/rejected": -1.3083276748657227, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.7432609264590422, |
|
"grad_norm": 35.5, |
|
"learning_rate": 1.6948699088123992e-06, |
|
"logits/chosen": 0.21470919251441956, |
|
"logits/rejected": 0.25266513228416443, |
|
"logps/chosen": -333.81622314453125, |
|
"logps/rejected": -307.29010009765625, |
|
"loss": 0.5675, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": -0.5910285711288452, |
|
"rewards/margins": 0.6486441493034363, |
|
"rewards/rejected": -1.2396726608276367, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.7484951583355143, |
|
"grad_norm": 41.5, |
|
"learning_rate": 1.6303926513787821e-06, |
|
"logits/chosen": 0.08108960837125778, |
|
"logits/rejected": 0.06331077218055725, |
|
"logps/chosen": -339.13446044921875, |
|
"logps/rejected": -310.05657958984375, |
|
"loss": 0.5359, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.6054891347885132, |
|
"rewards/margins": 0.7288047075271606, |
|
"rewards/rejected": -1.3342937231063843, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.7537293902119864, |
|
"grad_norm": 35.25, |
|
"learning_rate": 1.5669260272861426e-06, |
|
"logits/chosen": 0.20187553763389587, |
|
"logits/rejected": 0.1850132793188095, |
|
"logps/chosen": -336.2139892578125, |
|
"logps/rejected": -334.64141845703125, |
|
"loss": 0.5212, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.5741112232208252, |
|
"rewards/margins": 0.7714685201644897, |
|
"rewards/rejected": -1.3455798625946045, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.7589636220884585, |
|
"grad_norm": 40.0, |
|
"learning_rate": 1.5044890718343535e-06, |
|
"logits/chosen": 0.23868337273597717, |
|
"logits/rejected": 0.12508736550807953, |
|
"logps/chosen": -325.13470458984375, |
|
"logps/rejected": -315.5563049316406, |
|
"loss": 0.5758, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7012775540351868, |
|
"rewards/margins": 0.6083887815475464, |
|
"rewards/rejected": -1.3096662759780884, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.7641978539649307, |
|
"grad_norm": 36.0, |
|
"learning_rate": 1.4431005114987485e-06, |
|
"logits/chosen": 0.24810700118541718, |
|
"logits/rejected": 0.24045827984809875, |
|
"logps/chosen": -395.248779296875, |
|
"logps/rejected": -352.9403381347656, |
|
"loss": 0.5406, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.5400234460830688, |
|
"rewards/margins": 0.7289952635765076, |
|
"rewards/rejected": -1.2690187692642212, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.7694320858414028, |
|
"grad_norm": 37.0, |
|
"learning_rate": 1.3827787583135533e-06, |
|
"logits/chosen": 0.17350813746452332, |
|
"logits/rejected": 0.3169202506542206, |
|
"logps/chosen": -348.0475769042969, |
|
"logps/rejected": -334.5708923339844, |
|
"loss": 0.5937, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.6126903295516968, |
|
"rewards/margins": 0.6701362133026123, |
|
"rewards/rejected": -1.2828264236450195, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.7746663177178749, |
|
"grad_norm": 47.25, |
|
"learning_rate": 1.3235419043496362e-06, |
|
"logits/chosen": 0.31210190057754517, |
|
"logits/rejected": 0.4039413034915924, |
|
"logps/chosen": -340.84112548828125, |
|
"logps/rejected": -317.56768798828125, |
|
"loss": 0.5859, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": -0.6002423167228699, |
|
"rewards/margins": 0.638708770275116, |
|
"rewards/rejected": -1.2389512062072754, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.7799005495943471, |
|
"grad_norm": 36.0, |
|
"learning_rate": 1.2654077162882271e-06, |
|
"logits/chosen": 0.23288901150226593, |
|
"logits/rejected": 0.18572565913200378, |
|
"logps/chosen": -346.56048583984375, |
|
"logps/rejected": -320.6227722167969, |
|
"loss": 0.5603, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.597626805305481, |
|
"rewards/margins": 0.7199206352233887, |
|
"rewards/rejected": -1.3175475597381592, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.7851347814708192, |
|
"grad_norm": 34.0, |
|
"learning_rate": 1.2083936300922238e-06, |
|
"logits/chosen": 0.34366169571876526, |
|
"logits/rejected": 0.3978513181209564, |
|
"logps/chosen": -362.42364501953125, |
|
"logps/rejected": -334.7811279296875, |
|
"loss": 0.5865, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -0.5686925649642944, |
|
"rewards/margins": 0.6736392974853516, |
|
"rewards/rejected": -1.2423319816589355, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.7903690133472913, |
|
"grad_norm": 29.875, |
|
"learning_rate": 1.1525167457766856e-06, |
|
"logits/chosen": 0.2138892114162445, |
|
"logits/rejected": 0.18896692991256714, |
|
"logps/chosen": -337.1097412109375, |
|
"logps/rejected": -315.19427490234375, |
|
"loss": 0.5706, |
|
"rewards/accuracies": 0.6781250238418579, |
|
"rewards/chosen": -0.5940120816230774, |
|
"rewards/margins": 0.6726430654525757, |
|
"rewards/rejected": -1.2666552066802979, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.7956032452237635, |
|
"grad_norm": 41.5, |
|
"learning_rate": 1.0977938222801004e-06, |
|
"logits/chosen": 0.2619992792606354, |
|
"logits/rejected": 0.3644074499607086, |
|
"logps/chosen": -340.20709228515625, |
|
"logps/rejected": -314.8345031738281, |
|
"loss": 0.5829, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6193765997886658, |
|
"rewards/margins": 0.6324874758720398, |
|
"rewards/rejected": -1.2518641948699951, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.8008374771002356, |
|
"grad_norm": 38.75, |
|
"learning_rate": 1.0442412724379365e-06, |
|
"logits/chosen": 0.19523002207279205, |
|
"logits/rejected": 0.19391083717346191, |
|
"logps/chosen": -346.6025390625, |
|
"logps/rejected": -295.1070861816406, |
|
"loss": 0.5791, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.6979666948318481, |
|
"rewards/margins": 0.6040533781051636, |
|
"rewards/rejected": -1.3020200729370117, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.8060717089767077, |
|
"grad_norm": 35.25, |
|
"learning_rate": 9.9187515806e-07, |
|
"logits/chosen": 0.3515494167804718, |
|
"logits/rejected": 0.3660346567630768, |
|
"logps/chosen": -368.04571533203125, |
|
"logps/rejected": -319.6786193847656, |
|
"loss": 0.562, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.5553494095802307, |
|
"rewards/margins": 0.6650460958480835, |
|
"rewards/rejected": -1.220395565032959, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.8113059408531798, |
|
"grad_norm": 32.5, |
|
"learning_rate": 9.407111851130879e-07, |
|
"logits/chosen": 0.3301383852958679, |
|
"logits/rejected": 0.277874231338501, |
|
"logps/chosen": -338.82794189453125, |
|
"logps/rejected": -320.33892822265625, |
|
"loss": 0.517, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.5561447143554688, |
|
"rewards/margins": 0.7759675979614258, |
|
"rewards/rejected": -1.3321123123168945, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.816540172729652, |
|
"grad_norm": 36.5, |
|
"learning_rate": 8.907646990103496e-07, |
|
"logits/chosen": 0.2506190538406372, |
|
"logits/rejected": 0.3602852523326874, |
|
"logps/chosen": -330.9645080566406, |
|
"logps/rejected": -305.1396179199219, |
|
"loss": 0.5224, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.6034108400344849, |
|
"rewards/margins": 0.7130244970321655, |
|
"rewards/rejected": -1.3164353370666504, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.821774404606124, |
|
"grad_norm": 31.0, |
|
"learning_rate": 8.42050680008798e-07, |
|
"logits/chosen": 0.11515147984027863, |
|
"logits/rejected": 0.17567628622055054, |
|
"logps/chosen": -345.45513916015625, |
|
"logps/rejected": -329.71380615234375, |
|
"loss": 0.5606, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6171659231185913, |
|
"rewards/margins": 0.6560701131820679, |
|
"rewards/rejected": -1.2732360363006592, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.8270086364825961, |
|
"grad_norm": 40.25, |
|
"learning_rate": 7.945837387163424e-07, |
|
"logits/chosen": 0.32047078013420105, |
|
"logits/rejected": 0.30428266525268555, |
|
"logps/chosen": -354.98297119140625, |
|
"logps/rejected": -323.7526550292969, |
|
"loss": 0.5838, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -0.6056606769561768, |
|
"rewards/margins": 0.6512014865875244, |
|
"rewards/rejected": -1.2568622827529907, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.8322428683590684, |
|
"grad_norm": 46.5, |
|
"learning_rate": 7.483781117096828e-07, |
|
"logits/chosen": 0.3148021996021271, |
|
"logits/rejected": 0.3127804100513458, |
|
"logps/chosen": -372.3219299316406, |
|
"logps/rejected": -344.1582946777344, |
|
"loss": 0.5491, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6119446754455566, |
|
"rewards/margins": 0.7214257121086121, |
|
"rewards/rejected": -1.3333704471588135, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.8374771002355405, |
|
"grad_norm": 46.25, |
|
"learning_rate": 7.034476572643855e-07, |
|
"logits/chosen": 0.30679917335510254, |
|
"logits/rejected": 0.35226863622665405, |
|
"logps/chosen": -350.220703125, |
|
"logps/rejected": -324.85552978515625, |
|
"loss": 0.5779, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.6405495405197144, |
|
"rewards/margins": 0.6569965481758118, |
|
"rewards/rejected": -1.297546148300171, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.8427113321120125, |
|
"grad_norm": 39.25, |
|
"learning_rate": 6.598058511984307e-07, |
|
"logits/chosen": 0.30778008699417114, |
|
"logits/rejected": 0.32357412576675415, |
|
"logps/chosen": -336.26043701171875, |
|
"logps/rejected": -302.003662109375, |
|
"loss": 0.5563, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.7136401534080505, |
|
"rewards/margins": 0.7009720802307129, |
|
"rewards/rejected": -1.4146122932434082, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.8479455639884846, |
|
"grad_norm": 31.375, |
|
"learning_rate": 6.174657828304543e-07, |
|
"logits/chosen": 0.2360578030347824, |
|
"logits/rejected": 0.269861102104187, |
|
"logps/chosen": -333.8213806152344, |
|
"logps/rejected": -317.9136962890625, |
|
"loss": 0.6052, |
|
"rewards/accuracies": 0.653124988079071, |
|
"rewards/chosen": -0.653602659702301, |
|
"rewards/margins": 0.5380398631095886, |
|
"rewards/rejected": -1.1916425228118896, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.8531797958649568, |
|
"grad_norm": 31.0, |
|
"learning_rate": 5.764401510539253e-07, |
|
"logits/chosen": 0.3052513599395752, |
|
"logits/rejected": 0.2237941026687622, |
|
"logps/chosen": -359.7833557128906, |
|
"logps/rejected": -303.59979248046875, |
|
"loss": 0.5544, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5926748514175415, |
|
"rewards/margins": 0.6909803748130798, |
|
"rewards/rejected": -1.2836551666259766, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.8584140277414289, |
|
"grad_norm": 29.375, |
|
"learning_rate": 5.36741260528415e-07, |
|
"logits/chosen": 0.13227298855781555, |
|
"logits/rejected": 0.2516798973083496, |
|
"logps/chosen": -374.15216064453125, |
|
"logps/rejected": -356.4462585449219, |
|
"loss": 0.4841, |
|
"rewards/accuracies": 0.778124988079071, |
|
"rewards/chosen": -0.5345168113708496, |
|
"rewards/margins": 0.8594783544540405, |
|
"rewards/rejected": -1.3939951658248901, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.863648259617901, |
|
"grad_norm": 33.25, |
|
"learning_rate": 4.98381017989103e-07, |
|
"logits/chosen": 0.12983646988868713, |
|
"logits/rejected": 0.21437188982963562, |
|
"logps/chosen": -347.4361572265625, |
|
"logps/rejected": -309.8014221191406, |
|
"loss": 0.5053, |
|
"rewards/accuracies": 0.7593749761581421, |
|
"rewards/chosen": -0.5407166481018066, |
|
"rewards/margins": 0.7795384526252747, |
|
"rewards/rejected": -1.3202550411224365, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.8688824914943732, |
|
"grad_norm": 34.0, |
|
"learning_rate": 4.6137092867564127e-07, |
|
"logits/chosen": 0.2845227122306824, |
|
"logits/rejected": 0.30273061990737915, |
|
"logps/chosen": -319.15130615234375, |
|
"logps/rejected": -301.53167724609375, |
|
"loss": 0.5385, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.5724095106124878, |
|
"rewards/margins": 0.7000355124473572, |
|
"rewards/rejected": -1.2724450826644897, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.8741167233708453, |
|
"grad_norm": 31.875, |
|
"learning_rate": 4.2572209288143095e-07, |
|
"logits/chosen": 0.24127666652202606, |
|
"logits/rejected": 0.24279674887657166, |
|
"logps/chosen": -349.64788818359375, |
|
"logps/rejected": -320.1210021972656, |
|
"loss": 0.5966, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.6160230040550232, |
|
"rewards/margins": 0.5984134674072266, |
|
"rewards/rejected": -1.2144365310668945, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.8793509552473174, |
|
"grad_norm": 27.75, |
|
"learning_rate": 3.9144520262435094e-07, |
|
"logits/chosen": 0.2617672085762024, |
|
"logits/rejected": 0.2997414469718933, |
|
"logps/chosen": -375.3115234375, |
|
"logps/rejected": -318.6556701660156, |
|
"loss": 0.4994, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.47033149003982544, |
|
"rewards/margins": 0.8409943580627441, |
|
"rewards/rejected": -1.3113257884979248, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.8845851871237895, |
|
"grad_norm": 43.0, |
|
"learning_rate": 3.5855053843994625e-07, |
|
"logits/chosen": 0.21160957217216492, |
|
"logits/rejected": 0.2680891454219818, |
|
"logps/chosen": -332.6085205078125, |
|
"logps/rejected": -345.51708984375, |
|
"loss": 0.5922, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.6225179433822632, |
|
"rewards/margins": 0.6167303323745728, |
|
"rewards/rejected": -1.239248275756836, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.8898194190002617, |
|
"grad_norm": 38.5, |
|
"learning_rate": 3.270479662980247e-07, |
|
"logits/chosen": 0.37835609912872314, |
|
"logits/rejected": 0.3830730617046356, |
|
"logps/chosen": -342.5943908691406, |
|
"logps/rejected": -330.6464538574219, |
|
"loss": 0.5673, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.6359362602233887, |
|
"rewards/margins": 0.7126865386962891, |
|
"rewards/rejected": -1.3486229181289673, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.8950536508767338, |
|
"grad_norm": 40.0, |
|
"learning_rate": 2.9694693464359434e-07, |
|
"logits/chosen": 0.2732701599597931, |
|
"logits/rejected": 0.22347459197044373, |
|
"logps/chosen": -359.74591064453125, |
|
"logps/rejected": -350.09124755859375, |
|
"loss": 0.5584, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.5950442552566528, |
|
"rewards/margins": 0.7091079950332642, |
|
"rewards/rejected": -1.304152250289917, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.9002878827532059, |
|
"grad_norm": 35.75, |
|
"learning_rate": 2.682564715630287e-07, |
|
"logits/chosen": 0.23081643879413605, |
|
"logits/rejected": 0.25710368156433105, |
|
"logps/chosen": -352.5965881347656, |
|
"logps/rejected": -327.4501647949219, |
|
"loss": 0.4938, |
|
"rewards/accuracies": 0.7593749761581421, |
|
"rewards/chosen": -0.5555008053779602, |
|
"rewards/margins": 0.8596852421760559, |
|
"rewards/rejected": -1.4151861667633057, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.9055221146296781, |
|
"grad_norm": 47.75, |
|
"learning_rate": 2.4098518207630706e-07, |
|
"logits/chosen": 0.2609720826148987, |
|
"logits/rejected": 0.33673757314682007, |
|
"logps/chosen": -347.4607238769531, |
|
"logps/rejected": -302.12030029296875, |
|
"loss": 0.5831, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": -0.6414428949356079, |
|
"rewards/margins": 0.6207069158554077, |
|
"rewards/rejected": -1.2621498107910156, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.9107563465061502, |
|
"grad_norm": 44.5, |
|
"learning_rate": 2.1514124555614412e-07, |
|
"logits/chosen": 0.12182704359292984, |
|
"logits/rejected": 0.2441065013408661, |
|
"logps/chosen": -375.3338623046875, |
|
"logps/rejected": -339.09783935546875, |
|
"loss": 0.5553, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.6452317833900452, |
|
"rewards/margins": 0.691677451133728, |
|
"rewards/rejected": -1.336909294128418, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.9159905783826223, |
|
"grad_norm": 35.0, |
|
"learning_rate": 1.9073241327478287e-07, |
|
"logits/chosen": 0.15027470886707306, |
|
"logits/rejected": 0.1401127278804779, |
|
"logps/chosen": -337.196044921875, |
|
"logps/rejected": -298.7091979980469, |
|
"loss": 0.5701, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5936441421508789, |
|
"rewards/margins": 0.6106031537055969, |
|
"rewards/rejected": -1.204247236251831, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.9212248102590945, |
|
"grad_norm": 39.0, |
|
"learning_rate": 1.677660060791836e-07, |
|
"logits/chosen": 0.28163331747055054, |
|
"logits/rejected": 0.28244131803512573, |
|
"logps/chosen": -352.9237365722656, |
|
"logps/rejected": -314.7523193359375, |
|
"loss": 0.5306, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.5691486597061157, |
|
"rewards/margins": 0.7542569041252136, |
|
"rewards/rejected": -1.3234055042266846, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.9264590421355666, |
|
"grad_norm": 35.75, |
|
"learning_rate": 1.4624891219531256e-07, |
|
"logits/chosen": 0.22607457637786865, |
|
"logits/rejected": 0.24305304884910583, |
|
"logps/chosen": -352.2106018066406, |
|
"logps/rejected": -320.9057312011719, |
|
"loss": 0.5535, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.5814008116722107, |
|
"rewards/margins": 0.6882680058479309, |
|
"rewards/rejected": -1.2696688175201416, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.9316932740120387, |
|
"grad_norm": 31.625, |
|
"learning_rate": 1.2618758516218187e-07, |
|
"logits/chosen": 0.2960145175457001, |
|
"logits/rejected": 0.2859836220741272, |
|
"logps/chosen": -311.757080078125, |
|
"logps/rejected": -291.49859619140625, |
|
"loss": 0.5605, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.5901347398757935, |
|
"rewards/margins": 0.6353054046630859, |
|
"rewards/rejected": -1.225440263748169, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.9369275058885108, |
|
"grad_norm": 41.5, |
|
"learning_rate": 1.0758804189626492e-07, |
|
"logits/chosen": 0.2627493441104889, |
|
"logits/rejected": 0.3188014328479767, |
|
"logps/chosen": -339.60894775390625, |
|
"logps/rejected": -312.9742736816406, |
|
"loss": 0.5805, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.6211244463920593, |
|
"rewards/margins": 0.610070526599884, |
|
"rewards/rejected": -1.2311948537826538, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.942161737764983, |
|
"grad_norm": 44.25, |
|
"learning_rate": 9.045586088686497e-08, |
|
"logits/chosen": 0.24961581826210022, |
|
"logits/rejected": 0.1900576502084732, |
|
"logps/chosen": -364.18084716796875, |
|
"logps/rejected": -320.77935791015625, |
|
"loss": 0.5466, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.5827781558036804, |
|
"rewards/margins": 0.7221345901489258, |
|
"rewards/rejected": -1.304912805557251, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.9473959696414551, |
|
"grad_norm": 47.25, |
|
"learning_rate": 7.479618052298132e-08, |
|
"logits/chosen": 0.3448382019996643, |
|
"logits/rejected": 0.25041282176971436, |
|
"logps/chosen": -367.3106689453125, |
|
"logps/rejected": -348.275634765625, |
|
"loss": 0.5575, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.6221089363098145, |
|
"rewards/margins": 0.6926394104957581, |
|
"rewards/rejected": -1.3147482872009277, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.9526302015179272, |
|
"grad_norm": 34.25, |
|
"learning_rate": 6.06136975521715e-08, |
|
"logits/chosen": 0.12194709479808807, |
|
"logits/rejected": 0.2333538830280304, |
|
"logps/chosen": -364.0761413574219, |
|
"logps/rejected": -331.1363525390625, |
|
"loss": 0.5335, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6387330293655396, |
|
"rewards/margins": 0.7657533288002014, |
|
"rewards/rejected": -1.4044864177703857, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.9578644333943994, |
|
"grad_norm": 34.25, |
|
"learning_rate": 4.7912665671874246e-08, |
|
"logits/chosen": 0.1737120896577835, |
|
"logits/rejected": 0.22887209057807922, |
|
"logps/chosen": -345.92010498046875, |
|
"logps/rejected": -320.91265869140625, |
|
"loss": 0.5706, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": -0.6163133978843689, |
|
"rewards/margins": 0.6825416088104248, |
|
"rewards/rejected": -1.2988550662994385, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.9630986652708715, |
|
"grad_norm": 40.0, |
|
"learning_rate": 3.669689425361444e-08, |
|
"logits/chosen": 0.23761753737926483, |
|
"logits/rejected": 0.2581033408641815, |
|
"logps/chosen": -318.6600341796875, |
|
"logps/rejected": -311.02447509765625, |
|
"loss": 0.5523, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.545235276222229, |
|
"rewards/margins": 0.6656547784805298, |
|
"rewards/rejected": -1.2108900547027588, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.9683328971473436, |
|
"grad_norm": 47.25, |
|
"learning_rate": 2.6969747200472073e-08, |
|
"logits/chosen": 0.2629963159561157, |
|
"logits/rejected": 0.41755908727645874, |
|
"logps/chosen": -329.5137634277344, |
|
"logps/rejected": -319.69329833984375, |
|
"loss": 0.5877, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.6437569856643677, |
|
"rewards/margins": 0.6761563420295715, |
|
"rewards/rejected": -1.3199132680892944, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.9735671290238157, |
|
"grad_norm": 41.0, |
|
"learning_rate": 1.873414193816092e-08, |
|
"logits/chosen": 0.28942468762397766, |
|
"logits/rejected": 0.29614606499671936, |
|
"logps/chosen": -374.42071533203125, |
|
"logps/rejected": -350.49163818359375, |
|
"loss": 0.5343, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.6072831749916077, |
|
"rewards/margins": 0.7554194331169128, |
|
"rewards/rejected": -1.3627026081085205, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.9788013609002879, |
|
"grad_norm": 52.5, |
|
"learning_rate": 1.1992548540016858e-08, |
|
"logits/chosen": 0.20397917926311493, |
|
"logits/rejected": 0.19432392716407776, |
|
"logps/chosen": -374.0498962402344, |
|
"logps/rejected": -343.69732666015625, |
|
"loss": 0.5625, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6645746827125549, |
|
"rewards/margins": 0.6493507027626038, |
|
"rewards/rejected": -1.3139253854751587, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.98403559277676, |
|
"grad_norm": 40.0, |
|
"learning_rate": 6.746988986156e-09, |
|
"logits/chosen": 0.26821833848953247, |
|
"logits/rejected": 0.34926992654800415, |
|
"logps/chosen": -333.3935546875, |
|
"logps/rejected": -306.5715637207031, |
|
"loss": 0.543, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.6133612394332886, |
|
"rewards/margins": 0.7218830585479736, |
|
"rewards/rejected": -1.3352441787719727, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.9892698246532321, |
|
"grad_norm": 39.5, |
|
"learning_rate": 2.9990365570314874e-09, |
|
"logits/chosen": 0.2864960730075836, |
|
"logits/rejected": 0.2247813194990158, |
|
"logps/chosen": -365.5644226074219, |
|
"logps/rejected": -339.42352294921875, |
|
"loss": 0.5211, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.5904373526573181, |
|
"rewards/margins": 0.726112961769104, |
|
"rewards/rejected": -1.3165501356124878, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.9945040565297043, |
|
"grad_norm": 40.75, |
|
"learning_rate": 7.498153615653758e-10, |
|
"logits/chosen": 0.1960332989692688, |
|
"logits/rejected": 0.2011827528476715, |
|
"logps/chosen": -312.5413818359375, |
|
"logps/rejected": -306.91925048828125, |
|
"loss": 0.5729, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5954924821853638, |
|
"rewards/margins": 0.6465845704078674, |
|
"rewards/rejected": -1.242077112197876, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.9997382884061764, |
|
"grad_norm": 38.0, |
|
"learning_rate": 0.0, |
|
"logits/chosen": 0.20826788246631622, |
|
"logits/rejected": 0.1714758276939392, |
|
"logps/chosen": -357.2660217285156, |
|
"logps/rejected": -325.0472412109375, |
|
"loss": 0.5777, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.650245726108551, |
|
"rewards/margins": 0.598173975944519, |
|
"rewards/rejected": -1.2484197616577148, |
|
"step": 1910 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1910, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|