|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.999649982499125, |
|
"eval_steps": 500, |
|
"global_step": 357, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0280014000700035, |
|
"grad_norm": 61.10678368436559, |
|
"learning_rate": 1.3888888888888888e-07, |
|
"logits/chosen": -0.6513304710388184, |
|
"logits/rejected": -0.6610185503959656, |
|
"logps/chosen": -396.359619140625, |
|
"logps/rejected": -397.0393371582031, |
|
"loss": 0.8679, |
|
"rewards/accuracies": 0.32499998807907104, |
|
"rewards/chosen": 0.006401772145181894, |
|
"rewards/margins": -0.00633437093347311, |
|
"rewards/rejected": 0.012736144475638866, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.056002800140007, |
|
"grad_norm": 58.411802607555295, |
|
"learning_rate": 2.7777777777777776e-07, |
|
"logits/chosen": -0.6498872637748718, |
|
"logits/rejected": -0.6464060544967651, |
|
"logps/chosen": -357.53594970703125, |
|
"logps/rejected": -362.6917419433594, |
|
"loss": 0.8677, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.01474761962890625, |
|
"rewards/margins": -0.0023844907991588116, |
|
"rewards/rejected": -0.012363128364086151, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0840042002100105, |
|
"grad_norm": 58.885273724206364, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -0.6749114394187927, |
|
"logits/rejected": -0.6687039136886597, |
|
"logps/chosen": -364.6935729980469, |
|
"logps/rejected": -363.3437805175781, |
|
"loss": 0.8652, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.002331914845854044, |
|
"rewards/margins": 0.0017761134076863527, |
|
"rewards/rejected": -0.004108029417693615, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.112005600280014, |
|
"grad_norm": 58.81350398572691, |
|
"learning_rate": 4.998084579146532e-07, |
|
"logits/chosen": -0.6095571517944336, |
|
"logits/rejected": -0.6133966445922852, |
|
"logps/chosen": -391.51312255859375, |
|
"logps/rejected": -383.5360412597656, |
|
"loss": 0.857, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.04013464227318764, |
|
"rewards/margins": 0.03998289257287979, |
|
"rewards/rejected": 0.00015174821601249278, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1400070003500175, |
|
"grad_norm": 53.82936706874698, |
|
"learning_rate": 4.976569787782584e-07, |
|
"logits/chosen": -0.6407713294029236, |
|
"logits/rejected": -0.6338817477226257, |
|
"logps/chosen": -394.0837707519531, |
|
"logps/rejected": -388.13946533203125, |
|
"loss": 0.8571, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.0042350986041128635, |
|
"rewards/margins": 0.02888796292245388, |
|
"rewards/rejected": -0.03312305733561516, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.168008400420021, |
|
"grad_norm": 94.59632664663164, |
|
"learning_rate": 4.931352528237397e-07, |
|
"logits/chosen": -0.6345051527023315, |
|
"logits/rejected": -0.6252551674842834, |
|
"logps/chosen": -362.01007080078125, |
|
"logps/rejected": -361.896484375, |
|
"loss": 0.8467, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.026126855984330177, |
|
"rewards/margins": 0.04506516456604004, |
|
"rewards/rejected": -0.07119203358888626, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1960098004900245, |
|
"grad_norm": 53.596742254187554, |
|
"learning_rate": 4.862865560637862e-07, |
|
"logits/chosen": -0.6581880450248718, |
|
"logits/rejected": -0.6622239351272583, |
|
"logps/chosen": -360.1644287109375, |
|
"logps/rejected": -370.22357177734375, |
|
"loss": 0.8469, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.03359580785036087, |
|
"rewards/margins": 0.0706966444849968, |
|
"rewards/rejected": -0.10429245233535767, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.224011200560028, |
|
"grad_norm": 58.433831169578426, |
|
"learning_rate": 4.771764352146005e-07, |
|
"logits/chosen": -0.6605185270309448, |
|
"logits/rejected": -0.6589399576187134, |
|
"logps/chosen": -385.06634521484375, |
|
"logps/rejected": -387.2419738769531, |
|
"loss": 0.8296, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.0589970238506794, |
|
"rewards/margins": 0.06647703796625137, |
|
"rewards/rejected": -0.12547405064105988, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.2520126006300315, |
|
"grad_norm": 57.687812779743524, |
|
"learning_rate": 4.658920803689553e-07, |
|
"logits/chosen": -0.6234251260757446, |
|
"logits/rejected": -0.63193279504776, |
|
"logps/chosen": -380.99102783203125, |
|
"logps/rejected": -384.75, |
|
"loss": 0.8274, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.09747395664453506, |
|
"rewards/margins": 0.13489681482315063, |
|
"rewards/rejected": -0.2323707789182663, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.280014000700035, |
|
"grad_norm": 51.987093732637376, |
|
"learning_rate": 4.5254149052732074e-07, |
|
"logits/chosen": -0.5935919880867004, |
|
"logits/rejected": -0.5995978116989136, |
|
"logps/chosen": -365.17034912109375, |
|
"logps/rejected": -366.9688720703125, |
|
"loss": 0.823, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.1324506253004074, |
|
"rewards/margins": 0.11821047961711884, |
|
"rewards/rejected": -0.25066110491752625, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3080154007700385, |
|
"grad_norm": 56.36533929974317, |
|
"learning_rate": 4.372524399734997e-07, |
|
"logits/chosen": -0.6224404573440552, |
|
"logits/rejected": -0.6308005452156067, |
|
"logps/chosen": -366.1192626953125, |
|
"logps/rejected": -365.95452880859375, |
|
"loss": 0.8183, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.23297986388206482, |
|
"rewards/margins": 0.2393256425857544, |
|
"rewards/rejected": -0.4723054766654968, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.336016800840042, |
|
"grad_norm": 58.24416326380675, |
|
"learning_rate": 4.201712553872657e-07, |
|
"logits/chosen": -0.6303149461746216, |
|
"logits/rejected": -0.6240934729576111, |
|
"logps/chosen": -400.3144226074219, |
|
"logps/rejected": -398.35565185546875, |
|
"loss": 0.8163, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.15998268127441406, |
|
"rewards/margins": 0.1608564555644989, |
|
"rewards/rejected": -0.3208391070365906, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.3640182009100455, |
|
"grad_norm": 55.406602765547156, |
|
"learning_rate": 4.014614153978704e-07, |
|
"logits/chosen": -0.664055585861206, |
|
"logits/rejected": -0.6637083888053894, |
|
"logps/chosen": -348.1646423339844, |
|
"logps/rejected": -348.63470458984375, |
|
"loss": 0.8041, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.1901615858078003, |
|
"rewards/margins": 0.11691661179065704, |
|
"rewards/rejected": -0.30707818269729614, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.392019600980049, |
|
"grad_norm": 68.77886528211879, |
|
"learning_rate": 3.8130198598165444e-07, |
|
"logits/chosen": -0.6314767599105835, |
|
"logits/rejected": -0.6304478645324707, |
|
"logps/chosen": -422.37127685546875, |
|
"logps/rejected": -416.79571533203125, |
|
"loss": 0.8088, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.2302822768688202, |
|
"rewards/margins": 0.11769070476293564, |
|
"rewards/rejected": -0.3479730188846588, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.4200210010500525, |
|
"grad_norm": 59.72315250139388, |
|
"learning_rate": 3.598859066780754e-07, |
|
"logits/chosen": -0.6638253331184387, |
|
"logits/rejected": -0.6717976331710815, |
|
"logps/chosen": -417.35394287109375, |
|
"logps/rejected": -419.6104431152344, |
|
"loss": 0.7981, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.1914275586605072, |
|
"rewards/margins": 0.2206917554140091, |
|
"rewards/rejected": -0.4121193289756775, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.448022401120056, |
|
"grad_norm": 55.46653773859189, |
|
"learning_rate": 3.374181440262409e-07, |
|
"logits/chosen": -0.660588264465332, |
|
"logits/rejected": -0.6529449224472046, |
|
"logps/chosen": -362.0516052246094, |
|
"logps/rejected": -363.2063293457031, |
|
"loss": 0.7991, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.22989945113658905, |
|
"rewards/margins": 0.2941688001155853, |
|
"rewards/rejected": -0.5240682363510132, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.4760238011900595, |
|
"grad_norm": 52.29591301111268, |
|
"learning_rate": 3.14113729894821e-07, |
|
"logits/chosen": -0.6663147807121277, |
|
"logits/rejected": -0.6645540595054626, |
|
"logps/chosen": -349.26556396484375, |
|
"logps/rejected": -350.56536865234375, |
|
"loss": 0.8001, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.28285256028175354, |
|
"rewards/margins": 0.2937370836734772, |
|
"rewards/rejected": -0.5765896439552307, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.504025201260063, |
|
"grad_norm": 52.652777282426925, |
|
"learning_rate": 2.9019570347986706e-07, |
|
"logits/chosen": -0.6935344338417053, |
|
"logits/rejected": -0.6966893076896667, |
|
"logps/chosen": -408.3907165527344, |
|
"logps/rejected": -406.89556884765625, |
|
"loss": 0.7937, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.14246144890785217, |
|
"rewards/margins": 0.26209157705307007, |
|
"rewards/rejected": -0.40455299615859985, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.5320266013300665, |
|
"grad_norm": 53.707911382607165, |
|
"learning_rate": 2.6589297666702654e-07, |
|
"logits/chosen": -0.6629470586776733, |
|
"logits/rejected": -0.6508482694625854, |
|
"logps/chosen": -381.4786376953125, |
|
"logps/rejected": -381.14105224609375, |
|
"loss": 0.7828, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.22650186717510223, |
|
"rewards/margins": 0.49067601561546326, |
|
"rewards/rejected": -0.7171779274940491, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.56002800140007, |
|
"grad_norm": 58.031636496032235, |
|
"learning_rate": 2.414381431880974e-07, |
|
"logits/chosen": -0.7053166627883911, |
|
"logits/rejected": -0.7067330479621887, |
|
"logps/chosen": -338.40826416015625, |
|
"logps/rejected": -340.7874755859375, |
|
"loss": 0.7841, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.13104045391082764, |
|
"rewards/margins": 0.4193459451198578, |
|
"rewards/rejected": -0.550386369228363, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5880294014700735, |
|
"grad_norm": 52.285898129134694, |
|
"learning_rate": 2.1706525253979534e-07, |
|
"logits/chosen": -0.6645469665527344, |
|
"logits/rejected": -0.682064414024353, |
|
"logps/chosen": -353.6531677246094, |
|
"logps/rejected": -361.7914123535156, |
|
"loss": 0.7812, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2817208766937256, |
|
"rewards/margins": 0.40158504247665405, |
|
"rewards/rejected": -0.6833059191703796, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.616030801540077, |
|
"grad_norm": 51.20855890287249, |
|
"learning_rate": 1.9300756996985379e-07, |
|
"logits/chosen": -0.6860191822052002, |
|
"logits/rejected": -0.6938604116439819, |
|
"logps/chosen": -383.1531982421875, |
|
"logps/rejected": -388.2940979003906, |
|
"loss": 0.7743, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.1995268315076828, |
|
"rewards/margins": 0.29305171966552734, |
|
"rewards/rejected": -0.49257856607437134, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6440322016100805, |
|
"grad_norm": 59.00218646947897, |
|
"learning_rate": 1.6949534396892355e-07, |
|
"logits/chosen": -0.63894122838974, |
|
"logits/rejected": -0.6412523984909058, |
|
"logps/chosen": -371.1944580078125, |
|
"logps/rejected": -369.9986877441406, |
|
"loss": 0.7781, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.2539726793766022, |
|
"rewards/margins": 0.2992478013038635, |
|
"rewards/rejected": -0.5532204508781433, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.672033601680084, |
|
"grad_norm": 54.18024076081892, |
|
"learning_rate": 1.4675360263490295e-07, |
|
"logits/chosen": -0.6566568613052368, |
|
"logits/rejected": -0.6547525525093079, |
|
"logps/chosen": -374.5245056152344, |
|
"logps/rejected": -372.85205078125, |
|
"loss": 0.7789, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.27056020498275757, |
|
"rewards/margins": 0.2600334584712982, |
|
"rewards/rejected": -0.5305936932563782, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.7000350017500875, |
|
"grad_norm": 51.99890637389821, |
|
"learning_rate": 1.2500000000000005e-07, |
|
"logits/chosen": -0.6490362882614136, |
|
"logits/rejected": -0.6515687108039856, |
|
"logps/chosen": -339.3143005371094, |
|
"logps/rejected": -345.4483642578125, |
|
"loss": 0.7694, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.21335434913635254, |
|
"rewards/margins": 0.34682440757751465, |
|
"rewards/rejected": -0.5601787567138672, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.728036401820091, |
|
"grad_norm": 51.948937918535066, |
|
"learning_rate": 1.0444273293265149e-07, |
|
"logits/chosen": -0.6950569748878479, |
|
"logits/rejected": -0.6903547644615173, |
|
"logps/chosen": -366.1841735839844, |
|
"logps/rejected": -369.20501708984375, |
|
"loss": 0.7654, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.26671379804611206, |
|
"rewards/margins": 0.5104727149009705, |
|
"rewards/rejected": -0.7771865129470825, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7560378018900945, |
|
"grad_norm": 50.70399011598055, |
|
"learning_rate": 8.527854855097224e-08, |
|
"logits/chosen": -0.6942373514175415, |
|
"logits/rejected": -0.6850725412368774, |
|
"logps/chosen": -373.9687194824219, |
|
"logps/rejected": -375.5534973144531, |
|
"loss": 0.777, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.2907332181930542, |
|
"rewards/margins": 0.28084948658943176, |
|
"rewards/rejected": -0.5715826749801636, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.784039201960098, |
|
"grad_norm": 57.415336554214456, |
|
"learning_rate": 6.769086121815423e-08, |
|
"logits/chosen": -0.680923342704773, |
|
"logits/rejected": -0.6828472018241882, |
|
"logps/chosen": -355.0218505859375, |
|
"logps/rejected": -355.49542236328125, |
|
"loss": 0.7707, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.30371373891830444, |
|
"rewards/margins": 0.31885650753974915, |
|
"rewards/rejected": -0.622570276260376, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.8120406020301015, |
|
"grad_norm": 57.63174427513397, |
|
"learning_rate": 5.184799714145557e-08, |
|
"logits/chosen": -0.695022702217102, |
|
"logits/rejected": -0.6762406826019287, |
|
"logps/chosen": -365.63861083984375, |
|
"logps/rejected": -363.7517395019531, |
|
"loss": 0.7698, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.2936111092567444, |
|
"rewards/margins": 0.30915942788124084, |
|
"rewards/rejected": -0.6027705073356628, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.840042002100105, |
|
"grad_norm": 56.79055475776468, |
|
"learning_rate": 3.790158337517127e-08, |
|
"logits/chosen": -0.6702035069465637, |
|
"logits/rejected": -0.6661104559898376, |
|
"logps/chosen": -377.29071044921875, |
|
"logps/rejected": -382.6650695800781, |
|
"loss": 0.7656, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.349121630191803, |
|
"rewards/margins": 0.44733327627182007, |
|
"rewards/rejected": -0.7964549660682678, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.8680434021701086, |
|
"grad_norm": 55.63568503616908, |
|
"learning_rate": 2.5985096645928934e-08, |
|
"logits/chosen": -0.6938387155532837, |
|
"logits/rejected": -0.6860832571983337, |
|
"logps/chosen": -408.07073974609375, |
|
"logps/rejected": -408.2176208496094, |
|
"loss": 0.7705, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.4276718199253082, |
|
"rewards/margins": 0.31639137864112854, |
|
"rewards/rejected": -0.7440632581710815, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.896044802240112, |
|
"grad_norm": 51.80246792386731, |
|
"learning_rate": 1.6212585889044366e-08, |
|
"logits/chosen": -0.6435590386390686, |
|
"logits/rejected": -0.6448679566383362, |
|
"logps/chosen": -388.0117492675781, |
|
"logps/rejected": -395.35443115234375, |
|
"loss": 0.7644, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.27469509840011597, |
|
"rewards/margins": 0.4029006063938141, |
|
"rewards/rejected": -0.6775957345962524, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.9240462023101155, |
|
"grad_norm": 54.61689196980515, |
|
"learning_rate": 8.677580722139671e-09, |
|
"logits/chosen": -0.6612351536750793, |
|
"logits/rejected": -0.6655117273330688, |
|
"logps/chosen": -411.57568359375, |
|
"logps/rejected": -413.49468994140625, |
|
"loss": 0.7677, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.2688027024269104, |
|
"rewards/margins": 0.27715611457824707, |
|
"rewards/rejected": -0.5459588766098022, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.952047602380119, |
|
"grad_norm": 47.07624026939366, |
|
"learning_rate": 3.452196302677901e-09, |
|
"logits/chosen": -0.6746488809585571, |
|
"logits/rejected": -0.6755790710449219, |
|
"logps/chosen": -368.6392517089844, |
|
"logps/rejected": -369.9941101074219, |
|
"loss": 0.7629, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.2345232516527176, |
|
"rewards/margins": 0.3673866391181946, |
|
"rewards/rejected": -0.601909875869751, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.9800490024501225, |
|
"grad_norm": 51.32121740454206, |
|
"learning_rate": 5.864431365401879e-10, |
|
"logits/chosen": -0.6399149894714355, |
|
"logits/rejected": -0.6450085639953613, |
|
"logps/chosen": -406.9984436035156, |
|
"logps/rejected": -398.86395263671875, |
|
"loss": 0.7734, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.28699636459350586, |
|
"rewards/margins": 0.3435482978820801, |
|
"rewards/rejected": -0.6305446624755859, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.999649982499125, |
|
"step": 357, |
|
"total_flos": 115812661985280.0, |
|
"train_loss": 0.8014469694356624, |
|
"train_runtime": 7763.3592, |
|
"train_samples_per_second": 5.887, |
|
"train_steps_per_second": 0.046 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 357, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 115812661985280.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|