{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.999649982499125, "eval_steps": 500, "global_step": 357, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0280014000700035, "grad_norm": 61.10678368436559, "learning_rate": 1.3888888888888888e-07, "logits/chosen": -0.6513304710388184, "logits/rejected": -0.6610185503959656, "logps/chosen": -396.359619140625, "logps/rejected": -397.0393371582031, "loss": 0.8679, "rewards/accuracies": 0.32499998807907104, "rewards/chosen": 0.006401772145181894, "rewards/margins": -0.00633437093347311, "rewards/rejected": 0.012736144475638866, "step": 10 }, { "epoch": 0.056002800140007, "grad_norm": 58.411802607555295, "learning_rate": 2.7777777777777776e-07, "logits/chosen": -0.6498872637748718, "logits/rejected": -0.6464060544967651, "logps/chosen": -357.53594970703125, "logps/rejected": -362.6917419433594, "loss": 0.8677, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -0.01474761962890625, "rewards/margins": -0.0023844907991588116, "rewards/rejected": -0.012363128364086151, "step": 20 }, { "epoch": 0.0840042002100105, "grad_norm": 58.885273724206364, "learning_rate": 4.1666666666666667e-07, "logits/chosen": -0.6749114394187927, "logits/rejected": -0.6687039136886597, "logps/chosen": -364.6935729980469, "logps/rejected": -363.3437805175781, "loss": 0.8652, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.002331914845854044, "rewards/margins": 0.0017761134076863527, "rewards/rejected": -0.004108029417693615, "step": 30 }, { "epoch": 0.112005600280014, "grad_norm": 58.81350398572691, "learning_rate": 4.998084579146532e-07, "logits/chosen": -0.6095571517944336, "logits/rejected": -0.6133966445922852, "logps/chosen": -391.51312255859375, "logps/rejected": -383.5360412597656, "loss": 0.857, "rewards/accuracies": 0.643750011920929, "rewards/chosen": 0.04013464227318764, "rewards/margins": 0.03998289257287979, "rewards/rejected": 0.00015174821601249278, "step": 40 }, { "epoch": 0.1400070003500175, "grad_norm": 53.82936706874698, "learning_rate": 4.976569787782584e-07, "logits/chosen": -0.6407713294029236, "logits/rejected": -0.6338817477226257, "logps/chosen": -394.0837707519531, "logps/rejected": -388.13946533203125, "loss": 0.8571, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.0042350986041128635, "rewards/margins": 0.02888796292245388, "rewards/rejected": -0.03312305733561516, "step": 50 }, { "epoch": 0.168008400420021, "grad_norm": 94.59632664663164, "learning_rate": 4.931352528237397e-07, "logits/chosen": -0.6345051527023315, "logits/rejected": -0.6252551674842834, "logps/chosen": -362.01007080078125, "logps/rejected": -361.896484375, "loss": 0.8467, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.026126855984330177, "rewards/margins": 0.04506516456604004, "rewards/rejected": -0.07119203358888626, "step": 60 }, { "epoch": 0.1960098004900245, "grad_norm": 53.596742254187554, "learning_rate": 4.862865560637862e-07, "logits/chosen": -0.6581880450248718, "logits/rejected": -0.6622239351272583, "logps/chosen": -360.1644287109375, "logps/rejected": -370.22357177734375, "loss": 0.8469, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.03359580785036087, "rewards/margins": 0.0706966444849968, "rewards/rejected": -0.10429245233535767, "step": 70 }, { "epoch": 0.224011200560028, "grad_norm": 58.433831169578426, "learning_rate": 4.771764352146005e-07, "logits/chosen": -0.6605185270309448, "logits/rejected": -0.6589399576187134, "logps/chosen": -385.06634521484375, "logps/rejected": -387.2419738769531, "loss": 0.8296, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.0589970238506794, "rewards/margins": 0.06647703796625137, "rewards/rejected": -0.12547405064105988, "step": 80 }, { "epoch": 0.2520126006300315, "grad_norm": 57.687812779743524, "learning_rate": 4.658920803689553e-07, "logits/chosen": -0.6234251260757446, "logits/rejected": -0.63193279504776, "logps/chosen": -380.99102783203125, "logps/rejected": -384.75, "loss": 0.8274, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.09747395664453506, "rewards/margins": 0.13489681482315063, "rewards/rejected": -0.2323707789182663, "step": 90 }, { "epoch": 0.280014000700035, "grad_norm": 51.987093732637376, "learning_rate": 4.5254149052732074e-07, "logits/chosen": -0.5935919880867004, "logits/rejected": -0.5995978116989136, "logps/chosen": -365.17034912109375, "logps/rejected": -366.9688720703125, "loss": 0.823, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.1324506253004074, "rewards/margins": 0.11821047961711884, "rewards/rejected": -0.25066110491752625, "step": 100 }, { "epoch": 0.3080154007700385, "grad_norm": 56.36533929974317, "learning_rate": 4.372524399734997e-07, "logits/chosen": -0.6224404573440552, "logits/rejected": -0.6308005452156067, "logps/chosen": -366.1192626953125, "logps/rejected": -365.95452880859375, "loss": 0.8183, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.23297986388206482, "rewards/margins": 0.2393256425857544, "rewards/rejected": -0.4723054766654968, "step": 110 }, { "epoch": 0.336016800840042, "grad_norm": 58.24416326380675, "learning_rate": 4.201712553872657e-07, "logits/chosen": -0.6303149461746216, "logits/rejected": -0.6240934729576111, "logps/chosen": -400.3144226074219, "logps/rejected": -398.35565185546875, "loss": 0.8163, "rewards/accuracies": 0.59375, "rewards/chosen": -0.15998268127441406, "rewards/margins": 0.1608564555644989, "rewards/rejected": -0.3208391070365906, "step": 120 }, { "epoch": 0.3640182009100455, "grad_norm": 55.406602765547156, "learning_rate": 4.014614153978704e-07, "logits/chosen": -0.664055585861206, "logits/rejected": -0.6637083888053894, "logps/chosen": -348.1646423339844, "logps/rejected": -348.63470458984375, "loss": 0.8041, "rewards/accuracies": 0.625, "rewards/chosen": -0.1901615858078003, "rewards/margins": 0.11691661179065704, "rewards/rejected": -0.30707818269729614, "step": 130 }, { "epoch": 0.392019600980049, "grad_norm": 68.77886528211879, "learning_rate": 3.8130198598165444e-07, "logits/chosen": -0.6314767599105835, "logits/rejected": -0.6304478645324707, "logps/chosen": -422.37127685546875, "logps/rejected": -416.79571533203125, "loss": 0.8088, "rewards/accuracies": 0.625, "rewards/chosen": -0.2302822768688202, "rewards/margins": 0.11769070476293564, "rewards/rejected": -0.3479730188846588, "step": 140 }, { "epoch": 0.4200210010500525, "grad_norm": 59.72315250139388, "learning_rate": 3.598859066780754e-07, "logits/chosen": -0.6638253331184387, "logits/rejected": -0.6717976331710815, "logps/chosen": -417.35394287109375, "logps/rejected": -419.6104431152344, "loss": 0.7981, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.1914275586605072, "rewards/margins": 0.2206917554140091, "rewards/rejected": -0.4121193289756775, "step": 150 }, { "epoch": 0.448022401120056, "grad_norm": 55.46653773859189, "learning_rate": 3.374181440262409e-07, "logits/chosen": -0.660588264465332, "logits/rejected": -0.6529449224472046, "logps/chosen": -362.0516052246094, "logps/rejected": -363.2063293457031, "loss": 0.7991, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.22989945113658905, "rewards/margins": 0.2941688001155853, "rewards/rejected": -0.5240682363510132, "step": 160 }, { "epoch": 0.4760238011900595, "grad_norm": 52.29591301111268, "learning_rate": 3.14113729894821e-07, "logits/chosen": -0.6663147807121277, "logits/rejected": -0.6645540595054626, "logps/chosen": -349.26556396484375, "logps/rejected": -350.56536865234375, "loss": 0.8001, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.28285256028175354, "rewards/margins": 0.2937370836734772, "rewards/rejected": -0.5765896439552307, "step": 170 }, { "epoch": 0.504025201260063, "grad_norm": 52.652777282426925, "learning_rate": 2.9019570347986706e-07, "logits/chosen": -0.6935344338417053, "logits/rejected": -0.6966893076896667, "logps/chosen": -408.3907165527344, "logps/rejected": -406.89556884765625, "loss": 0.7937, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.14246144890785217, "rewards/margins": 0.26209157705307007, "rewards/rejected": -0.40455299615859985, "step": 180 }, { "epoch": 0.5320266013300665, "grad_norm": 53.707911382607165, "learning_rate": 2.6589297666702654e-07, "logits/chosen": -0.6629470586776733, "logits/rejected": -0.6508482694625854, "logps/chosen": -381.4786376953125, "logps/rejected": -381.14105224609375, "loss": 0.7828, "rewards/accuracies": 0.71875, "rewards/chosen": -0.22650186717510223, "rewards/margins": 0.49067601561546326, "rewards/rejected": -0.7171779274940491, "step": 190 }, { "epoch": 0.56002800140007, "grad_norm": 58.031636496032235, "learning_rate": 2.414381431880974e-07, "logits/chosen": -0.7053166627883911, "logits/rejected": -0.7067330479621887, "logps/chosen": -338.40826416015625, "logps/rejected": -340.7874755859375, "loss": 0.7841, "rewards/accuracies": 0.59375, "rewards/chosen": -0.13104045391082764, "rewards/margins": 0.4193459451198578, "rewards/rejected": -0.550386369228363, "step": 200 }, { "epoch": 0.5880294014700735, "grad_norm": 52.285898129134694, "learning_rate": 2.1706525253979534e-07, "logits/chosen": -0.6645469665527344, "logits/rejected": -0.682064414024353, "logps/chosen": -353.6531677246094, "logps/rejected": -361.7914123535156, "loss": 0.7812, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2817208766937256, "rewards/margins": 0.40158504247665405, "rewards/rejected": -0.6833059191703796, "step": 210 }, { "epoch": 0.616030801540077, "grad_norm": 51.20855890287249, "learning_rate": 1.9300756996985379e-07, "logits/chosen": -0.6860191822052002, "logits/rejected": -0.6938604116439819, "logps/chosen": -383.1531982421875, "logps/rejected": -388.2940979003906, "loss": 0.7743, "rewards/accuracies": 0.65625, "rewards/chosen": -0.1995268315076828, "rewards/margins": 0.29305171966552734, "rewards/rejected": -0.49257856607437134, "step": 220 }, { "epoch": 0.6440322016100805, "grad_norm": 59.00218646947897, "learning_rate": 1.6949534396892355e-07, "logits/chosen": -0.63894122838974, "logits/rejected": -0.6412523984909058, "logps/chosen": -371.1944580078125, "logps/rejected": -369.9986877441406, "loss": 0.7781, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2539726793766022, "rewards/margins": 0.2992478013038635, "rewards/rejected": -0.5532204508781433, "step": 230 }, { "epoch": 0.672033601680084, "grad_norm": 54.18024076081892, "learning_rate": 1.4675360263490295e-07, "logits/chosen": -0.6566568613052368, "logits/rejected": -0.6547525525093079, "logps/chosen": -374.5245056152344, "logps/rejected": -372.85205078125, "loss": 0.7789, "rewards/accuracies": 0.625, "rewards/chosen": -0.27056020498275757, "rewards/margins": 0.2600334584712982, "rewards/rejected": -0.5305936932563782, "step": 240 }, { "epoch": 0.7000350017500875, "grad_norm": 51.99890637389821, "learning_rate": 1.2500000000000005e-07, "logits/chosen": -0.6490362882614136, "logits/rejected": -0.6515687108039856, "logps/chosen": -339.3143005371094, "logps/rejected": -345.4483642578125, "loss": 0.7694, "rewards/accuracies": 0.65625, "rewards/chosen": -0.21335434913635254, "rewards/margins": 0.34682440757751465, "rewards/rejected": -0.5601787567138672, "step": 250 }, { "epoch": 0.728036401820091, "grad_norm": 51.948937918535066, "learning_rate": 1.0444273293265149e-07, "logits/chosen": -0.6950569748878479, "logits/rejected": -0.6903547644615173, "logps/chosen": -366.1841735839844, "logps/rejected": -369.20501708984375, "loss": 0.7654, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.26671379804611206, "rewards/margins": 0.5104727149009705, "rewards/rejected": -0.7771865129470825, "step": 260 }, { "epoch": 0.7560378018900945, "grad_norm": 50.70399011598055, "learning_rate": 8.527854855097224e-08, "logits/chosen": -0.6942373514175415, "logits/rejected": -0.6850725412368774, "logps/chosen": -373.9687194824219, "logps/rejected": -375.5534973144531, "loss": 0.777, "rewards/accuracies": 0.65625, "rewards/chosen": -0.2907332181930542, "rewards/margins": 0.28084948658943176, "rewards/rejected": -0.5715826749801636, "step": 270 }, { "epoch": 0.784039201960098, "grad_norm": 57.415336554214456, "learning_rate": 6.769086121815423e-08, "logits/chosen": -0.680923342704773, "logits/rejected": -0.6828472018241882, "logps/chosen": -355.0218505859375, "logps/rejected": -355.49542236328125, "loss": 0.7707, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.30371373891830444, "rewards/margins": 0.31885650753974915, "rewards/rejected": -0.622570276260376, "step": 280 }, { "epoch": 0.8120406020301015, "grad_norm": 57.63174427513397, "learning_rate": 5.184799714145557e-08, "logits/chosen": -0.695022702217102, "logits/rejected": -0.6762406826019287, "logps/chosen": -365.63861083984375, "logps/rejected": -363.7517395019531, "loss": 0.7698, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2936111092567444, "rewards/margins": 0.30915942788124084, "rewards/rejected": -0.6027705073356628, "step": 290 }, { "epoch": 0.840042002100105, "grad_norm": 56.79055475776468, "learning_rate": 3.790158337517127e-08, "logits/chosen": -0.6702035069465637, "logits/rejected": -0.6661104559898376, "logps/chosen": -377.29071044921875, "logps/rejected": -382.6650695800781, "loss": 0.7656, "rewards/accuracies": 0.6875, "rewards/chosen": -0.349121630191803, "rewards/margins": 0.44733327627182007, "rewards/rejected": -0.7964549660682678, "step": 300 }, { "epoch": 0.8680434021701086, "grad_norm": 55.63568503616908, "learning_rate": 2.5985096645928934e-08, "logits/chosen": -0.6938387155532837, "logits/rejected": -0.6860832571983337, "logps/chosen": -408.07073974609375, "logps/rejected": -408.2176208496094, "loss": 0.7705, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.4276718199253082, "rewards/margins": 0.31639137864112854, "rewards/rejected": -0.7440632581710815, "step": 310 }, { "epoch": 0.896044802240112, "grad_norm": 51.80246792386731, "learning_rate": 1.6212585889044366e-08, "logits/chosen": -0.6435590386390686, "logits/rejected": -0.6448679566383362, "logps/chosen": -388.0117492675781, "logps/rejected": -395.35443115234375, "loss": 0.7644, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.27469509840011597, "rewards/margins": 0.4029006063938141, "rewards/rejected": -0.6775957345962524, "step": 320 }, { "epoch": 0.9240462023101155, "grad_norm": 54.61689196980515, "learning_rate": 8.677580722139671e-09, "logits/chosen": -0.6612351536750793, "logits/rejected": -0.6655117273330688, "logps/chosen": -411.57568359375, "logps/rejected": -413.49468994140625, "loss": 0.7677, "rewards/accuracies": 0.65625, "rewards/chosen": -0.2688027024269104, "rewards/margins": 0.27715611457824707, "rewards/rejected": -0.5459588766098022, "step": 330 }, { "epoch": 0.952047602380119, "grad_norm": 47.07624026939366, "learning_rate": 3.452196302677901e-09, "logits/chosen": -0.6746488809585571, "logits/rejected": -0.6755790710449219, "logps/chosen": -368.6392517089844, "logps/rejected": -369.9941101074219, "loss": 0.7629, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.2345232516527176, "rewards/margins": 0.3673866391181946, "rewards/rejected": -0.601909875869751, "step": 340 }, { "epoch": 0.9800490024501225, "grad_norm": 51.32121740454206, "learning_rate": 5.864431365401879e-10, "logits/chosen": -0.6399149894714355, "logits/rejected": -0.6450085639953613, "logps/chosen": -406.9984436035156, "logps/rejected": -398.86395263671875, "loss": 0.7734, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.28699636459350586, "rewards/margins": 0.3435482978820801, "rewards/rejected": -0.6305446624755859, "step": 350 }, { "epoch": 0.999649982499125, "step": 357, "total_flos": 115812661985280.0, "train_loss": 0.8014469694356624, "train_runtime": 7763.3592, "train_samples_per_second": 5.887, "train_steps_per_second": 0.046 } ], "logging_steps": 10, "max_steps": 357, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 115812661985280.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }