diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,9 +1,9 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 1.0, + "epoch": 0.9994767137624281, "eval_steps": 100, - "global_step": 956, + "global_step": 955, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -11,10 +11,10 @@ { "epoch": 0.0, "learning_rate": 5.208333333333333e-09, - "logits/chosen": -3.0465383529663086, - "logits/rejected": -2.7803585529327393, - "logps/chosen": -262.69439697265625, - "logps/rejected": -200.3923797607422, + "logits/chosen": -1.404180884361267, + "logits/rejected": -1.4915521144866943, + "logps/chosen": -253.50843811035156, + "logps/rejected": -228.21987915039062, "loss": 0.6931, "pred_label": 0.0, "rewards/accuracies": 0.0, @@ -22,1702 +22,1558 @@ "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1, - "use_label": 0.0 + "use_label": 6.0 }, { "epoch": 0.01, "learning_rate": 5.208333333333333e-08, - "logits/chosen": -2.765808582305908, - "logits/rejected": -2.763443946838379, - "logps/chosen": -299.00396728515625, - "logps/rejected": -250.18435668945312, - "loss": 0.6941, - "pred_label": 0.0, - "rewards/accuracies": 0.4444444477558136, - "rewards/chosen": 0.0011364308884367347, - "rewards/margins": 0.0017329632537439466, - "rewards/rejected": -0.0005965338204987347, + "logits/chosen": -1.3619701862335205, + "logits/rejected": -1.3933377265930176, + "logps/chosen": -394.7999267578125, + "logps/rejected": -299.5929870605469, + "loss": 0.6938, + "pred_label": 0.0, + "rewards/accuracies": 0.5694444179534912, + "rewards/chosen": 0.004884083289653063, + "rewards/margins": 0.009783001616597176, + "rewards/rejected": -0.0048989187926054, "step": 10, - "use_label": 0.0 + "use_label": 46.0 }, { "epoch": 0.02, "learning_rate": 1.0416666666666667e-07, - "logits/chosen": -2.7105605602264404, - "logits/rejected": -2.742635726928711, - "logps/chosen": -221.0091552734375, - "logps/rejected": -200.49070739746094, - "loss": 0.6855, - "pred_label": 0.0, - "rewards/accuracies": 0.5375000238418579, - "rewards/chosen": -0.006218413356691599, - "rewards/margins": 0.027181172743439674, - "rewards/rejected": -0.03339958190917969, + "logits/chosen": -1.3801854848861694, + "logits/rejected": -1.3241381645202637, + "logps/chosen": -280.59625244140625, + "logps/rejected": -281.3047790527344, + "loss": 0.6889, + "pred_label": 0.0, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": 0.022532949224114418, + "rewards/margins": 0.00945592112839222, + "rewards/rejected": 0.013077028095722198, "step": 20, - "use_label": 0.0 + "use_label": 122.0 }, { "epoch": 0.03, "learning_rate": 1.5624999999999999e-07, - "logits/chosen": -2.7838003635406494, - "logits/rejected": -2.7471749782562256, - "logps/chosen": -286.62408447265625, - "logps/rejected": -256.59033203125, - "loss": 0.6656, + "logits/chosen": -1.4004117250442505, + "logits/rejected": -1.3532276153564453, + "logps/chosen": -354.6773376464844, + "logps/rejected": -297.12548828125, + "loss": 0.6742, "pred_label": 0.0, - "rewards/accuracies": 0.75, - "rewards/chosen": 0.032594505697488785, - "rewards/margins": 0.0683099776506424, - "rewards/rejected": -0.03571547567844391, + "rewards/accuracies": 0.6875, + "rewards/chosen": 0.09127137809991837, + "rewards/margins": 0.05737986043095589, + "rewards/rejected": 0.03389151394367218, "step": 30, - "use_label": 0.0 + "use_label": 202.0 }, { "epoch": 0.04, "learning_rate": 2.0833333333333333e-07, - "logits/chosen": -2.6903152465820312, - "logits/rejected": -2.681668996810913, - "logps/chosen": -263.98040771484375, - "logps/rejected": -253.0127716064453, - "loss": 0.6449, - "pred_label": 0.0, - "rewards/accuracies": 0.637499988079071, - "rewards/chosen": 0.07213224470615387, - "rewards/margins": 0.12824633717536926, - "rewards/rejected": -0.05611409991979599, + "logits/chosen": -1.375112771987915, + "logits/rejected": -1.3534674644470215, + "logps/chosen": -342.58099365234375, + "logps/rejected": -301.4981384277344, + "loss": 0.6653, + "pred_label": 0.0, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.1729733645915985, + "rewards/margins": 0.04751574248075485, + "rewards/rejected": 0.12545761466026306, "step": 40, - "use_label": 0.0 + "use_label": 282.0 }, { "epoch": 0.05, "learning_rate": 2.604166666666667e-07, - "logits/chosen": -2.7848479747772217, - "logits/rejected": -2.7641561031341553, - "logps/chosen": -302.9726257324219, - "logps/rejected": -290.980712890625, - "loss": 0.63, - "pred_label": 0.0, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.0347287543118, - "rewards/margins": 0.08969736844301224, - "rewards/rejected": -0.12442612648010254, + "logits/chosen": -1.3950073719024658, + "logits/rejected": -1.3905388116836548, + "logps/chosen": -305.8765563964844, + "logps/rejected": -294.03155517578125, + "loss": 0.6438, + "pred_label": 0.0, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": 0.2843405604362488, + "rewards/margins": 0.14093510806560516, + "rewards/rejected": 0.14340545237064362, "step": 50, - "use_label": 0.0 + "use_label": 362.0 }, { "epoch": 0.06, "learning_rate": 3.1249999999999997e-07, - "logits/chosen": -2.8073248863220215, - "logits/rejected": -2.7780768871307373, - "logps/chosen": -274.9972229003906, - "logps/rejected": -271.43719482421875, - "loss": 0.6385, + "logits/chosen": -1.3388121128082275, + "logits/rejected": -1.417626142501831, + "logps/chosen": -326.7298583984375, + "logps/rejected": -311.16656494140625, + "loss": 0.6341, "pred_label": 0.0, - "rewards/accuracies": 0.625, - "rewards/chosen": 0.0038132492918521166, - "rewards/margins": 0.19520750641822815, - "rewards/rejected": -0.19139425456523895, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.3555651009082794, + "rewards/margins": 0.16317971050739288, + "rewards/rejected": 0.19238540530204773, "step": 60, - "use_label": 0.0 + "use_label": 442.0 }, { "epoch": 0.07, "learning_rate": 3.645833333333333e-07, - "logits/chosen": -2.8025059700012207, - "logits/rejected": -2.8569769859313965, - "logps/chosen": -308.70867919921875, - "logps/rejected": -294.216796875, - "loss": 0.5652, - "pred_label": 0.0, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": 0.10104294866323471, - "rewards/margins": 0.25741106271743774, - "rewards/rejected": -0.15636806190013885, + "logits/chosen": -1.442932367324829, + "logits/rejected": -1.4548231363296509, + "logps/chosen": -356.62017822265625, + "logps/rejected": -318.04931640625, + "loss": 0.6155, + "pred_label": 1.7999999523162842, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.5572141408920288, + "rewards/margins": 0.2931746542453766, + "rewards/rejected": 0.26403939723968506, "step": 70, - "use_label": 0.0 + "use_label": 520.2000122070312 }, { "epoch": 0.08, "learning_rate": 4.1666666666666667e-07, - "logits/chosen": -2.659728527069092, - "logits/rejected": -2.5561347007751465, - "logps/chosen": -282.79150390625, - "logps/rejected": -264.9902648925781, - "loss": 0.5422, - "pred_label": 0.0, - "rewards/accuracies": 0.75, - "rewards/chosen": 0.1746046096086502, - "rewards/margins": 0.6943714022636414, - "rewards/rejected": -0.5197668075561523, + "logits/chosen": -1.2479106187820435, + "logits/rejected": -1.2282390594482422, + "logps/chosen": -359.615478515625, + "logps/rejected": -299.5457458496094, + "loss": 0.579, + "pred_label": 6.349999904632568, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.5866491198539734, + "rewards/margins": 0.5094068050384521, + "rewards/rejected": 0.07724229991436005, "step": 80, - "use_label": 0.0 + "use_label": 595.6500244140625 }, { "epoch": 0.09, "learning_rate": 4.6874999999999996e-07, - "logits/chosen": -2.746777057647705, - "logits/rejected": -2.6942930221557617, - "logps/chosen": -290.62408447265625, - "logps/rejected": -270.763427734375, - "loss": 0.5694, - "pred_label": 0.0, + "logits/chosen": -1.2900737524032593, + "logits/rejected": -1.325388789176941, + "logps/chosen": -360.9581298828125, + "logps/rejected": -319.90625, + "loss": 0.5944, + "pred_label": 13.550000190734863, "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": 0.2031947821378708, - "rewards/margins": 0.5417487025260925, - "rewards/rejected": -0.3385539650917053, + "rewards/chosen": 0.7376635074615479, + "rewards/margins": 0.3891093134880066, + "rewards/rejected": 0.34855419397354126, "step": 90, - "use_label": 0.0 + "use_label": 668.4500122070312 }, { "epoch": 0.1, - "learning_rate": 4.976744186046512e-07, - "logits/chosen": -2.6346707344055176, - "logits/rejected": -2.666553258895874, - "logps/chosen": -344.0323486328125, - "logps/rejected": -350.5570373535156, - "loss": 0.6368, - "pred_label": 0.0, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.40496858954429626, - "rewards/margins": 0.16033101081848145, - "rewards/rejected": -0.5652996301651001, + "learning_rate": 4.976717112922002e-07, + "logits/chosen": -1.3841612339019775, + "logits/rejected": -1.4763787984848022, + "logps/chosen": -366.4154052734375, + "logps/rejected": -358.1784362792969, + "loss": 0.5918, + "pred_label": 19.350000381469727, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": 0.8709436655044556, + "rewards/margins": 0.47374382615089417, + "rewards/rejected": 0.39719995856285095, "step": 100, - "use_label": 0.0 - }, - { - "epoch": 0.1, - "eval_logits/chosen": -2.824883460998535, - "eval_logits/rejected": -2.8082714080810547, - "eval_logps/chosen": -286.7236022949219, - "eval_logps/rejected": -265.6400451660156, - "eval_loss": 0.6040099859237671, - "eval_pred_label": 0.0, - "eval_rewards/accuracies": 0.6746031641960144, - "eval_rewards/chosen": 0.17999570071697235, - "eval_rewards/margins": 0.5800058245658875, - "eval_rewards/rejected": -0.4000100791454315, - "eval_runtime": 152.1323, - "eval_samples_per_second": 13.146, - "eval_steps_per_second": 0.414, - "eval_use_label": 0.0, - "step": 100 + "use_label": 742.6500244140625 }, { "epoch": 0.12, - "learning_rate": 4.91860465116279e-07, - "logits/chosen": -2.742032527923584, - "logits/rejected": -2.74245023727417, - "logps/chosen": -242.73056030273438, - "logps/rejected": -212.1634063720703, - "loss": 0.6054, - "pred_label": 0.0, - "rewards/accuracies": 0.625, - "rewards/chosen": 0.16584378480911255, - "rewards/margins": 0.5073415637016296, - "rewards/rejected": -0.3414977788925171, + "learning_rate": 4.918509895227007e-07, + "logits/chosen": -1.373608946800232, + "logits/rejected": -1.4505846500396729, + "logps/chosen": -306.3442077636719, + "logps/rejected": -289.1839904785156, + "loss": 0.5705, + "pred_label": 29.049999237060547, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": 0.7434765100479126, + "rewards/margins": 0.5097277760505676, + "rewards/rejected": 0.23374874889850616, "step": 110, - "use_label": 0.0 + "use_label": 812.9500122070312 }, { "epoch": 0.13, - "learning_rate": 4.860465116279069e-07, - "logits/chosen": -2.648292064666748, - "logits/rejected": -2.6121315956115723, - "logps/chosen": -278.00640869140625, - "logps/rejected": -268.49859619140625, - "loss": 0.5983, - "pred_label": 0.0, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": 0.15003331005573273, - "rewards/margins": 0.9967330098152161, - "rewards/rejected": -0.8466997146606445, + "learning_rate": 4.860302677532014e-07, + "logits/chosen": -1.3047031164169312, + "logits/rejected": -1.317251205444336, + "logps/chosen": -324.14703369140625, + "logps/rejected": -270.2970275878906, + "loss": 0.5547, + "pred_label": 40.0, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": 0.7135593891143799, + "rewards/margins": 0.6617862582206726, + "rewards/rejected": 0.05177304893732071, "step": 120, - "use_label": 0.0 + "use_label": 882.0 }, { "epoch": 0.14, - "learning_rate": 4.802325581395348e-07, - "logits/chosen": -2.7574169635772705, - "logits/rejected": -2.726195812225342, - "logps/chosen": -325.28851318359375, - "logps/rejected": -264.0757751464844, - "loss": 0.5869, - "pred_label": 0.0, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": 0.13045839965343475, - "rewards/margins": 0.4203871190547943, - "rewards/rejected": -0.28992873430252075, + "learning_rate": 4.802095459837019e-07, + "logits/chosen": -1.421308159828186, + "logits/rejected": -1.3982212543487549, + "logps/chosen": -381.67926025390625, + "logps/rejected": -307.33258056640625, + "loss": 0.5737, + "pred_label": 47.150001525878906, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 1.0430835485458374, + "rewards/margins": 0.5517104268074036, + "rewards/rejected": 0.49137312173843384, "step": 130, - "use_label": 0.0 + "use_label": 954.8499755859375 }, { "epoch": 0.15, - "learning_rate": 4.7441860465116277e-07, - "logits/chosen": -2.715982675552368, - "logits/rejected": -2.7288689613342285, - "logps/chosen": -239.6348419189453, - "logps/rejected": -251.3628692626953, - "loss": 0.5227, - "pred_label": 0.0, - "rewards/accuracies": 0.7749999761581421, - "rewards/chosen": 0.2861214280128479, - "rewards/margins": 0.9334532618522644, - "rewards/rejected": -0.6473318338394165, + "learning_rate": 4.743888242142026e-07, + "logits/chosen": -1.477430820465088, + "logits/rejected": -1.4979420900344849, + "logps/chosen": -278.7945861816406, + "logps/rejected": -253.20358276367188, + "loss": 0.5539, + "pred_label": 52.849998474121094, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": 0.726542055606842, + "rewards/margins": 0.5279777646064758, + "rewards/rejected": 0.19856423139572144, "step": 140, - "use_label": 0.0 + "use_label": 1029.1500244140625 }, { "epoch": 0.16, - "learning_rate": 4.6860465116279066e-07, - "logits/chosen": -2.7235076427459717, - "logits/rejected": -2.73270845413208, - "logps/chosen": -269.925048828125, - "logps/rejected": -257.32135009765625, - "loss": 0.5829, - "pred_label": 0.0, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": 0.3011024594306946, - "rewards/margins": 0.7531024217605591, - "rewards/rejected": -0.4519999027252197, + "learning_rate": 4.685681024447031e-07, + "logits/chosen": -1.4019322395324707, + "logits/rejected": -1.4297659397125244, + "logps/chosen": -336.5803527832031, + "logps/rejected": -333.4914245605469, + "loss": 0.5609, + "pred_label": 60.45000076293945, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": 0.8561170697212219, + "rewards/margins": 0.3941256105899811, + "rewards/rejected": 0.46199145913124084, "step": 150, - "use_label": 0.0 + "use_label": 1101.550048828125 }, { "epoch": 0.17, - "learning_rate": 4.627906976744186e-07, - "logits/chosen": -2.7294018268585205, - "logits/rejected": -2.7083096504211426, - "logps/chosen": -271.4284973144531, - "logps/rejected": -311.17718505859375, - "loss": 0.6106, - "pred_label": 0.0, - "rewards/accuracies": 0.7875000238418579, - "rewards/chosen": 0.2270795851945877, - "rewards/margins": 0.7130987048149109, - "rewards/rejected": -0.4860190749168396, + "learning_rate": 4.627473806752037e-07, + "logits/chosen": -1.3426318168640137, + "logits/rejected": -1.3782684803009033, + "logps/chosen": -347.442626953125, + "logps/rejected": -337.1346130371094, + "loss": 0.5358, + "pred_label": 71.05000305175781, + "rewards/accuracies": 0.6875, + "rewards/chosen": 1.0326350927352905, + "rewards/margins": 0.5863274335861206, + "rewards/rejected": 0.4463076591491699, "step": 160, - "use_label": 0.0 + "use_label": 1170.949951171875 }, { "epoch": 0.18, - "learning_rate": 4.569767441860465e-07, - "logits/chosen": -2.7488739490509033, - "logits/rejected": -2.731077194213867, - "logps/chosen": -296.81561279296875, - "logps/rejected": -279.2462158203125, - "loss": 0.5766, - "pred_label": 0.0, + "learning_rate": 4.5692665890570433e-07, + "logits/chosen": -1.3970049619674683, + "logits/rejected": -1.4572138786315918, + "logps/chosen": -335.081787109375, + "logps/rejected": -267.0716552734375, + "loss": 0.5303, + "pred_label": 80.0, "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": 0.3102962076663971, - "rewards/margins": 0.6582878828048706, - "rewards/rejected": -0.3479916751384735, + "rewards/chosen": 0.8701731562614441, + "rewards/margins": 0.6228026151657104, + "rewards/rejected": 0.24737051129341125, "step": 170, - "use_label": 0.0 + "use_label": 1242.0 }, { "epoch": 0.19, - "learning_rate": 4.511627906976744e-07, - "logits/chosen": -2.678403377532959, - "logits/rejected": -2.6578478813171387, - "logps/chosen": -238.8046875, - "logps/rejected": -242.45297241210938, - "loss": 0.535, - "pred_label": 0.0, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": 0.11773022264242172, - "rewards/margins": 0.943315863609314, - "rewards/rejected": -0.8255857229232788, + "learning_rate": 4.5110593713620486e-07, + "logits/chosen": -1.3490818738937378, + "logits/rejected": -1.3612277507781982, + "logps/chosen": -265.89105224609375, + "logps/rejected": -296.7646484375, + "loss": 0.5288, + "pred_label": 92.75, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": 0.7616567611694336, + "rewards/margins": 0.6177178025245667, + "rewards/rejected": 0.14393898844718933, "step": 180, - "use_label": 0.0 + "use_label": 1309.25 }, { "epoch": 0.2, - "learning_rate": 4.4534883720930233e-07, - "logits/chosen": -2.7577908039093018, - "logits/rejected": -2.7466981410980225, - "logps/chosen": -255.8538818359375, - "logps/rejected": -249.35043334960938, - "loss": 0.515, - "pred_label": 0.0, - "rewards/accuracies": 0.8125, - "rewards/chosen": 0.29801803827285767, - "rewards/margins": 0.9082610011100769, - "rewards/rejected": -0.610243022441864, + "learning_rate": 4.4528521536670544e-07, + "logits/chosen": -1.4309965372085571, + "logits/rejected": -1.4547052383422852, + "logps/chosen": -323.339111328125, + "logps/rejected": -273.64691162109375, + "loss": 0.5208, + "pred_label": 106.19999694824219, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 1.0953993797302246, + "rewards/margins": 0.8105791211128235, + "rewards/rejected": 0.2848203480243683, "step": 190, - "use_label": 0.0 + "use_label": 1375.800048828125 }, { "epoch": 0.21, - "learning_rate": 4.395348837209302e-07, - "logits/chosen": -2.726404905319214, - "logits/rejected": -2.684250593185425, - "logps/chosen": -314.0807189941406, - "logps/rejected": -294.7442626953125, - "loss": 0.558, - "pred_label": 0.0, - "rewards/accuracies": 0.637499988079071, - "rewards/chosen": 0.19561365246772766, - "rewards/margins": 0.6523147225379944, - "rewards/rejected": -0.45670104026794434, + "learning_rate": 4.3946449359720607e-07, + "logits/chosen": -1.4408454895019531, + "logits/rejected": -1.4733045101165771, + "logps/chosen": -323.10467529296875, + "logps/rejected": -352.9010009765625, + "loss": 0.4934, + "pred_label": 116.3499984741211, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": 1.2384220361709595, + "rewards/margins": 0.7947490811347961, + "rewards/rejected": 0.4436728358268738, "step": 200, - "use_label": 0.0 - }, - { - "epoch": 0.21, - "eval_logits/chosen": -2.808067798614502, - "eval_logits/rejected": -2.7980899810791016, - "eval_logps/chosen": -287.2000732421875, - "eval_logps/rejected": -269.501953125, - "eval_loss": 0.5651848316192627, - "eval_pred_label": 0.0, - "eval_rewards/accuracies": 0.7420634627342224, - "eval_rewards/chosen": 0.1323493868112564, - "eval_rewards/margins": 0.9185545444488525, - "eval_rewards/rejected": -0.7862052321434021, - "eval_runtime": 151.5393, - "eval_samples_per_second": 13.198, - "eval_steps_per_second": 0.416, - "eval_use_label": 0.0, - "step": 200 + "use_label": 1445.6500244140625 }, { "epoch": 0.22, - "learning_rate": 4.337209302325581e-07, - "logits/chosen": -2.7738518714904785, - "logits/rejected": -2.76870059967041, - "logps/chosen": -309.36224365234375, - "logps/rejected": -252.21908569335938, - "loss": 0.5574, - "pred_label": 0.0, - "rewards/accuracies": 0.7749999761581421, - "rewards/chosen": 0.1390099674463272, - "rewards/margins": 0.8792921900749207, - "rewards/rejected": -0.7402822375297546, + "learning_rate": 4.336437718277066e-07, + "logits/chosen": -1.373170256614685, + "logits/rejected": -1.3570789098739624, + "logps/chosen": -382.3429870605469, + "logps/rejected": -266.7330627441406, + "loss": 0.5348, + "pred_label": 131.64999389648438, + "rewards/accuracies": 0.75, + "rewards/chosen": 1.3385562896728516, + "rewards/margins": 1.1700799465179443, + "rewards/rejected": 0.16847634315490723, "step": 210, - "use_label": 0.0 + "use_label": 1510.3499755859375 }, { "epoch": 0.23, - "learning_rate": 4.27906976744186e-07, - "logits/chosen": -2.7275898456573486, - "logits/rejected": -2.7462148666381836, - "logps/chosen": -330.2643127441406, - "logps/rejected": -309.7840576171875, - "loss": 0.545, - "pred_label": 0.0, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": 0.24796506762504578, - "rewards/margins": 0.912385106086731, - "rewards/rejected": -0.6644200086593628, + "learning_rate": 4.278230500582072e-07, + "logits/chosen": -1.3580390214920044, + "logits/rejected": -1.386907935142517, + "logps/chosen": -335.6164245605469, + "logps/rejected": -291.1451416015625, + "loss": 0.5062, + "pred_label": 152.14999389648438, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 1.2729132175445557, + "rewards/margins": 1.0367872714996338, + "rewards/rejected": 0.23612599074840546, "step": 220, - "use_label": 0.0 + "use_label": 1569.8499755859375 }, { "epoch": 0.24, - "learning_rate": 4.220930232558139e-07, - "logits/chosen": -2.656540870666504, - "logits/rejected": -2.662473678588867, - "logps/chosen": -303.71685791015625, - "logps/rejected": -266.63275146484375, - "loss": 0.552, - "pred_label": 0.0, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.06451065838336945, - "rewards/margins": 0.7794097661972046, - "rewards/rejected": -0.8439203500747681, + "learning_rate": 4.220023282887078e-07, + "logits/chosen": -1.415291428565979, + "logits/rejected": -1.4271855354309082, + "logps/chosen": -337.0808410644531, + "logps/rejected": -293.56695556640625, + "loss": 0.4849, + "pred_label": 175.89999389648438, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 1.054723858833313, + "rewards/margins": 0.9728155136108398, + "rewards/rejected": 0.08190834522247314, "step": 230, - "use_label": 0.0 + "use_label": 1626.0999755859375 }, { "epoch": 0.25, - "learning_rate": 4.162790697674418e-07, - "logits/chosen": -2.670305013656616, - "logits/rejected": -2.6477558612823486, - "logps/chosen": -266.712158203125, - "logps/rejected": -217.30209350585938, - "loss": 0.5225, - "pred_label": 0.0, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.07932360470294952, - "rewards/margins": 0.7470332384109497, - "rewards/rejected": -0.8263567686080933, + "learning_rate": 4.1618160651920834e-07, + "logits/chosen": -1.456987738609314, + "logits/rejected": -1.487733244895935, + "logps/chosen": -301.118896484375, + "logps/rejected": -239.19088745117188, + "loss": 0.5087, + "pred_label": 191.25, + "rewards/accuracies": 0.6875, + "rewards/chosen": 1.0468814373016357, + "rewards/margins": 0.7403112649917603, + "rewards/rejected": 0.30657026171684265, "step": 240, - "use_label": 0.0 + "use_label": 1690.75 }, { "epoch": 0.26, - "learning_rate": 4.104651162790698e-07, - "logits/chosen": -2.7736430168151855, - "logits/rejected": -2.734602928161621, - "logps/chosen": -282.9905090332031, - "logps/rejected": -334.78863525390625, - "loss": 0.5718, - "pred_label": 0.0, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": 0.052515070885419846, - "rewards/margins": 0.7567065954208374, - "rewards/rejected": -0.7041915655136108, + "learning_rate": 4.103608847497089e-07, + "logits/chosen": -1.3532272577285767, + "logits/rejected": -1.3804259300231934, + "logps/chosen": -317.42230224609375, + "logps/rejected": -329.9377136230469, + "loss": 0.5253, + "pred_label": 205.25, + "rewards/accuracies": 0.75, + "rewards/chosen": 1.2708420753479004, + "rewards/margins": 0.9137954711914062, + "rewards/rejected": 0.3570464253425598, "step": 250, - "use_label": 0.0 + "use_label": 1756.75 }, { "epoch": 0.27, - "learning_rate": 4.046511627906977e-07, - "logits/chosen": -2.671180009841919, - "logits/rejected": -2.6478466987609863, - "logps/chosen": -248.6728057861328, - "logps/rejected": -269.4361267089844, - "loss": 0.5466, - "pred_label": 0.0, - "rewards/accuracies": 0.7749999761581421, - "rewards/chosen": 0.02055490016937256, - "rewards/margins": 0.9694421887397766, - "rewards/rejected": -0.9488871693611145, + "learning_rate": 4.0454016298020956e-07, + "logits/chosen": -1.3812406063079834, + "logits/rejected": -1.4057163000106812, + "logps/chosen": -331.4401550292969, + "logps/rejected": -308.66558837890625, + "loss": 0.5155, + "pred_label": 220.64999389648438, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": 0.9700489044189453, + "rewards/margins": 0.6729909181594849, + "rewards/rejected": 0.29705801606178284, "step": 260, - "use_label": 0.0 + "use_label": 1821.3499755859375 }, { "epoch": 0.28, - "learning_rate": 3.9883720930232557e-07, - "logits/chosen": -2.7515838146209717, - "logits/rejected": -2.805983543395996, - "logps/chosen": -296.47076416015625, - "logps/rejected": -235.0985565185547, - "loss": 0.5395, - "pred_label": 0.0, - "rewards/accuracies": 0.75, - "rewards/chosen": 0.11441866308450699, - "rewards/margins": 0.8921812772750854, - "rewards/rejected": -0.7777627110481262, + "learning_rate": 3.987194412107101e-07, + "logits/chosen": -1.4664005041122437, + "logits/rejected": -1.4549050331115723, + "logps/chosen": -349.93994140625, + "logps/rejected": -278.48590087890625, + "loss": 0.4778, + "pred_label": 240.25, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 1.315645456314087, + "rewards/margins": 1.0870709419250488, + "rewards/rejected": 0.22857451438903809, "step": 270, - "use_label": 0.0 + "use_label": 1881.75 }, { "epoch": 0.29, - "learning_rate": 3.9302325581395346e-07, - "logits/chosen": -2.771331310272217, - "logits/rejected": -2.799328327178955, - "logps/chosen": -299.34796142578125, - "logps/rejected": -297.7183837890625, - "loss": 0.5239, - "pred_label": 0.0, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.07190613448619843, - "rewards/margins": 0.8066242933273315, - "rewards/rejected": -0.8785305023193359, + "learning_rate": 3.9289871944121066e-07, + "logits/chosen": -1.4699549674987793, + "logits/rejected": -1.417506456375122, + "logps/chosen": -373.6863708496094, + "logps/rejected": -331.5750427246094, + "loss": 0.4764, + "pred_label": 259.95001220703125, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": 1.3312596082687378, + "rewards/margins": 0.79558265209198, + "rewards/rejected": 0.5356770753860474, "step": 280, - "use_label": 0.0 + "use_label": 1942.050048828125 }, { "epoch": 0.3, - "learning_rate": 3.8720930232558135e-07, - "logits/chosen": -2.7138657569885254, - "logits/rejected": -2.7323174476623535, - "logps/chosen": -290.46563720703125, - "logps/rejected": -261.27117919921875, - "loss": 0.5757, - "pred_label": 0.0, - "rewards/accuracies": 0.737500011920929, - "rewards/chosen": -0.09668038040399551, - "rewards/margins": 0.9626511335372925, - "rewards/rejected": -1.0593315362930298, + "learning_rate": 3.870779976717113e-07, + "logits/chosen": -1.4723706245422363, + "logits/rejected": -1.4665457010269165, + "logps/chosen": -311.8465881347656, + "logps/rejected": -274.10888671875, + "loss": 0.4979, + "pred_label": 278.3500061035156, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 1.0801382064819336, + "rewards/margins": 0.6504096388816833, + "rewards/rejected": 0.42972850799560547, "step": 290, - "use_label": 0.0 + "use_label": 2003.6500244140625 }, { "epoch": 0.31, - "learning_rate": 3.813953488372093e-07, - "logits/chosen": -2.659846782684326, - "logits/rejected": -2.6316401958465576, - "logps/chosen": -293.4468078613281, - "logps/rejected": -243.309326171875, - "loss": 0.553, - "pred_label": 0.0, - "rewards/accuracies": 0.7749999761581421, - "rewards/chosen": -0.0554303340613842, - "rewards/margins": 1.0153982639312744, - "rewards/rejected": -1.0708284378051758, + "learning_rate": 3.812572759022118e-07, + "logits/chosen": -1.4164968729019165, + "logits/rejected": -1.4505560398101807, + "logps/chosen": -274.9853210449219, + "logps/rejected": -272.59814453125, + "loss": 0.4576, + "pred_label": 297.5, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": 1.4018886089324951, + "rewards/margins": 1.1822335720062256, + "rewards/rejected": 0.2196551263332367, "step": 300, - "use_label": 0.0 - }, - { - "epoch": 0.31, - "eval_logits/chosen": -2.751713991165161, - "eval_logits/rejected": -2.742063045501709, - "eval_logps/chosen": -289.1977844238281, - "eval_logps/rejected": -272.0630187988281, - "eval_loss": 0.5431597828865051, - "eval_pred_label": 0.0, - "eval_rewards/accuracies": 0.7341269850730896, - "eval_rewards/chosen": -0.06741873919963837, - "eval_rewards/margins": 0.9748923778533936, - "eval_rewards/rejected": -1.042311191558838, - "eval_runtime": 151.828, - "eval_samples_per_second": 13.173, - "eval_steps_per_second": 0.415, - "eval_use_label": 0.0, - "step": 300 + "use_label": 2064.5 }, { "epoch": 0.32, - "learning_rate": 3.755813953488372e-07, - "logits/chosen": -2.612884283065796, - "logits/rejected": -2.65791916847229, - "logps/chosen": -306.4892578125, - "logps/rejected": -278.91619873046875, - "loss": 0.4974, - "pred_label": 0.0, - "rewards/accuracies": 0.824999988079071, - "rewards/chosen": 0.24395442008972168, - "rewards/margins": 1.114572525024414, - "rewards/rejected": -0.8706180453300476, + "learning_rate": 3.754365541327124e-07, + "logits/chosen": -1.3352575302124023, + "logits/rejected": -1.4178216457366943, + "logps/chosen": -303.4643249511719, + "logps/rejected": -325.78961181640625, + "loss": 0.4554, + "pred_label": 323.8999938964844, + "rewards/accuracies": 0.75, + "rewards/chosen": 1.0443353652954102, + "rewards/margins": 1.2372277975082397, + "rewards/rejected": -0.19289246201515198, "step": 310, - "use_label": 0.0 + "use_label": 2118.10009765625 }, { "epoch": 0.33, - "learning_rate": 3.697674418604651e-07, - "logits/chosen": -2.6776044368743896, - "logits/rejected": -2.622297525405884, - "logps/chosen": -270.759033203125, - "logps/rejected": -289.8017578125, - "loss": 0.5455, - "pred_label": 0.0, - "rewards/accuracies": 0.737500011920929, - "rewards/chosen": -0.07885434478521347, - "rewards/margins": 1.0345350503921509, - "rewards/rejected": -1.1133893728256226, + "learning_rate": 3.6961583236321304e-07, + "logits/chosen": -1.4550950527191162, + "logits/rejected": -1.436576247215271, + "logps/chosen": -340.7930908203125, + "logps/rejected": -323.78985595703125, + "loss": 0.4729, + "pred_label": 351.29998779296875, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 1.1371341943740845, + "rewards/margins": 0.8774749636650085, + "rewards/rejected": 0.259659081697464, "step": 320, - "use_label": 0.0 + "use_label": 2170.699951171875 }, { "epoch": 0.35, - "learning_rate": 3.63953488372093e-07, - "logits/chosen": -2.716804027557373, - "logits/rejected": -2.640516996383667, - "logps/chosen": -262.74810791015625, - "logps/rejected": -253.93417358398438, - "loss": 0.5639, - "pred_label": 0.0, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": 0.14604653418064117, - "rewards/margins": 0.922707736492157, - "rewards/rejected": -0.7766610980033875, + "learning_rate": 3.637951105937136e-07, + "logits/chosen": -1.428165078163147, + "logits/rejected": -1.4091517925262451, + "logps/chosen": -317.4041748046875, + "logps/rejected": -238.2615509033203, + "loss": 0.4642, + "pred_label": 379.04998779296875, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": 1.1726207733154297, + "rewards/margins": 1.0814019441604614, + "rewards/rejected": 0.0912187248468399, "step": 330, - "use_label": 0.0 + "use_label": 2222.949951171875 }, { "epoch": 0.36, - "learning_rate": 3.581395348837209e-07, - "logits/chosen": -2.7240614891052246, - "logits/rejected": -2.6947460174560547, - "logps/chosen": -238.51412963867188, - "logps/rejected": -228.6656494140625, - "loss": 0.5925, - "pred_label": 0.0, - "rewards/accuracies": 0.612500011920929, - "rewards/chosen": -0.19918441772460938, - "rewards/margins": 0.44254446029663086, - "rewards/rejected": -0.6417288184165955, + "learning_rate": 3.579743888242142e-07, + "logits/chosen": -1.4679944515228271, + "logits/rejected": -1.5165075063705444, + "logps/chosen": -273.54071044921875, + "logps/rejected": -243.1127471923828, + "loss": 0.4859, + "pred_label": 401.95001220703125, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 1.1446250677108765, + "rewards/margins": 0.7705689668655396, + "rewards/rejected": 0.3740561306476593, "step": 340, - "use_label": 0.0 + "use_label": 2280.050048828125 }, { "epoch": 0.37, - "learning_rate": 3.5232558139534886e-07, - "logits/chosen": -2.7215187549591064, - "logits/rejected": -2.7599117755889893, - "logps/chosen": -307.45989990234375, - "logps/rejected": -334.6233825683594, - "loss": 0.577, - "pred_label": 0.0, - "rewards/accuracies": 0.762499988079071, - "rewards/chosen": 0.1541057825088501, - "rewards/margins": 1.1278841495513916, - "rewards/rejected": -0.9737783670425415, + "learning_rate": 3.521536670547148e-07, + "logits/chosen": -1.2454713582992554, + "logits/rejected": -1.321508765220642, + "logps/chosen": -286.65777587890625, + "logps/rejected": -306.366455078125, + "loss": 0.4577, + "pred_label": 428.45001220703125, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 1.181875467300415, + "rewards/margins": 1.0571717023849487, + "rewards/rejected": 0.12470376491546631, "step": 350, - "use_label": 0.0 + "use_label": 2333.550048828125 }, { "epoch": 0.38, - "learning_rate": 3.4651162790697675e-07, - "logits/chosen": -2.7533516883850098, - "logits/rejected": -2.8209774494171143, - "logps/chosen": -291.31658935546875, - "logps/rejected": -252.8400421142578, - "loss": 0.5715, - "pred_label": 0.0, + "learning_rate": 3.4633294528521536e-07, + "logits/chosen": -1.4310450553894043, + "logits/rejected": -1.4215561151504517, + "logps/chosen": -347.1667785644531, + "logps/rejected": -319.175537109375, + "loss": 0.462, + "pred_label": 454.75, "rewards/accuracies": 0.737500011920929, - "rewards/chosen": 0.08639977127313614, - "rewards/margins": 0.8964889645576477, - "rewards/rejected": -0.8100892305374146, + "rewards/chosen": 1.2432093620300293, + "rewards/margins": 0.7565422058105469, + "rewards/rejected": 0.4866672456264496, "step": 360, - "use_label": 0.0 + "use_label": 2387.25 }, { "epoch": 0.39, - "learning_rate": 3.4069767441860464e-07, - "logits/chosen": -2.7346293926239014, - "logits/rejected": -2.712937355041504, - "logps/chosen": -243.9089813232422, - "logps/rejected": -270.5567321777344, - "loss": 0.524, - "pred_label": 0.0, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": 0.014877429232001305, - "rewards/margins": 0.8318921327590942, - "rewards/rejected": -0.8170146942138672, + "learning_rate": 3.4051222351571594e-07, + "logits/chosen": -1.3713960647583008, + "logits/rejected": -1.4076581001281738, + "logps/chosen": -289.0054626464844, + "logps/rejected": -323.2015686035156, + "loss": 0.4783, + "pred_label": 474.5, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": 0.9605358839035034, + "rewards/margins": 0.8715503811836243, + "rewards/rejected": 0.08898548781871796, "step": 370, - "use_label": 0.0 + "use_label": 2447.5 }, { "epoch": 0.4, - "learning_rate": 3.3488372093023253e-07, - "logits/chosen": -2.820455312728882, - "logits/rejected": -2.7536873817443848, - "logps/chosen": -331.84075927734375, - "logps/rejected": -300.8788146972656, - "loss": 0.5174, - "pred_label": 0.0, - "rewards/accuracies": 0.762499988079071, - "rewards/chosen": -0.10691157728433609, - "rewards/margins": 0.881051242351532, - "rewards/rejected": -0.9879627227783203, + "learning_rate": 3.346915017462165e-07, + "logits/chosen": -1.3850533962249756, + "logits/rejected": -1.4161368608474731, + "logps/chosen": -363.1261291503906, + "logps/rejected": -332.7879943847656, + "loss": 0.4392, + "pred_label": 492.8999938964844, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 1.097650408744812, + "rewards/margins": 0.8471637964248657, + "rewards/rejected": 0.2504865527153015, "step": 380, - "use_label": 0.0 + "use_label": 2509.10009765625 }, { "epoch": 0.41, - "learning_rate": 3.290697674418604e-07, - "logits/chosen": -2.560523509979248, - "logits/rejected": -2.5691637992858887, - "logps/chosen": -262.97808837890625, - "logps/rejected": -238.43594360351562, - "loss": 0.5326, - "pred_label": 0.0, - "rewards/accuracies": 0.6875, - "rewards/chosen": 0.1129593625664711, - "rewards/margins": 0.8537227511405945, - "rewards/rejected": -0.7407633662223816, + "learning_rate": 3.288707799767171e-07, + "logits/chosen": -1.389650821685791, + "logits/rejected": -1.339377760887146, + "logps/chosen": -286.0871276855469, + "logps/rejected": -249.79629516601562, + "loss": 0.4179, + "pred_label": 517.4000244140625, + "rewards/accuracies": 0.8374999761581421, + "rewards/chosen": 1.0542852878570557, + "rewards/margins": 1.5020934343338013, + "rewards/rejected": -0.44780832529067993, "step": 390, - "use_label": 0.0 + "use_label": 2564.60009765625 }, { "epoch": 0.42, - "learning_rate": 3.232558139534883e-07, - "logits/chosen": -2.748053789138794, - "logits/rejected": -2.720460891723633, - "logps/chosen": -307.76116943359375, - "logps/rejected": -274.69989013671875, - "loss": 0.5019, - "pred_label": 0.0, - "rewards/accuracies": 0.625, - "rewards/chosen": -0.10151912271976471, - "rewards/margins": 0.7108665704727173, - "rewards/rejected": -0.8123855590820312, + "learning_rate": 3.230500582072177e-07, + "logits/chosen": -1.4820091724395752, + "logits/rejected": -1.4391555786132812, + "logps/chosen": -380.75665283203125, + "logps/rejected": -305.62017822265625, + "loss": 0.4127, + "pred_label": 540.4500122070312, + "rewards/accuracies": 0.6875, + "rewards/chosen": 1.1569273471832275, + "rewards/margins": 1.0463058948516846, + "rewards/rejected": 0.11062141507863998, "step": 400, - "use_label": 0.0 - }, - { - "epoch": 0.42, - "eval_logits/chosen": -2.796114206314087, - "eval_logits/rejected": -2.7870821952819824, - "eval_logps/chosen": -287.2944030761719, - "eval_logps/rejected": -270.9002990722656, - "eval_loss": 0.5371336936950684, - "eval_pred_label": 0.0, - "eval_rewards/accuracies": 0.7539682388305664, - "eval_rewards/chosen": 0.1229194775223732, - "eval_rewards/margins": 1.0489554405212402, - "eval_rewards/rejected": -0.9260359406471252, - "eval_runtime": 151.8781, - "eval_samples_per_second": 13.168, - "eval_steps_per_second": 0.415, - "eval_use_label": 0.0, - "step": 400 + "use_label": 2621.550048828125 }, { "epoch": 0.43, - "learning_rate": 3.1744186046511626e-07, - "logits/chosen": -2.8114898204803467, - "logits/rejected": -2.7443149089813232, - "logps/chosen": -308.30804443359375, - "logps/rejected": -297.49749755859375, - "loss": 0.5548, - "pred_label": 0.0, - "rewards/accuracies": 0.6875, - "rewards/chosen": -0.004237680230289698, - "rewards/margins": 0.7485671043395996, - "rewards/rejected": -0.7528048157691956, + "learning_rate": 3.1722933643771827e-07, + "logits/chosen": -1.3879801034927368, + "logits/rejected": -1.4610755443572998, + "logps/chosen": -391.5262756347656, + "logps/rejected": -361.547607421875, + "loss": 0.4672, + "pred_label": 569.0499877929688, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 1.5551117658615112, + "rewards/margins": 1.2884727716445923, + "rewards/rejected": 0.2666390836238861, "step": 410, - "use_label": 0.0 + "use_label": 2672.949951171875 }, { "epoch": 0.44, - "learning_rate": 3.116279069767442e-07, - "logits/chosen": -2.6755497455596924, - "logits/rejected": -2.623680591583252, - "logps/chosen": -251.78466796875, - "logps/rejected": -202.69125366210938, - "loss": 0.5576, - "pred_label": 0.0, + "learning_rate": 3.1140861466821885e-07, + "logits/chosen": -1.3801909685134888, + "logits/rejected": -1.4261410236358643, + "logps/chosen": -263.9217224121094, + "logps/rejected": -260.2132568359375, + "loss": 0.4391, + "pred_label": 595.8499755859375, "rewards/accuracies": 0.675000011920929, - "rewards/chosen": 0.09348449110984802, - "rewards/margins": 0.8450796008110046, - "rewards/rejected": -0.751595139503479, + "rewards/chosen": 0.9185078740119934, + "rewards/margins": 1.1499378681182861, + "rewards/rejected": -0.2314300537109375, "step": 420, - "use_label": 0.0 + "use_label": 2726.14990234375 }, { "epoch": 0.45, - "learning_rate": 3.058139534883721e-07, - "logits/chosen": -2.7282052040100098, - "logits/rejected": -2.6777048110961914, - "logps/chosen": -286.0669860839844, - "logps/rejected": -259.3023376464844, - "loss": 0.5476, - "pred_label": 0.0, - "rewards/accuracies": 0.737500011920929, - "rewards/chosen": 0.2035273015499115, - "rewards/margins": 0.9668741226196289, - "rewards/rejected": -0.763346791267395, + "learning_rate": 3.0558789289871943e-07, + "logits/chosen": -1.4430896043777466, + "logits/rejected": -1.4777699708938599, + "logps/chosen": -270.7845458984375, + "logps/rejected": -265.51568603515625, + "loss": 0.4777, + "pred_label": 626.7999877929688, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": 1.1179291009902954, + "rewards/margins": 1.0040278434753418, + "rewards/rejected": 0.1139012798666954, "step": 430, - "use_label": 0.0 + "use_label": 2775.199951171875 }, { "epoch": 0.46, - "learning_rate": 3e-07, - "logits/chosen": -2.611921787261963, - "logits/rejected": -2.676251173019409, - "logps/chosen": -278.88824462890625, - "logps/rejected": -255.1051788330078, - "loss": 0.5149, - "pred_label": 0.0, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": 0.1889255791902542, - "rewards/margins": 0.9589231610298157, - "rewards/rejected": -0.7699976563453674, + "learning_rate": 2.9976717112922e-07, + "logits/chosen": -1.4349843263626099, + "logits/rejected": -1.4729284048080444, + "logps/chosen": -311.2239990234375, + "logps/rejected": -299.1993103027344, + "loss": 0.4809, + "pred_label": 650.2999877929688, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": 1.2415423393249512, + "rewards/margins": 0.8923840522766113, + "rewards/rejected": 0.34915822744369507, "step": 440, - "use_label": 0.0 + "use_label": 2831.699951171875 }, { "epoch": 0.47, - "learning_rate": 2.941860465116279e-07, - "logits/chosen": -2.58278226852417, - "logits/rejected": -2.551997423171997, - "logps/chosen": -274.6985778808594, - "logps/rejected": -328.51422119140625, - "loss": 0.5667, - "pred_label": 0.0, - "rewards/accuracies": 0.762499988079071, - "rewards/chosen": 0.1181306466460228, - "rewards/margins": 1.005082368850708, - "rewards/rejected": -0.8869515657424927, + "learning_rate": 2.939464493597206e-07, + "logits/chosen": -1.2597442865371704, + "logits/rejected": -1.30291748046875, + "logps/chosen": -256.5679626464844, + "logps/rejected": -269.8310852050781, + "loss": 0.4691, + "pred_label": 675.0, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": 1.0971431732177734, + "rewards/margins": 0.8100937008857727, + "rewards/rejected": 0.2870495319366455, "step": 450, - "use_label": 0.0 + "use_label": 2887.0 }, { "epoch": 0.48, - "learning_rate": 2.883720930232558e-07, - "logits/chosen": -2.6847758293151855, - "logits/rejected": -2.6493821144104004, - "logps/chosen": -277.9380798339844, - "logps/rejected": -285.26324462890625, - "loss": 0.533, - "pred_label": 0.0, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": 0.1580689549446106, - "rewards/margins": 1.3047949075698853, - "rewards/rejected": -1.146726131439209, + "learning_rate": 2.8812572759022117e-07, + "logits/chosen": -1.4251500368118286, + "logits/rejected": -1.4686391353607178, + "logps/chosen": -337.9930725097656, + "logps/rejected": -332.3681945800781, + "loss": 0.4543, + "pred_label": 701.6500244140625, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 1.3275657892227173, + "rewards/margins": 1.003480076789856, + "rewards/rejected": 0.324085533618927, "step": 460, - "use_label": 0.0 + "use_label": 2940.35009765625 }, { "epoch": 0.49, - "learning_rate": 2.825581395348837e-07, - "logits/chosen": -2.6480162143707275, - "logits/rejected": -2.5979321002960205, - "logps/chosen": -279.9370422363281, - "logps/rejected": -275.7652587890625, - "loss": 0.5222, - "pred_label": 0.0, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": 0.015222841873764992, - "rewards/margins": 0.9643913507461548, - "rewards/rejected": -0.9491683840751648, + "learning_rate": 2.8230500582072175e-07, + "logits/chosen": -1.353682041168213, + "logits/rejected": -1.3279917240142822, + "logps/chosen": -335.19329833984375, + "logps/rejected": -268.65606689453125, + "loss": 0.4462, + "pred_label": 728.75, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 1.3157131671905518, + "rewards/margins": 1.3176007270812988, + "rewards/rejected": -0.0018875360256060958, "step": 470, - "use_label": 0.0 + "use_label": 2993.25 }, { "epoch": 0.5, - "learning_rate": 2.767441860465116e-07, - "logits/chosen": -2.643878698348999, - "logits/rejected": -2.5506885051727295, - "logps/chosen": -231.51913452148438, - "logps/rejected": -236.3944091796875, - "loss": 0.5759, - "pred_label": 0.0, - "rewards/accuracies": 0.737500011920929, - "rewards/chosen": -0.12248492240905762, - "rewards/margins": 0.6468067169189453, - "rewards/rejected": -0.7692916393280029, + "learning_rate": 2.7648428405122233e-07, + "logits/chosen": -1.3808891773223877, + "logits/rejected": -1.3530418872833252, + "logps/chosen": -266.064453125, + "logps/rejected": -263.7291564941406, + "loss": 0.4507, + "pred_label": 754.5, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": 0.7782207727432251, + "rewards/margins": 0.7220892906188965, + "rewards/rejected": 0.05613153427839279, "step": 480, - "use_label": 0.0 + "use_label": 3047.5 }, { "epoch": 0.51, - "learning_rate": 2.709302325581395e-07, - "logits/chosen": -2.70454740524292, - "logits/rejected": -2.6749463081359863, - "logps/chosen": -275.100830078125, - "logps/rejected": -261.96124267578125, - "loss": 0.5667, - "pred_label": 0.0, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.030958181247115135, - "rewards/margins": 0.7133909463882446, - "rewards/rejected": -0.7443491220474243, + "learning_rate": 2.706635622817229e-07, + "logits/chosen": -1.3707979917526245, + "logits/rejected": -1.399265170097351, + "logps/chosen": -318.2115783691406, + "logps/rejected": -310.44512939453125, + "loss": 0.4379, + "pred_label": 780.5999755859375, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": 1.2304394245147705, + "rewards/margins": 1.0040814876556396, + "rewards/rejected": 0.22635769844055176, "step": 490, - "use_label": 0.0 + "use_label": 3101.39990234375 }, { "epoch": 0.52, - "learning_rate": 2.651162790697674e-07, - "logits/chosen": -2.6521294116973877, - "logits/rejected": -2.639777421951294, - "logps/chosen": -267.96221923828125, - "logps/rejected": -275.6853942871094, - "loss": 0.5303, - "pred_label": 0.0, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": 0.0007066220277920365, - "rewards/margins": 0.7814868688583374, - "rewards/rejected": -0.7807803153991699, + "learning_rate": 2.648428405122235e-07, + "logits/chosen": -1.3638988733291626, + "logits/rejected": -1.3315374851226807, + "logps/chosen": -320.8160400390625, + "logps/rejected": -311.55328369140625, + "loss": 0.4435, + "pred_label": 812.4000244140625, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 1.2452471256256104, + "rewards/margins": 1.6832224130630493, + "rewards/rejected": -0.4379752576351166, "step": 500, - "use_label": 0.0 - }, - { - "epoch": 0.52, - "eval_logits/chosen": -2.7494895458221436, - "eval_logits/rejected": -2.74151349067688, - "eval_logps/chosen": -287.7681579589844, - "eval_logps/rejected": -271.1743469238281, - "eval_loss": 0.5362493991851807, - "eval_pred_label": 0.0, - "eval_rewards/accuracies": 0.738095223903656, - "eval_rewards/chosen": 0.07554519176483154, - "eval_rewards/margins": 1.0289891958236694, - "eval_rewards/rejected": -0.9534440040588379, - "eval_runtime": 152.1284, - "eval_samples_per_second": 13.147, - "eval_steps_per_second": 0.414, - "eval_use_label": 0.0, - "step": 500 + "use_label": 3149.60009765625 }, { "epoch": 0.53, - "learning_rate": 2.593023255813954e-07, - "logits/chosen": -2.6452856063842773, - "logits/rejected": -2.635679244995117, - "logps/chosen": -292.32196044921875, - "logps/rejected": -265.02752685546875, - "loss": 0.5349, - "pred_label": 0.0, - "rewards/accuracies": 0.7749999761581421, - "rewards/chosen": 0.0869629979133606, - "rewards/margins": 0.8606294393539429, - "rewards/rejected": -0.7736665606498718, + "learning_rate": 2.590221187427241e-07, + "logits/chosen": -1.470384120941162, + "logits/rejected": -1.5311263799667358, + "logps/chosen": -307.1033935546875, + "logps/rejected": -274.86114501953125, + "loss": 0.4278, + "pred_label": 838.0499877929688, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 0.8434027433395386, + "rewards/margins": 1.0462173223495483, + "rewards/rejected": -0.20281438529491425, "step": 510, - "use_label": 0.0 + "use_label": 3203.949951171875 }, { "epoch": 0.54, - "learning_rate": 2.534883720930233e-07, - "logits/chosen": -2.707009792327881, - "logits/rejected": -2.695380926132202, - "logps/chosen": -273.6498107910156, - "logps/rejected": -243.3380889892578, - "loss": 0.5236, - "pred_label": 0.0, - "rewards/accuracies": 0.762499988079071, - "rewards/chosen": 0.14563575387001038, - "rewards/margins": 1.0186597108840942, - "rewards/rejected": -0.8730238676071167, + "learning_rate": 2.5320139697322466e-07, + "logits/chosen": -1.3783634901046753, + "logits/rejected": -1.3840962648391724, + "logps/chosen": -316.1509704589844, + "logps/rejected": -297.71527099609375, + "loss": 0.4795, + "pred_label": 861.8499755859375, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": 1.3357948064804077, + "rewards/margins": 1.1968021392822266, + "rewards/rejected": 0.13899262249469757, "step": 520, - "use_label": 0.0 + "use_label": 3260.14990234375 }, { "epoch": 0.55, - "learning_rate": 2.4767441860465117e-07, - "logits/chosen": -2.677058458328247, - "logits/rejected": -2.6737570762634277, - "logps/chosen": -270.5714416503906, - "logps/rejected": -275.0865783691406, - "loss": 0.5015, - "pred_label": 0.0, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.2971714735031128, - "rewards/margins": 1.0320571660995483, - "rewards/rejected": -1.3292286396026611, + "learning_rate": 2.4738067520372524e-07, + "logits/chosen": -1.3981616497039795, + "logits/rejected": -1.3489913940429688, + "logps/chosen": -341.50408935546875, + "logps/rejected": -313.69134521484375, + "loss": 0.4405, + "pred_label": 883.5499877929688, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 1.3740103244781494, + "rewards/margins": 1.0623902082443237, + "rewards/rejected": 0.31162017583847046, "step": 530, - "use_label": 0.0 + "use_label": 3318.449951171875 }, { - "epoch": 0.56, - "learning_rate": 2.4186046511627906e-07, - "logits/chosen": -2.7393388748168945, - "logits/rejected": -2.723223924636841, - "logps/chosen": -288.07904052734375, - "logps/rejected": -284.3858642578125, - "loss": 0.5325, - "pred_label": 0.0, - "rewards/accuracies": 0.75, - "rewards/chosen": 0.023196930065751076, - "rewards/margins": 1.3002605438232422, - "rewards/rejected": -1.2770636081695557, + "epoch": 0.57, + "learning_rate": 2.415599534342258e-07, + "logits/chosen": -1.3855282068252563, + "logits/rejected": -1.3808072805404663, + "logps/chosen": -341.39154052734375, + "logps/rejected": -329.89276123046875, + "loss": 0.4216, + "pred_label": 915.0, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 1.546844482421875, + "rewards/margins": 1.5073612928390503, + "rewards/rejected": 0.039483144879341125, "step": 540, - "use_label": 0.0 + "use_label": 3367.0 }, { "epoch": 0.58, - "learning_rate": 2.3604651162790695e-07, - "logits/chosen": -2.7268896102905273, - "logits/rejected": -2.699005365371704, - "logps/chosen": -293.6747131347656, - "logps/rejected": -266.55059814453125, - "loss": 0.5283, - "pred_label": 0.0, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": 0.11607713997364044, - "rewards/margins": 1.205520510673523, - "rewards/rejected": -1.0894434452056885, + "learning_rate": 2.3573923166472642e-07, + "logits/chosen": -1.411578893661499, + "logits/rejected": -1.4331611394882202, + "logps/chosen": -319.5863952636719, + "logps/rejected": -259.39813232421875, + "loss": 0.4388, + "pred_label": 943.7999877929688, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 1.2579312324523926, + "rewards/margins": 0.9186943769454956, + "rewards/rejected": 0.3392367959022522, "step": 550, - "use_label": 0.0 + "use_label": 3418.199951171875 }, { "epoch": 0.59, - "learning_rate": 2.3023255813953487e-07, - "logits/chosen": -2.6480965614318848, - "logits/rejected": -2.66930890083313, - "logps/chosen": -264.1002197265625, - "logps/rejected": -240.21029663085938, - "loss": 0.6114, - "pred_label": 0.0, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.06454001367092133, - "rewards/margins": 0.681505560874939, - "rewards/rejected": -0.746045708656311, + "learning_rate": 2.2991850989522698e-07, + "logits/chosen": -1.4499213695526123, + "logits/rejected": -1.4626659154891968, + "logps/chosen": -320.32757568359375, + "logps/rejected": -273.2851867675781, + "loss": 0.4447, + "pred_label": 967.6500244140625, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": 1.5723648071289062, + "rewards/margins": 1.1994202136993408, + "rewards/rejected": 0.37294459342956543, "step": 560, - "use_label": 0.0 + "use_label": 3474.35009765625 }, { "epoch": 0.6, - "learning_rate": 2.2441860465116278e-07, - "logits/chosen": -2.708428382873535, - "logits/rejected": -2.7417664527893066, - "logps/chosen": -296.1793212890625, - "logps/rejected": -299.2248840332031, - "loss": 0.5686, - "pred_label": 0.0, - "rewards/accuracies": 0.762499988079071, - "rewards/chosen": 0.24114087224006653, - "rewards/margins": 0.9173105955123901, - "rewards/rejected": -0.6761698126792908, + "learning_rate": 2.2409778812572759e-07, + "logits/chosen": -1.4085966348648071, + "logits/rejected": -1.4106355905532837, + "logps/chosen": -275.1546325683594, + "logps/rejected": -290.6806640625, + "loss": 0.452, + "pred_label": 994.75, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": 1.4340472221374512, + "rewards/margins": 1.0139697790145874, + "rewards/rejected": 0.42007726430892944, "step": 570, - "use_label": 0.0 + "use_label": 3527.25 }, { "epoch": 0.61, - "learning_rate": 2.186046511627907e-07, - "logits/chosen": -2.6812610626220703, - "logits/rejected": -2.6830496788024902, - "logps/chosen": -283.55950927734375, - "logps/rejected": -271.852783203125, - "loss": 0.5146, - "pred_label": 0.0, - "rewards/accuracies": 0.762499988079071, - "rewards/chosen": -0.09269438683986664, - "rewards/margins": 0.9761162996292114, - "rewards/rejected": -1.0688107013702393, + "learning_rate": 2.1827706635622817e-07, + "logits/chosen": -1.4144864082336426, + "logits/rejected": -1.3897688388824463, + "logps/chosen": -291.3064880371094, + "logps/rejected": -258.45001220703125, + "loss": 0.4618, + "pred_label": 1023.5499877929688, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 1.6620935201644897, + "rewards/margins": 1.2861305475234985, + "rewards/rejected": 0.375963032245636, "step": 580, - "use_label": 0.0 + "use_label": 3578.449951171875 }, { "epoch": 0.62, - "learning_rate": 2.127906976744186e-07, - "logits/chosen": -2.647878646850586, - "logits/rejected": -2.62599515914917, - "logps/chosen": -290.76544189453125, - "logps/rejected": -325.32684326171875, - "loss": 0.5352, - "pred_label": 0.0, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.14185315370559692, - "rewards/margins": 0.620773196220398, - "rewards/rejected": -0.7626262903213501, + "learning_rate": 2.1245634458672875e-07, + "logits/chosen": -1.3765242099761963, + "logits/rejected": -1.36930251121521, + "logps/chosen": -277.45513916015625, + "logps/rejected": -277.4739685058594, + "loss": 0.4351, + "pred_label": 1045.0, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.9612810015678406, + "rewards/margins": 0.7380627393722534, + "rewards/rejected": 0.22321827709674835, "step": 590, - "use_label": 0.0 + "use_label": 3637.0 }, { "epoch": 0.63, - "learning_rate": 2.0697674418604648e-07, - "logits/chosen": -2.6654324531555176, - "logits/rejected": -2.6334807872772217, - "logps/chosen": -318.82781982421875, - "logps/rejected": -323.73052978515625, - "loss": 0.5791, - "pred_label": 0.0, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": 0.07647746801376343, - "rewards/margins": 0.8629099726676941, - "rewards/rejected": -0.7864325642585754, + "learning_rate": 2.0663562281722933e-07, + "logits/chosen": -1.3574728965759277, + "logits/rejected": -1.3883976936340332, + "logps/chosen": -383.3297424316406, + "logps/rejected": -419.20562744140625, + "loss": 0.4241, + "pred_label": 1069.0999755859375, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 1.8888603448867798, + "rewards/margins": 1.7262849807739258, + "rewards/rejected": 0.16257524490356445, "step": 600, - "use_label": 0.0 - }, - { - "epoch": 0.63, - "eval_logits/chosen": -2.759521245956421, - "eval_logits/rejected": -2.751826763153076, - "eval_logps/chosen": -288.24688720703125, - "eval_logps/rejected": -271.9148864746094, - "eval_loss": 0.5281260013580322, - "eval_pred_label": 0.0, - "eval_rewards/accuracies": 0.7460317611694336, - "eval_rewards/chosen": 0.02767007239162922, - "eval_rewards/margins": 1.0551676750183105, - "eval_rewards/rejected": -1.02749764919281, - "eval_runtime": 151.9139, - "eval_samples_per_second": 13.165, - "eval_steps_per_second": 0.415, - "eval_use_label": 0.0, - "step": 600 + "use_label": 3692.89990234375 }, { "epoch": 0.64, - "learning_rate": 2.0116279069767443e-07, - "logits/chosen": -2.727267026901245, - "logits/rejected": -2.6593177318573, - "logps/chosen": -285.02386474609375, - "logps/rejected": -229.25363159179688, - "loss": 0.5251, - "pred_label": 0.0, - "rewards/accuracies": 0.7749999761581421, - "rewards/chosen": -0.22663810849189758, - "rewards/margins": 0.9340082406997681, - "rewards/rejected": -1.1606463193893433, + "learning_rate": 2.008149010477299e-07, + "logits/chosen": -1.4299789667129517, + "logits/rejected": -1.4499460458755493, + "logps/chosen": -298.21856689453125, + "logps/rejected": -250.89303588867188, + "loss": 0.3996, + "pred_label": 1102.800048828125, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": 1.500954508781433, + "rewards/margins": 1.2322769165039062, + "rewards/rejected": 0.2686777114868164, "step": 610, - "use_label": 0.0 + "use_label": 3739.199951171875 }, { "epoch": 0.65, - "learning_rate": 1.9534883720930232e-07, - "logits/chosen": -2.7362422943115234, - "logits/rejected": -2.684910297393799, - "logps/chosen": -272.39776611328125, - "logps/rejected": -238.19973754882812, - "loss": 0.5186, - "pred_label": 0.0, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.16883151233196259, - "rewards/margins": 0.9152736663818359, - "rewards/rejected": -1.0841050148010254, + "learning_rate": 1.949941792782305e-07, + "logits/chosen": -1.409200668334961, + "logits/rejected": -1.4022369384765625, + "logps/chosen": -324.9916076660156, + "logps/rejected": -291.18145751953125, + "loss": 0.3997, + "pred_label": 1130.0999755859375, + "rewards/accuracies": 0.7875000238418579, + "rewards/chosen": 1.2097212076187134, + "rewards/margins": 1.1757129430770874, + "rewards/rejected": 0.03400828689336777, "step": 620, - "use_label": 0.0 + "use_label": 3791.89990234375 }, { "epoch": 0.66, - "learning_rate": 1.895348837209302e-07, - "logits/chosen": -2.6318321228027344, - "logits/rejected": -2.720790386199951, - "logps/chosen": -252.7714080810547, - "logps/rejected": -261.837158203125, - "loss": 0.508, - "pred_label": 0.0, - "rewards/accuracies": 0.8374999761581421, - "rewards/chosen": 0.06653375923633575, - "rewards/margins": 1.351811408996582, - "rewards/rejected": -1.2852776050567627, + "learning_rate": 1.8917345750873107e-07, + "logits/chosen": -1.4302200078964233, + "logits/rejected": -1.3909845352172852, + "logps/chosen": -304.67205810546875, + "logps/rejected": -302.0281066894531, + "loss": 0.435, + "pred_label": 1159.6500244140625, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 1.0896503925323486, + "rewards/margins": 1.4318714141845703, + "rewards/rejected": -0.34222090244293213, "step": 630, - "use_label": 0.0 + "use_label": 3842.35009765625 }, { "epoch": 0.67, - "learning_rate": 1.8372093023255813e-07, - "logits/chosen": -2.708900213241577, - "logits/rejected": -2.6771461963653564, - "logps/chosen": -248.0977325439453, - "logps/rejected": -243.92178344726562, - "loss": 0.509, - "pred_label": 0.0, - "rewards/accuracies": 0.8125, - "rewards/chosen": 0.1819048821926117, - "rewards/margins": 1.3170161247253418, - "rewards/rejected": -1.1351112127304077, + "learning_rate": 1.8335273573923165e-07, + "logits/chosen": -1.3809505701065063, + "logits/rejected": -1.40725576877594, + "logps/chosen": -292.699951171875, + "logps/rejected": -259.057861328125, + "loss": 0.412, + "pred_label": 1190.0999755859375, + "rewards/accuracies": 0.75, + "rewards/chosen": 1.1348669528961182, + "rewards/margins": 1.3778162002563477, + "rewards/rejected": -0.2429492473602295, "step": 640, - "use_label": 0.0 + "use_label": 3891.89990234375 }, { "epoch": 0.68, - "learning_rate": 1.7790697674418602e-07, - "logits/chosen": -2.767035722732544, - "logits/rejected": -2.7256782054901123, - "logps/chosen": -292.49127197265625, - "logps/rejected": -259.6093444824219, - "loss": 0.5359, - "pred_label": 0.0, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.06279899179935455, - "rewards/margins": 1.0030735731124878, - "rewards/rejected": -1.0658727884292603, + "learning_rate": 1.7753201396973226e-07, + "logits/chosen": -1.4333328008651733, + "logits/rejected": -1.4184446334838867, + "logps/chosen": -347.40069580078125, + "logps/rejected": -302.5298156738281, + "loss": 0.424, + "pred_label": 1222.949951171875, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": 1.7120205163955688, + "rewards/margins": 1.4448004961013794, + "rewards/rejected": 0.26721999049186707, "step": 650, - "use_label": 0.0 + "use_label": 3939.050048828125 }, { "epoch": 0.69, - "learning_rate": 1.7209302325581396e-07, - "logits/chosen": -2.7260286808013916, - "logits/rejected": -2.7639498710632324, - "logps/chosen": -286.2920837402344, - "logps/rejected": -274.6990966796875, - "loss": 0.5137, - "pred_label": 0.0, - "rewards/accuracies": 0.7749999761581421, - "rewards/chosen": 0.031030695885419846, - "rewards/margins": 0.9901006817817688, - "rewards/rejected": -0.9590700268745422, + "learning_rate": 1.7171129220023281e-07, + "logits/chosen": -1.441620945930481, + "logits/rejected": -1.4552855491638184, + "logps/chosen": -370.9482727050781, + "logps/rejected": -276.6924133300781, + "loss": 0.4167, + "pred_label": 1254.8499755859375, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 1.308371901512146, + "rewards/margins": 1.105975866317749, + "rewards/rejected": 0.20239587128162384, "step": 660, - "use_label": 0.0 + "use_label": 3987.14990234375 }, { "epoch": 0.7, - "learning_rate": 1.6627906976744186e-07, - "logits/chosen": -2.6322426795959473, - "logits/rejected": -2.632668972015381, - "logps/chosen": -303.038818359375, - "logps/rejected": -288.877197265625, - "loss": 0.514, - "pred_label": 0.0, - "rewards/accuracies": 0.7749999761581421, - "rewards/chosen": 0.10557667911052704, - "rewards/margins": 1.2127177715301514, - "rewards/rejected": -1.1071412563323975, + "learning_rate": 1.658905704307334e-07, + "logits/chosen": -1.3389856815338135, + "logits/rejected": -1.4132729768753052, + "logps/chosen": -337.841796875, + "logps/rejected": -343.3721923828125, + "loss": 0.4335, + "pred_label": 1284.449951171875, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": 1.2429578304290771, + "rewards/margins": 1.373488187789917, + "rewards/rejected": -0.13053035736083984, "step": 670, - "use_label": 0.0 + "use_label": 4037.550048828125 }, { "epoch": 0.71, - "learning_rate": 1.6046511627906975e-07, - "logits/chosen": -2.5468828678131104, - "logits/rejected": -2.5757508277893066, - "logps/chosen": -228.77261352539062, - "logps/rejected": -234.0619659423828, - "loss": 0.5591, - "pred_label": 0.0, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": -0.0003312155604362488, - "rewards/margins": 0.6896525621414185, - "rewards/rejected": -0.6899837255477905, + "learning_rate": 1.60069848661234e-07, + "logits/chosen": -1.4116053581237793, + "logits/rejected": -1.3834308385849, + "logps/chosen": -286.65435791015625, + "logps/rejected": -264.6783752441406, + "loss": 0.4426, + "pred_label": 1311.800048828125, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 1.0291013717651367, + "rewards/margins": 0.6306339502334595, + "rewards/rejected": 0.398467481136322, "step": 680, - "use_label": 0.0 + "use_label": 4090.199951171875 }, { "epoch": 0.72, - "learning_rate": 1.5465116279069766e-07, - "logits/chosen": -2.636993885040283, - "logits/rejected": -2.585117816925049, - "logps/chosen": -250.43234252929688, - "logps/rejected": -217.508056640625, - "loss": 0.5132, - "pred_label": 0.0, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": 0.18243907392024994, - "rewards/margins": 1.0671703815460205, - "rewards/rejected": -0.8847312927246094, + "learning_rate": 1.5424912689173456e-07, + "logits/chosen": -1.3959333896636963, + "logits/rejected": -1.3411847352981567, + "logps/chosen": -257.6075744628906, + "logps/rejected": -232.0232391357422, + "loss": 0.4287, + "pred_label": 1343.199951171875, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 0.9776744842529297, + "rewards/margins": 1.2699415683746338, + "rewards/rejected": -0.2922670245170593, "step": 690, - "use_label": 0.0 + "use_label": 4138.7998046875 }, { "epoch": 0.73, - "learning_rate": 1.4883720930232558e-07, - "logits/chosen": -2.63033390045166, - "logits/rejected": -2.5760812759399414, - "logps/chosen": -255.9130096435547, - "logps/rejected": -274.47467041015625, - "loss": 0.5238, - "pred_label": 0.0, - "rewards/accuracies": 0.8125, - "rewards/chosen": 0.016175851225852966, - "rewards/margins": 1.1498843431472778, - "rewards/rejected": -1.1337083578109741, + "learning_rate": 1.4842840512223514e-07, + "logits/chosen": -1.339285135269165, + "logits/rejected": -1.3700721263885498, + "logps/chosen": -287.5785217285156, + "logps/rejected": -265.7539978027344, + "loss": 0.4009, + "pred_label": 1377.449951171875, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.8038662672042847, + "rewards/margins": 1.3565984964370728, + "rewards/rejected": -0.552731990814209, "step": 700, - "use_label": 0.0 - }, - { - "epoch": 0.73, - "eval_logits/chosen": -2.733815908432007, - "eval_logits/rejected": -2.726200580596924, - "eval_logps/chosen": -288.18280029296875, - "eval_logps/rejected": -272.3071594238281, - "eval_loss": 0.5295113325119019, - "eval_pred_label": 0.0, - "eval_rewards/accuracies": 0.7539682388305664, - "eval_rewards/chosen": 0.034078579396009445, - "eval_rewards/margins": 1.1008045673370361, - "eval_rewards/rejected": -1.0667259693145752, - "eval_runtime": 151.6475, - "eval_samples_per_second": 13.188, - "eval_steps_per_second": 0.415, - "eval_use_label": 0.0, - "step": 700 + "use_label": 4184.5498046875 }, { "epoch": 0.74, - "learning_rate": 1.4302325581395347e-07, - "logits/chosen": -2.6521668434143066, - "logits/rejected": -2.6327109336853027, - "logps/chosen": -305.80029296875, - "logps/rejected": -239.8463897705078, - "loss": 0.5036, - "pred_label": 0.0, - "rewards/accuracies": 0.7875000238418579, - "rewards/chosen": 0.14674702286720276, - "rewards/margins": 1.1667625904083252, - "rewards/rejected": -1.0200153589248657, + "learning_rate": 1.4260768335273574e-07, + "logits/chosen": -1.3346189260482788, + "logits/rejected": -1.3581318855285645, + "logps/chosen": -307.48382568359375, + "logps/rejected": -266.8608703613281, + "loss": 0.4291, + "pred_label": 1405.949951171875, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 1.1296558380126953, + "rewards/margins": 1.2326912879943848, + "rewards/rejected": -0.1030355915427208, "step": 710, - "use_label": 0.0 + "use_label": 4236.0498046875 }, { "epoch": 0.75, - "learning_rate": 1.372093023255814e-07, - "logits/chosen": -2.6996383666992188, - "logits/rejected": -2.70395827293396, - "logps/chosen": -334.99652099609375, - "logps/rejected": -312.52581787109375, - "loss": 0.4854, - "pred_label": 0.0, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": 0.011823808774352074, - "rewards/margins": 1.3162919282913208, - "rewards/rejected": -1.3044681549072266, + "learning_rate": 1.3678696158323632e-07, + "logits/chosen": -1.4102370738983154, + "logits/rejected": -1.434556484222412, + "logps/chosen": -361.06298828125, + "logps/rejected": -317.8431091308594, + "loss": 0.4146, + "pred_label": 1440.3499755859375, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 1.1569219827651978, + "rewards/margins": 1.3604518175125122, + "rewards/rejected": -0.2035297155380249, "step": 720, - "use_label": 0.0 + "use_label": 4281.64990234375 }, { "epoch": 0.76, - "learning_rate": 1.3139534883720928e-07, - "logits/chosen": -2.6853585243225098, - "logits/rejected": -2.634413480758667, - "logps/chosen": -327.4559020996094, - "logps/rejected": -264.45123291015625, - "loss": 0.5201, - "pred_label": 0.0, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": 0.2056887149810791, - "rewards/margins": 1.0861843824386597, - "rewards/rejected": -0.8804956674575806, + "learning_rate": 1.3096623981373688e-07, + "logits/chosen": -1.4674919843673706, + "logits/rejected": -1.4597179889678955, + "logps/chosen": -350.6000671386719, + "logps/rejected": -272.87091064453125, + "loss": 0.4332, + "pred_label": 1467.0999755859375, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": 1.227412462234497, + "rewards/margins": 1.0035221576690674, + "rewards/rejected": 0.2238902598619461, "step": 730, - "use_label": 0.0 + "use_label": 4334.89990234375 }, { "epoch": 0.77, - "learning_rate": 1.2558139534883723e-07, - "logits/chosen": -2.686652660369873, - "logits/rejected": -2.6892364025115967, - "logps/chosen": -297.3292541503906, - "logps/rejected": -277.6313781738281, - "loss": 0.5333, - "pred_label": 0.0, - "rewards/accuracies": 0.824999988079071, - "rewards/chosen": 0.06698469072580338, - "rewards/margins": 1.1885979175567627, - "rewards/rejected": -1.1216132640838623, + "learning_rate": 1.2514551804423749e-07, + "logits/chosen": -1.3954675197601318, + "logits/rejected": -1.3886655569076538, + "logps/chosen": -387.49664306640625, + "logps/rejected": -334.8097839355469, + "loss": 0.4182, + "pred_label": 1499.800048828125, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 1.6025422811508179, + "rewards/margins": 1.8493826389312744, + "rewards/rejected": -0.24684014916419983, "step": 740, - "use_label": 0.0 + "use_label": 4382.2001953125 }, { "epoch": 0.78, - "learning_rate": 1.1976744186046512e-07, - "logits/chosen": -2.6189768314361572, - "logits/rejected": -2.598482370376587, - "logps/chosen": -260.7591247558594, - "logps/rejected": -294.9975280761719, - "loss": 0.523, - "pred_label": 0.0, - "rewards/accuracies": 0.7749999761581421, - "rewards/chosen": -0.0578201524913311, - "rewards/margins": 1.053215742111206, - "rewards/rejected": -1.1110358238220215, + "learning_rate": 1.1932479627473807e-07, + "logits/chosen": -1.3316097259521484, + "logits/rejected": -1.354921579360962, + "logps/chosen": -326.4532165527344, + "logps/rejected": -342.7228698730469, + "loss": 0.4293, + "pred_label": 1531.550048828125, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": 1.2817234992980957, + "rewards/margins": 1.2376317977905273, + "rewards/rejected": 0.04409169405698776, "step": 750, - "use_label": 0.0 + "use_label": 4430.4501953125 }, { - "epoch": 0.79, - "learning_rate": 1.1395348837209302e-07, - "logits/chosen": -2.705519199371338, - "logits/rejected": -2.696622371673584, - "logps/chosen": -306.7921447753906, - "logps/rejected": -287.7425842285156, - "loss": 0.4972, - "pred_label": 0.0, - "rewards/accuracies": 0.637499988079071, - "rewards/chosen": -0.11914374679327011, - "rewards/margins": 0.7654374837875366, - "rewards/rejected": -0.8845812082290649, + "epoch": 0.8, + "learning_rate": 1.1350407450523865e-07, + "logits/chosen": -1.4075146913528442, + "logits/rejected": -1.379639983177185, + "logps/chosen": -321.2019348144531, + "logps/rejected": -295.81097412109375, + "loss": 0.3994, + "pred_label": 1558.199951171875, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 1.4901134967803955, + "rewards/margins": 1.1105048656463623, + "rewards/rejected": 0.3796086311340332, "step": 760, - "use_label": 0.0 + "use_label": 4483.7998046875 }, { "epoch": 0.81, - "learning_rate": 1.0813953488372093e-07, - "logits/chosen": -2.677530288696289, - "logits/rejected": -2.637589931488037, - "logps/chosen": -304.0654296875, - "logps/rejected": -326.1291809082031, - "loss": 0.5239, - "pred_label": 0.0, - "rewards/accuracies": 0.7749999761581421, - "rewards/chosen": -0.015724360942840576, - "rewards/margins": 1.0277526378631592, - "rewards/rejected": -1.0434770584106445, + "learning_rate": 1.0768335273573923e-07, + "logits/chosen": -1.42855703830719, + "logits/rejected": -1.4395478963851929, + "logps/chosen": -401.9483337402344, + "logps/rejected": -382.68719482421875, + "loss": 0.423, + "pred_label": 1587.8499755859375, + "rewards/accuracies": 0.625, + "rewards/chosen": 1.3799264430999756, + "rewards/margins": 1.2153711318969727, + "rewards/rejected": 0.16455543041229248, "step": 770, - "use_label": 0.0 + "use_label": 4534.14990234375 }, { "epoch": 0.82, - "learning_rate": 1.0232558139534883e-07, - "logits/chosen": -2.674903154373169, - "logits/rejected": -2.6580498218536377, - "logps/chosen": -249.53701782226562, - "logps/rejected": -244.45858764648438, - "loss": 0.5269, - "pred_label": 0.0, + "learning_rate": 1.0186263096623981e-07, + "logits/chosen": -1.470218539237976, + "logits/rejected": -1.4779566526412964, + "logps/chosen": -231.970458984375, + "logps/rejected": -231.0379638671875, + "loss": 0.4331, + "pred_label": 1611.0999755859375, "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.016674162819981575, - "rewards/margins": 0.7822299599647522, - "rewards/rejected": -0.7989041805267334, + "rewards/chosen": 1.0060170888900757, + "rewards/margins": 0.8901361227035522, + "rewards/rejected": 0.11588089168071747, "step": 780, - "use_label": 0.0 + "use_label": 4590.89990234375 }, { "epoch": 0.83, - "learning_rate": 9.651162790697674e-08, - "logits/chosen": -2.661597728729248, - "logits/rejected": -2.700005054473877, - "logps/chosen": -275.3680114746094, - "logps/rejected": -290.7065734863281, - "loss": 0.5744, - "pred_label": 0.0, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.020705239847302437, - "rewards/margins": 1.2380138635635376, - "rewards/rejected": -1.2587188482284546, + "learning_rate": 9.604190919674039e-08, + "logits/chosen": -1.3265211582183838, + "logits/rejected": -1.3321948051452637, + "logps/chosen": -322.144287109375, + "logps/rejected": -319.94805908203125, + "loss": 0.427, + "pred_label": 1633.0999755859375, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.9607486724853516, + "rewards/margins": 0.85713130235672, + "rewards/rejected": 0.10361733287572861, "step": 790, - "use_label": 0.0 + "use_label": 4648.89990234375 }, { "epoch": 0.84, - "learning_rate": 9.069767441860465e-08, - "logits/chosen": -2.626075506210327, - "logits/rejected": -2.600184202194214, - "logps/chosen": -380.11334228515625, - "logps/rejected": -328.88665771484375, - "loss": 0.515, - "pred_label": 0.0, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.10672910511493683, - "rewards/margins": 0.9256986379623413, - "rewards/rejected": -1.0324275493621826, + "learning_rate": 9.022118742724097e-08, + "logits/chosen": -1.2604429721832275, + "logits/rejected": -1.2056382894515991, + "logps/chosen": -399.75701904296875, + "logps/rejected": -295.15570068359375, + "loss": 0.4324, + "pred_label": 1658.9000244140625, + "rewards/accuracies": 0.625, + "rewards/chosen": 1.0637061595916748, + "rewards/margins": 1.153640866279602, + "rewards/rejected": -0.08993469178676605, "step": 800, - "use_label": 0.0 - }, - { - "epoch": 0.84, - "eval_logits/chosen": -2.754448413848877, - "eval_logits/rejected": -2.747894763946533, - "eval_logps/chosen": -288.57720947265625, - "eval_logps/rejected": -272.82855224609375, - "eval_loss": 0.5257741212844849, - "eval_pred_label": 0.0, - "eval_rewards/accuracies": 0.7539682388305664, - "eval_rewards/chosen": -0.00535963149741292, - "eval_rewards/margins": 1.11350679397583, - "eval_rewards/rejected": -1.1188664436340332, - "eval_runtime": 152.0351, - "eval_samples_per_second": 13.155, - "eval_steps_per_second": 0.414, - "eval_use_label": 0.0, - "step": 800 + "use_label": 4703.10009765625 }, { "epoch": 0.85, - "learning_rate": 8.488372093023254e-08, - "logits/chosen": -2.720799684524536, - "logits/rejected": -2.6458606719970703, - "logps/chosen": -279.9139404296875, - "logps/rejected": -267.57965087890625, - "loss": 0.5294, - "pred_label": 0.0, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.003540521953254938, - "rewards/margins": 0.9458888173103333, - "rewards/rejected": -0.9494293332099915, + "learning_rate": 8.440046565774157e-08, + "logits/chosen": -1.4043221473693848, + "logits/rejected": -1.393165111541748, + "logps/chosen": -328.1530456542969, + "logps/rejected": -278.7724914550781, + "loss": 0.4131, + "pred_label": 1681.800048828125, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.7224707007408142, + "rewards/margins": 0.7582930326461792, + "rewards/rejected": -0.035822317004203796, "step": 810, - "use_label": 0.0 + "use_label": 4760.2001953125 }, { "epoch": 0.86, - "learning_rate": 7.906976744186046e-08, - "logits/chosen": -2.6257495880126953, - "logits/rejected": -2.6502857208251953, - "logps/chosen": -245.8484344482422, - "logps/rejected": -254.8400115966797, - "loss": 0.5512, - "pred_label": 0.0, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": 0.12137588113546371, - "rewards/margins": 1.019413709640503, - "rewards/rejected": -0.8980380296707153, + "learning_rate": 7.857974388824213e-08, + "logits/chosen": -1.4293477535247803, + "logits/rejected": -1.3876526355743408, + "logps/chosen": -344.793212890625, + "logps/rejected": -273.80633544921875, + "loss": 0.4219, + "pred_label": 1706.699951171875, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": 1.0625591278076172, + "rewards/margins": 1.4430253505706787, + "rewards/rejected": -0.38046637177467346, "step": 820, - "use_label": 0.0 + "use_label": 4815.2998046875 }, { "epoch": 0.87, - "learning_rate": 7.325581395348837e-08, - "logits/chosen": -2.715819835662842, - "logits/rejected": -2.6348624229431152, - "logps/chosen": -256.454833984375, - "logps/rejected": -287.5106506347656, - "loss": 0.4857, - "pred_label": 0.0, - "rewards/accuracies": 0.7875000238418579, - "rewards/chosen": -0.025987720116972923, - "rewards/margins": 1.199789047241211, - "rewards/rejected": -1.2257767915725708, + "learning_rate": 7.275902211874273e-08, + "logits/chosen": -1.3367773294448853, + "logits/rejected": -1.3325202465057373, + "logps/chosen": -288.30853271484375, + "logps/rejected": -340.27947998046875, + "loss": 0.4016, + "pred_label": 1736.0, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": 0.9091051816940308, + "rewards/margins": 1.0785413980484009, + "rewards/rejected": -0.16943617165088654, "step": 830, - "use_label": 0.0 + "use_label": 4866.0 }, { "epoch": 0.88, - "learning_rate": 6.744186046511628e-08, - "logits/chosen": -2.6387436389923096, - "logits/rejected": -2.6407735347747803, - "logps/chosen": -301.9522705078125, - "logps/rejected": -316.30950927734375, - "loss": 0.5383, - "pred_label": 0.0, - "rewards/accuracies": 0.737500011920929, - "rewards/chosen": 0.08713732659816742, - "rewards/margins": 1.017157793045044, - "rewards/rejected": -0.9300203323364258, + "learning_rate": 6.693830034924331e-08, + "logits/chosen": -1.3973602056503296, + "logits/rejected": -1.42702054977417, + "logps/chosen": -320.7757263183594, + "logps/rejected": -301.08648681640625, + "loss": 0.4395, + "pred_label": 1768.5999755859375, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.8193642497062683, + "rewards/margins": 0.6294553875923157, + "rewards/rejected": 0.18990902602672577, "step": 840, - "use_label": 0.0 + "use_label": 4913.39990234375 }, { "epoch": 0.89, - "learning_rate": 6.162790697674419e-08, - "logits/chosen": -2.8061776161193848, - "logits/rejected": -2.7850382328033447, - "logps/chosen": -308.90155029296875, - "logps/rejected": -261.96075439453125, - "loss": 0.537, - "pred_label": 0.0, - "rewards/accuracies": 0.75, - "rewards/chosen": 0.09603701531887054, - "rewards/margins": 0.9941841959953308, - "rewards/rejected": -0.8981472253799438, + "learning_rate": 6.111757857974389e-08, + "logits/chosen": -1.3880376815795898, + "logits/rejected": -1.3541958332061768, + "logps/chosen": -338.11419677734375, + "logps/rejected": -272.3611755371094, + "loss": 0.4452, + "pred_label": 1792.699951171875, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 1.19716477394104, + "rewards/margins": 0.992131233215332, + "rewards/rejected": 0.20503361523151398, "step": 850, - "use_label": 0.0 + "use_label": 4969.2998046875 }, { "epoch": 0.9, - "learning_rate": 5.5813953488372087e-08, - "logits/chosen": -2.6717400550842285, - "logits/rejected": -2.691214084625244, - "logps/chosen": -292.5924072265625, - "logps/rejected": -275.4133605957031, - "loss": 0.5371, - "pred_label": 0.0, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.14807067811489105, - "rewards/margins": 1.216901183128357, - "rewards/rejected": -1.3649718761444092, + "learning_rate": 5.529685681024446e-08, + "logits/chosen": -1.4271256923675537, + "logits/rejected": -1.3966145515441895, + "logps/chosen": -348.1614685058594, + "logps/rejected": -303.44366455078125, + "loss": 0.4074, + "pred_label": 1821.3499755859375, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": 1.1399493217468262, + "rewards/margins": 1.3317511081695557, + "rewards/rejected": -0.19180157780647278, "step": 860, - "use_label": 0.0 + "use_label": 5020.64990234375 }, { "epoch": 0.91, - "learning_rate": 5e-08, - "logits/chosen": -2.7427048683166504, - "logits/rejected": -2.779627799987793, - "logps/chosen": -242.98812866210938, - "logps/rejected": -253.3491973876953, - "loss": 0.5322, - "pred_label": 0.0, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.05029615759849548, - "rewards/margins": 1.148262619972229, - "rewards/rejected": -1.1985588073730469, + "learning_rate": 4.947613504074505e-08, + "logits/chosen": -1.3959957361221313, + "logits/rejected": -1.3792731761932373, + "logps/chosen": -294.4374694824219, + "logps/rejected": -295.8282165527344, + "loss": 0.4377, + "pred_label": 1854.699951171875, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 1.4856358766555786, + "rewards/margins": 1.3335050344467163, + "rewards/rejected": 0.15213069319725037, "step": 870, - "use_label": 0.0 + "use_label": 5067.2998046875 }, { "epoch": 0.92, - "learning_rate": 4.41860465116279e-08, - "logits/chosen": -2.7546846866607666, - "logits/rejected": -2.697383165359497, - "logps/chosen": -284.3351135253906, - "logps/rejected": -287.35260009765625, - "loss": 0.4992, - "pred_label": 0.0, - "rewards/accuracies": 0.6875, - "rewards/chosen": -0.2294842004776001, - "rewards/margins": 0.8660097122192383, - "rewards/rejected": -1.095494031906128, + "learning_rate": 4.365541327124563e-08, + "logits/chosen": -1.3475821018218994, + "logits/rejected": -1.3881584405899048, + "logps/chosen": -361.41815185546875, + "logps/rejected": -341.0059509277344, + "loss": 0.3875, + "pred_label": 1890.75, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 1.0190831422805786, + "rewards/margins": 1.0306825637817383, + "rewards/rejected": -0.011599564924836159, "step": 880, - "use_label": 0.0 + "use_label": 5111.25 }, { "epoch": 0.93, - "learning_rate": 3.837209302325581e-08, - "logits/chosen": -2.7058663368225098, - "logits/rejected": -2.6869189739227295, - "logps/chosen": -246.6070556640625, - "logps/rejected": -248.0365753173828, - "loss": 0.5038, - "pred_label": 0.0, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.039266109466552734, - "rewards/margins": 0.955298125743866, - "rewards/rejected": -0.9945642352104187, + "learning_rate": 3.783469150174622e-08, + "logits/chosen": -1.356093168258667, + "logits/rejected": -1.3647395372390747, + "logps/chosen": -305.94012451171875, + "logps/rejected": -321.16632080078125, + "loss": 0.4422, + "pred_label": 1924.9000244140625, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 1.3352932929992676, + "rewards/margins": 1.4743802547454834, + "rewards/rejected": -0.13908688724040985, "step": 890, - "use_label": 0.0 + "use_label": 5157.10009765625 }, { "epoch": 0.94, - "learning_rate": 3.255813953488372e-08, - "logits/chosen": -2.571969509124756, - "logits/rejected": -2.546536684036255, - "logps/chosen": -254.21237182617188, - "logps/rejected": -247.3881072998047, - "loss": 0.5166, - "pred_label": 0.0, - "rewards/accuracies": 0.762499988079071, - "rewards/chosen": 0.08884133398532867, - "rewards/margins": 1.227858304977417, - "rewards/rejected": -1.13901686668396, + "learning_rate": 3.20139697322468e-08, + "logits/chosen": -1.2386057376861572, + "logits/rejected": -1.2891590595245361, + "logps/chosen": -356.11444091796875, + "logps/rejected": -326.7837829589844, + "loss": 0.4248, + "pred_label": 1953.9000244140625, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": 0.934838593006134, + "rewards/margins": 1.1246929168701172, + "rewards/rejected": -0.1898542195558548, "step": 900, - "use_label": 0.0 - }, - { - "epoch": 0.94, - "eval_logits/chosen": -2.782860040664673, - "eval_logits/rejected": -2.777499198913574, - "eval_logps/chosen": -289.315673828125, - "eval_logps/rejected": -273.0716552734375, - "eval_loss": 0.5272806286811829, - "eval_pred_label": 0.0, - "eval_rewards/accuracies": 0.761904776096344, - "eval_rewards/chosen": -0.07920397818088531, - "eval_rewards/margins": 1.0639697313308716, - "eval_rewards/rejected": -1.1431735754013062, - "eval_runtime": 151.9262, - "eval_samples_per_second": 13.164, - "eval_steps_per_second": 0.415, - "eval_use_label": 0.0, - "step": 900 + "use_label": 5208.10009765625 }, { "epoch": 0.95, - "learning_rate": 2.6744186046511626e-08, - "logits/chosen": -2.7148184776306152, - "logits/rejected": -2.7250776290893555, - "logps/chosen": -236.6240234375, - "logps/rejected": -242.1452178955078, - "loss": 0.4962, - "pred_label": 0.0, - "rewards/accuracies": 0.6875, - "rewards/chosen": -0.22720427811145782, - "rewards/margins": 0.8557177782058716, - "rewards/rejected": -1.0829219818115234, + "learning_rate": 2.619324796274738e-08, + "logits/chosen": -1.4316788911819458, + "logits/rejected": -1.4342458248138428, + "logps/chosen": -307.85601806640625, + "logps/rejected": -290.3519592285156, + "loss": 0.4028, + "pred_label": 1981.25, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": 1.2007009983062744, + "rewards/margins": 1.2482540607452393, + "rewards/rejected": -0.04755309969186783, "step": 910, - "use_label": 0.0 + "use_label": 5260.75 }, { "epoch": 0.96, - "learning_rate": 2.0930232558139533e-08, - "logits/chosen": -2.641587734222412, - "logits/rejected": -2.6006321907043457, - "logps/chosen": -259.6849060058594, - "logps/rejected": -247.21450805664062, - "loss": 0.4621, - "pred_label": 0.0, - "rewards/accuracies": 0.7875000238418579, - "rewards/chosen": 0.009412238374352455, - "rewards/margins": 1.2470829486846924, - "rewards/rejected": -1.237670660018921, + "learning_rate": 2.037252619324796e-08, + "logits/chosen": -1.4033372402191162, + "logits/rejected": -1.414004921913147, + "logps/chosen": -313.77239990234375, + "logps/rejected": -298.83465576171875, + "loss": 0.4144, + "pred_label": 2008.5, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 1.128159523010254, + "rewards/margins": 1.3455536365509033, + "rewards/rejected": -0.21739396452903748, "step": 920, - "use_label": 0.0 + "use_label": 5313.5 }, { "epoch": 0.97, - "learning_rate": 1.511627906976744e-08, - "logits/chosen": -2.7401022911071777, - "logits/rejected": -2.733879566192627, - "logps/chosen": -280.37432861328125, - "logps/rejected": -234.80264282226562, - "loss": 0.5163, - "pred_label": 0.0, - "rewards/accuracies": 0.8125, - "rewards/chosen": -0.03051159344613552, - "rewards/margins": 1.1736021041870117, - "rewards/rejected": -1.2041137218475342, + "learning_rate": 1.4551804423748545e-08, + "logits/chosen": -1.4188601970672607, + "logits/rejected": -1.4161401987075806, + "logps/chosen": -271.213134765625, + "logps/rejected": -271.7073669433594, + "loss": 0.4048, + "pred_label": 2038.9000244140625, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 1.2960989475250244, + "rewards/margins": 1.6117242574691772, + "rewards/rejected": -0.3156254291534424, "step": 930, - "use_label": 0.0 + "use_label": 5363.10009765625 }, { "epoch": 0.98, - "learning_rate": 9.302325581395349e-09, - "logits/chosen": -2.7654836177825928, - "logits/rejected": -2.7249624729156494, - "logps/chosen": -323.6404724121094, - "logps/rejected": -323.39886474609375, - "loss": 0.4899, - "pred_label": 0.0, - "rewards/accuracies": 0.762499988079071, - "rewards/chosen": -0.04380204528570175, - "rewards/margins": 1.207780361175537, - "rewards/rejected": -1.2515825033187866, + "learning_rate": 8.731082654249125e-09, + "logits/chosen": -1.3573418855667114, + "logits/rejected": -1.3962044715881348, + "logps/chosen": -368.0345153808594, + "logps/rejected": -342.75604248046875, + "loss": 0.3982, + "pred_label": 2071.39990234375, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": 1.0156645774841309, + "rewards/margins": 1.1141072511672974, + "rewards/rejected": -0.09844263643026352, "step": 940, - "use_label": 0.0 + "use_label": 5410.60009765625 }, { "epoch": 0.99, - "learning_rate": 3.4883720930232554e-09, - "logits/chosen": -2.725027084350586, - "logits/rejected": -2.711854934692383, - "logps/chosen": -288.2428894042969, - "logps/rejected": -280.0832214355469, - "loss": 0.5456, - "pred_label": 0.0, - "rewards/accuracies": 0.7875000238418579, - "rewards/chosen": -0.0814594253897667, - "rewards/margins": 1.5194867849349976, - "rewards/rejected": -1.6009461879730225, + "learning_rate": 2.910360884749709e-09, + "logits/chosen": -1.3523130416870117, + "logits/rejected": -1.3391929864883423, + "logps/chosen": -287.52691650390625, + "logps/rejected": -279.154541015625, + "loss": 0.4503, + "pred_label": 2100.5, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": 0.7535191774368286, + "rewards/margins": 1.287442684173584, + "rewards/rejected": -0.5339235067367554, "step": 950, - "use_label": 0.0 + "use_label": 5461.5 + }, + { + "epoch": 1.0, + "eval_logits/chosen": -1.3773202896118164, + "eval_logits/rejected": -1.3735064268112183, + "eval_logps/chosen": -342.40301513671875, + "eval_logps/rejected": -313.79144287109375, + "eval_loss": 0.43090611696243286, + "eval_pred_label": 2177.77783203125, + "eval_rewards/accuracies": 0.7460317611694336, + "eval_rewards/chosen": 1.2747503519058228, + "eval_rewards/margins": 1.4222474098205566, + "eval_rewards/rejected": -0.14749698340892792, + "eval_runtime": 275.5038, + "eval_samples_per_second": 7.259, + "eval_steps_per_second": 0.229, + "eval_use_label": 5590.22216796875, + "step": 955 }, { "epoch": 1.0, - "step": 956, + "step": 955, "total_flos": 0.0, - "train_loss": 0.5461576643349236, - "train_runtime": 10580.1696, - "train_samples_per_second": 5.778, - "train_steps_per_second": 0.09 + "train_loss": 0.47434414693822413, + "train_runtime": 13309.0876, + "train_samples_per_second": 4.593, + "train_steps_per_second": 0.072 } ], "logging_steps": 10, - "max_steps": 956, + "max_steps": 955, "num_train_epochs": 1, "save_steps": 50, "total_flos": 0.0,