diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,1882 +1,982 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 2.9865871833084947, + "epoch": 3.0, "eval_steps": 500, - "global_step": 501, + "global_step": 252, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 0.029806259314456036, - "grad_norm": 1823.4444580078125, + "epoch": 0.05952380952380952, + "grad_norm": 1882.16845703125, "learning_rate": 2.5000000000000004e-07, - "log_odds_chosen": -0.22316808998584747, - "log_odds_ratio": -1.008633017539978, - "logits/chosen": 204.3894805908203, - "logits/rejected": 203.0862274169922, - "logps/chosen": -14.825190544128418, - "logps/rejected": -14.602023124694824, - "loss": 15.3951, - "nll_loss": 14.544248580932617, + "log_odds_chosen": -0.12500545382499695, + "log_odds_ratio": -0.9542725682258606, + "logits/chosen": 164.27560424804688, + "logits/rejected": 208.2156219482422, + "logps/chosen": -14.962623596191406, + "logps/rejected": -14.837625503540039, + "loss": 15.2102, + "nll_loss": 14.645106315612793, "rewards/accuracies": 0.3499999940395355, - "rewards/chosen": -7.412595272064209, - "rewards/margins": -0.1115839034318924, - "rewards/rejected": -7.301011562347412, + "rewards/chosen": -7.481311798095703, + "rewards/margins": -0.06250032037496567, + "rewards/rejected": -7.4188127517700195, "step": 5 }, { - "epoch": 0.05961251862891207, - "grad_norm": 1235.2249755859375, + "epoch": 0.11904761904761904, + "grad_norm": 1088.200927734375, "learning_rate": 5.000000000000001e-07, - "log_odds_chosen": 0.2599984109401703, - "log_odds_ratio": -0.7687832713127136, - "logits/chosen": 219.47842407226562, - "logits/rejected": 223.53195190429688, - "logps/chosen": -12.257360458374023, - "logps/rejected": -12.516705513000488, - "loss": 13.0935, - "nll_loss": 12.36180305480957, - "rewards/accuracies": 0.5249999761581421, - "rewards/chosen": -6.128680229187012, - "rewards/margins": 0.1296723335981369, - "rewards/rejected": -6.258352756500244, + "log_odds_chosen": -0.05857907608151436, + "log_odds_ratio": -1.1228755712509155, + "logits/chosen": 244.7198486328125, + "logits/rejected": 227.472412109375, + "logps/chosen": -13.305212020874023, + "logps/rejected": -13.2466402053833, + "loss": 13.2102, + "nll_loss": 12.955018997192383, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -6.652606010437012, + "rewards/margins": -0.02928643301129341, + "rewards/rejected": -6.62332010269165, "step": 10 }, { - "epoch": 0.08941877794336811, - "grad_norm": 800.8740844726562, + "epoch": 0.17857142857142858, + "grad_norm": 750.9570922851562, "learning_rate": 7.5e-07, - "log_odds_chosen": 0.07262952625751495, - "log_odds_ratio": -0.7626513242721558, - "logits/chosen": 284.1068115234375, - "logits/rejected": 263.359619140625, - "logps/chosen": -8.042943000793457, - "logps/rejected": -8.115506172180176, - "loss": 8.8161, - "nll_loss": 8.040520668029785, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -4.0214715003967285, - "rewards/margins": 0.03628172725439072, - "rewards/rejected": -4.057753086090088, + "log_odds_chosen": -0.21123185753822327, + "log_odds_ratio": -0.9812790751457214, + "logits/chosen": 247.1725311279297, + "logits/rejected": 319.17498779296875, + "logps/chosen": -8.516304969787598, + "logps/rejected": -8.305025100708008, + "loss": 8.5319, + "nll_loss": 8.219998359680176, + "rewards/accuracies": 0.42500001192092896, + "rewards/chosen": -4.258152484893799, + "rewards/margins": -0.10563965886831284, + "rewards/rejected": -4.152512550354004, "step": 15 }, { - "epoch": 0.11922503725782414, - "grad_norm": 245.36756896972656, + "epoch": 0.23809523809523808, + "grad_norm": 150.73776245117188, "learning_rate": 1.0000000000000002e-06, - "log_odds_chosen": -0.1374012976884842, - "log_odds_ratio": -0.9441366195678711, - "logits/chosen": 284.14410400390625, - "logits/rejected": 279.113037109375, - "logps/chosen": -5.502886772155762, - "logps/rejected": -5.366313934326172, - "loss": 5.9511, - "nll_loss": 5.569659233093262, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -2.751443386077881, - "rewards/margins": -0.06828644871711731, - "rewards/rejected": -2.683156967163086, + "log_odds_chosen": 0.13484135270118713, + "log_odds_ratio": -0.7639249563217163, + "logits/chosen": 232.5557098388672, + "logits/rejected": 278.36358642578125, + "logps/chosen": -5.115365505218506, + "logps/rejected": -5.248563289642334, + "loss": 5.7184, + "nll_loss": 5.363820552825928, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.557682752609253, + "rewards/margins": 0.06659835577011108, + "rewards/rejected": -2.624281644821167, "step": 20 }, { - "epoch": 0.14903129657228018, - "grad_norm": 150.3289337158203, + "epoch": 0.2976190476190476, + "grad_norm": 132.29954528808594, "learning_rate": 1.25e-06, - "log_odds_chosen": -0.0038304091431200504, - "log_odds_ratio": -0.8650141954421997, - "logits/chosen": 296.8591003417969, - "logits/rejected": 306.09613037109375, - "logps/chosen": -3.4051125049591064, - "logps/rejected": -3.383711338043213, - "loss": 3.9954, - "nll_loss": 3.5142741203308105, - "rewards/accuracies": 0.550000011920929, - "rewards/chosen": -1.7025562524795532, - "rewards/margins": -0.010700652375817299, - "rewards/rejected": -1.6918556690216064, + "log_odds_chosen": -0.15154710412025452, + "log_odds_ratio": -0.918846607208252, + "logits/chosen": 329.36016845703125, + "logits/rejected": 322.80316162109375, + "logps/chosen": -3.490060329437256, + "logps/rejected": -3.336357593536377, + "loss": 3.7837, + "nll_loss": 3.5224061012268066, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.745030164718628, + "rewards/margins": -0.07685144990682602, + "rewards/rejected": -1.6681787967681885, "step": 25 }, { - "epoch": 0.17883755588673622, - "grad_norm": 95.30947875976562, + "epoch": 0.35714285714285715, + "grad_norm": 66.39665985107422, "learning_rate": 1.5e-06, - "log_odds_chosen": -0.02694419026374817, - "log_odds_ratio": -0.8674762845039368, - "logits/chosen": 341.46405029296875, - "logits/rejected": 369.68597412109375, - "logps/chosen": -2.748025894165039, - "logps/rejected": -2.6941990852355957, - "loss": 3.0003, - "nll_loss": 2.7433438301086426, - "rewards/accuracies": 0.5249999761581421, - "rewards/chosen": -1.3740129470825195, - "rewards/margins": -0.02691327966749668, - "rewards/rejected": -1.3470995426177979, + "log_odds_chosen": 0.1455865204334259, + "log_odds_ratio": -0.7501406669616699, + "logits/chosen": 351.40875244140625, + "logits/rejected": 335.61932373046875, + "logps/chosen": -2.3319332599639893, + "logps/rejected": -2.4487643241882324, + "loss": 2.9334, + "nll_loss": 2.8211100101470947, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.1659666299819946, + "rewards/margins": 0.05841563269495964, + "rewards/rejected": -1.2243821620941162, "step": 30 }, { - "epoch": 0.20864381520119224, - "grad_norm": 61.000022888183594, + "epoch": 0.4166666666666667, + "grad_norm": 50.658451080322266, "learning_rate": 1.75e-06, - "log_odds_chosen": 0.20579464733600616, - "log_odds_ratio": -0.7235999703407288, - "logits/chosen": 380.79266357421875, - "logits/rejected": 368.7811279296875, - "logps/chosen": -1.8658641576766968, - "logps/rejected": -2.010704517364502, - "loss": 2.4897, - "nll_loss": 2.063866376876831, - "rewards/accuracies": 0.574999988079071, - "rewards/chosen": -0.9329320788383484, - "rewards/margins": 0.07242023944854736, - "rewards/rejected": -1.005352258682251, + "log_odds_chosen": 0.17109766602516174, + "log_odds_ratio": -0.6774098873138428, + "logits/chosen": 386.70220947265625, + "logits/rejected": 384.7711486816406, + "logps/chosen": -1.935703992843628, + "logps/rejected": -2.0668416023254395, + "loss": 2.3732, + "nll_loss": 2.1755499839782715, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.967851996421814, + "rewards/margins": 0.06556873768568039, + "rewards/rejected": -1.0334208011627197, "step": 35 }, { - "epoch": 0.23845007451564829, - "grad_norm": 52.54775619506836, + "epoch": 0.47619047619047616, + "grad_norm": 50.109127044677734, "learning_rate": 2.0000000000000003e-06, - "log_odds_chosen": 0.14920620620250702, - "log_odds_ratio": -0.7215272188186646, - "logits/chosen": 373.4493713378906, - "logits/rejected": 372.3648376464844, - "logps/chosen": -1.7074140310287476, - "logps/rejected": -1.8317168951034546, - "loss": 2.3294, - "nll_loss": 2.0935075283050537, - "rewards/accuracies": 0.550000011920929, - "rewards/chosen": -0.8537070155143738, - "rewards/margins": 0.06215135008096695, - "rewards/rejected": -0.9158584475517273, + "log_odds_chosen": 0.5132101774215698, + "log_odds_ratio": -0.58674156665802, + "logits/chosen": 395.3487243652344, + "logits/rejected": 396.77911376953125, + "logps/chosen": -1.6742804050445557, + "logps/rejected": -2.092700481414795, + "loss": 2.2549, + "nll_loss": 1.8625695705413818, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.8371402025222778, + "rewards/margins": 0.20921015739440918, + "rewards/rejected": -1.0463502407073975, "step": 40 }, { - "epoch": 0.26825633383010433, - "grad_norm": 58.8853645324707, + "epoch": 0.5357142857142857, + "grad_norm": 36.12651062011719, "learning_rate": 2.25e-06, - "log_odds_chosen": 0.20898687839508057, - "log_odds_ratio": -0.666365385055542, - "logits/chosen": 390.11090087890625, - "logits/rejected": 399.29052734375, - "logps/chosen": -1.6940256357192993, - "logps/rejected": -1.859763503074646, - "loss": 2.2384, - "nll_loss": 1.81049382686615, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.8470128178596497, - "rewards/margins": 0.08286889642477036, - "rewards/rejected": -0.929881751537323, + "log_odds_chosen": 0.40647760033607483, + "log_odds_ratio": -0.7087821364402771, + "logits/chosen": 416.9222717285156, + "logits/rejected": 409.3716125488281, + "logps/chosen": -1.7572282552719116, + "logps/rejected": -2.102989435195923, + "loss": 2.1445, + "nll_loss": 1.97689950466156, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.8786141276359558, + "rewards/margins": 0.1728806048631668, + "rewards/rejected": -1.0514947175979614, "step": 45 }, { - "epoch": 0.29806259314456035, - "grad_norm": 53.57241439819336, + "epoch": 0.5952380952380952, + "grad_norm": 129.66111755371094, "learning_rate": 2.5e-06, - "log_odds_chosen": 0.4837431013584137, - "log_odds_ratio": -0.5640600919723511, - "logits/chosen": 397.261474609375, - "logits/rejected": 418.50787353515625, - "logps/chosen": -1.548592209815979, - "logps/rejected": -1.9497201442718506, - "loss": 2.2689, - "nll_loss": 1.9370243549346924, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.7742961049079895, - "rewards/margins": 0.20056398212909698, - "rewards/rejected": -0.9748600721359253, + "log_odds_chosen": 0.4818713068962097, + "log_odds_ratio": -0.607313871383667, + "logits/chosen": 385.31231689453125, + "logits/rejected": 411.05029296875, + "logps/chosen": -1.5724234580993652, + "logps/rejected": -1.9852949380874634, + "loss": 2.1136, + "nll_loss": 1.7941957712173462, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.7862117290496826, + "rewards/margins": 0.20643571019172668, + "rewards/rejected": -0.9926474690437317, "step": 50 }, { - "epoch": 0.32786885245901637, - "grad_norm": 54.863670349121094, + "epoch": 0.6547619047619048, + "grad_norm": 65.99632263183594, "learning_rate": 2.7500000000000004e-06, - "log_odds_chosen": 0.10273619741201401, - "log_odds_ratio": -0.7500853538513184, - "logits/chosen": 393.6875, - "logits/rejected": 385.71246337890625, - "logps/chosen": -1.6378755569458008, - "logps/rejected": -1.7148265838623047, - "loss": 2.343, - "nll_loss": 2.059002637863159, - "rewards/accuracies": 0.5249999761581421, - "rewards/chosen": -0.8189377784729004, - "rewards/margins": 0.038475506007671356, - "rewards/rejected": -0.8574132919311523, + "log_odds_chosen": 0.3759257197380066, + "log_odds_ratio": -0.658983588218689, + "logits/chosen": 393.38006591796875, + "logits/rejected": 373.91265869140625, + "logps/chosen": -1.5574285984039307, + "logps/rejected": -1.8452409505844116, + "loss": 2.0521, + "nll_loss": 1.9158170223236084, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.7787142992019653, + "rewards/margins": 0.14390619099140167, + "rewards/rejected": -0.9226204752922058, "step": 55 }, { - "epoch": 0.35767511177347244, - "grad_norm": 52.10759735107422, + "epoch": 0.7142857142857143, + "grad_norm": 42.2856559753418, "learning_rate": 3e-06, - "log_odds_chosen": 0.20460805296897888, - "log_odds_ratio": -0.6378257870674133, - "logits/chosen": 393.1427917480469, - "logits/rejected": 384.66424560546875, - "logps/chosen": -1.5411105155944824, - "logps/rejected": -1.693114995956421, - "loss": 2.0643, - "nll_loss": 1.8742763996124268, - "rewards/accuracies": 0.625, - "rewards/chosen": -0.7705552577972412, - "rewards/margins": 0.07600229978561401, - "rewards/rejected": -0.8465574979782104, + "log_odds_chosen": 0.6560322642326355, + "log_odds_ratio": -0.5206496119499207, + "logits/chosen": 394.558349609375, + "logits/rejected": 419.30908203125, + "logps/chosen": -1.252516746520996, + "logps/rejected": -1.736289620399475, + "loss": 1.9986, + "nll_loss": 1.6350934505462646, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.626258373260498, + "rewards/margins": 0.2418864220380783, + "rewards/rejected": -0.8681448101997375, "step": 60 }, { - "epoch": 0.38748137108792846, - "grad_norm": 36.03993225097656, + "epoch": 0.7738095238095238, + "grad_norm": 51.25124740600586, "learning_rate": 3.2500000000000002e-06, - "log_odds_chosen": 0.31621289253234863, - "log_odds_ratio": -0.6504405736923218, - "logits/chosen": 391.6888427734375, - "logits/rejected": 392.4349365234375, - "logps/chosen": -1.3424034118652344, - "logps/rejected": -1.5777828693389893, - "loss": 1.9359, - "nll_loss": 1.5658212900161743, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.6712017059326172, - "rewards/margins": 0.11768964678049088, - "rewards/rejected": -0.7888914346694946, + "log_odds_chosen": 0.20805387198925018, + "log_odds_ratio": -0.700042724609375, + "logits/chosen": 386.62237548828125, + "logits/rejected": 375.99847412109375, + "logps/chosen": -1.3062386512756348, + "logps/rejected": -1.4444353580474854, + "loss": 1.9611, + "nll_loss": 1.55000901222229, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.6531193256378174, + "rewards/margins": 0.06909838318824768, + "rewards/rejected": -0.7222176790237427, "step": 65 }, { - "epoch": 0.4172876304023845, - "grad_norm": 41.6033821105957, + "epoch": 0.8333333333333334, + "grad_norm": 56.384552001953125, "learning_rate": 3.5e-06, - "log_odds_chosen": 0.3797632157802582, - "log_odds_ratio": -0.6185951828956604, - "logits/chosen": 376.75579833984375, - "logits/rejected": 385.7332763671875, - "logps/chosen": -1.507440209388733, - "logps/rejected": -1.8190501928329468, - "loss": 2.0024, - "nll_loss": 1.7485978603363037, + "log_odds_chosen": 0.2420281618833542, + "log_odds_ratio": -0.6607708930969238, + "logits/chosen": 376.65167236328125, + "logits/rejected": 374.722900390625, + "logps/chosen": -1.2764971256256104, + "logps/rejected": -1.4956505298614502, + "loss": 1.9944, + "nll_loss": 1.656867265701294, "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.7537201046943665, - "rewards/margins": 0.1558050811290741, - "rewards/rejected": -0.9095250964164734, + "rewards/chosen": -0.6382485628128052, + "rewards/margins": 0.10957670211791992, + "rewards/rejected": -0.7478252649307251, "step": 70 }, { - "epoch": 0.44709388971684055, - "grad_norm": 49.81876754760742, + "epoch": 0.8928571428571429, + "grad_norm": 35.837562561035156, "learning_rate": 3.7500000000000005e-06, - "log_odds_chosen": 0.5595852732658386, - "log_odds_ratio": -0.531262218952179, - "logits/chosen": 394.0195617675781, - "logits/rejected": 380.848388671875, - "logps/chosen": -1.278014898300171, - "logps/rejected": -1.7234604358673096, - "loss": 1.8859, - "nll_loss": 1.5626357793807983, - "rewards/accuracies": 0.7749999761581421, - "rewards/chosen": -0.6390074491500854, - "rewards/margins": 0.22272281348705292, - "rewards/rejected": -0.8617302179336548, + "log_odds_chosen": 0.43760427832603455, + "log_odds_ratio": -0.5639179944992065, + "logits/chosen": 369.7068786621094, + "logits/rejected": 380.7843017578125, + "logps/chosen": -1.254392385482788, + "logps/rejected": -1.5828098058700562, + "loss": 1.904, + "nll_loss": 1.5474170446395874, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.627196192741394, + "rewards/margins": 0.16420873999595642, + "rewards/rejected": -0.7914049029350281, "step": 75 }, { - "epoch": 0.47690014903129657, - "grad_norm": 49.527286529541016, + "epoch": 0.9523809523809523, + "grad_norm": 61.186588287353516, "learning_rate": 4.000000000000001e-06, - "log_odds_chosen": 0.4827095866203308, - "log_odds_ratio": -0.5827818512916565, - "logits/chosen": 383.16455078125, - "logits/rejected": 405.3050231933594, - "logps/chosen": -1.4663052558898926, - "logps/rejected": -1.8465086221694946, - "loss": 1.9594, - "nll_loss": 1.6925424337387085, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.7331526279449463, - "rewards/margins": 0.19010166823863983, - "rewards/rejected": -0.9232543110847473, + "log_odds_chosen": 0.20079848170280457, + "log_odds_ratio": -0.7042320370674133, + "logits/chosen": 372.26202392578125, + "logits/rejected": 395.8300476074219, + "logps/chosen": -1.294389009475708, + "logps/rejected": -1.4353959560394287, + "loss": 1.8227, + "nll_loss": 1.4922560453414917, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.647194504737854, + "rewards/margins": 0.07050346583127975, + "rewards/rejected": -0.7176979780197144, "step": 80 }, { - "epoch": 0.5067064083457526, - "grad_norm": 35.50678634643555, + "epoch": 1.0, + "eval_log_odds_chosen": 0.16461706161499023, + "eval_log_odds_ratio": -0.6902630925178528, + "eval_logits/chosen": 315.19403076171875, + "eval_logits/rejected": 257.8447265625, + "eval_logps/chosen": -1.2099318504333496, + "eval_logps/rejected": -1.348587989807129, + "eval_loss": 1.9615823030471802, + "eval_nll_loss": 1.6718581914901733, + "eval_rewards/accuracies": 0.5, + "eval_rewards/chosen": -0.6049659252166748, + "eval_rewards/margins": 0.06932813674211502, + "eval_rewards/rejected": -0.6742939949035645, + "eval_runtime": 201.4785, + "eval_samples_per_second": 2.745, + "eval_steps_per_second": 0.347, + "step": 84 + }, + { + "epoch": 1.0119047619047619, + "grad_norm": 49.942501068115234, "learning_rate": 4.25e-06, - "log_odds_chosen": 0.32843467593193054, - "log_odds_ratio": -0.7204899787902832, - "logits/chosen": 409.7994689941406, - "logits/rejected": 394.08575439453125, - "logps/chosen": -1.5447129011154175, - "logps/rejected": -1.7741330862045288, - "loss": 1.9717, - "nll_loss": 1.7773849964141846, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.7723564505577087, - "rewards/margins": 0.11471016705036163, - "rewards/rejected": -0.8870665431022644, + "log_odds_chosen": 0.3827090859413147, + "log_odds_ratio": -0.5646133422851562, + "logits/chosen": 385.1676330566406, + "logits/rejected": 408.2163391113281, + "logps/chosen": -1.2135608196258545, + "logps/rejected": -1.460314154624939, + "loss": 1.8474, + "nll_loss": 1.4997951984405518, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.6067804098129272, + "rewards/margins": 0.1233767420053482, + "rewards/rejected": -0.7301570773124695, "step": 85 }, { - "epoch": 0.5365126676602087, - "grad_norm": 32.97898864746094, + "epoch": 1.0714285714285714, + "grad_norm": 121.16907501220703, "learning_rate": 4.5e-06, - "log_odds_chosen": 1.082437515258789, - "log_odds_ratio": -0.48028555512428284, - "logits/chosen": 400.08160400390625, - "logits/rejected": 415.0059509277344, - "logps/chosen": -1.4097836017608643, - "logps/rejected": -2.338646411895752, - "loss": 1.7591, - "nll_loss": 1.5575084686279297, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.7048918008804321, - "rewards/margins": 0.4644315242767334, - "rewards/rejected": -1.169323205947876, + "log_odds_chosen": 0.7085806131362915, + "log_odds_ratio": -0.4863010346889496, + "logits/chosen": 366.0420837402344, + "logits/rejected": 378.7876281738281, + "logps/chosen": -1.073919653892517, + "logps/rejected": -1.589540719985962, + "loss": 1.6684, + "nll_loss": 1.5809502601623535, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.5369598269462585, + "rewards/margins": 0.2578105330467224, + "rewards/rejected": -0.794770359992981, "step": 90 }, { - "epoch": 0.5663189269746647, - "grad_norm": 59.56174850463867, + "epoch": 1.130952380952381, + "grad_norm": 148.60858154296875, "learning_rate": 4.75e-06, - "log_odds_chosen": 0.43072813749313354, - "log_odds_ratio": -0.5813931226730347, - "logits/chosen": 370.78253173828125, - "logits/rejected": 383.03009033203125, - "logps/chosen": -1.1654596328735352, - "logps/rejected": -1.4328858852386475, - "loss": 1.8455, - "nll_loss": 1.4887888431549072, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.5827298164367676, - "rewards/margins": 0.13371312618255615, - "rewards/rejected": -0.7164429426193237, + "log_odds_chosen": 0.875158965587616, + "log_odds_ratio": -0.42892536520957947, + "logits/chosen": 411.90411376953125, + "logits/rejected": 384.31939697265625, + "logps/chosen": -1.1889146566390991, + "logps/rejected": -1.8593413829803467, + "loss": 1.7409, + "nll_loss": 1.6732642650604248, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.5944573283195496, + "rewards/margins": 0.3352133333683014, + "rewards/rejected": -0.9296706914901733, "step": 95 }, { - "epoch": 0.5961251862891207, - "grad_norm": 98.94036102294922, + "epoch": 1.1904761904761905, + "grad_norm": 21.357654571533203, "learning_rate": 5e-06, - "log_odds_chosen": 0.6077507138252258, - "log_odds_ratio": -0.5442172884941101, - "logits/chosen": 406.62945556640625, - "logits/rejected": 442.9092712402344, - "logps/chosen": -1.3020541667938232, - "logps/rejected": -1.8070911169052124, - "loss": 1.8887, - "nll_loss": 1.6196590662002563, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.6510270833969116, - "rewards/margins": 0.2525184154510498, - "rewards/rejected": -0.9035455584526062, + "log_odds_chosen": 0.9980722665786743, + "log_odds_ratio": -0.40565505623817444, + "logits/chosen": 396.3813171386719, + "logits/rejected": 391.2807312011719, + "logps/chosen": -1.028236985206604, + "logps/rejected": -1.7792075872421265, + "loss": 1.6731, + "nll_loss": 1.3774528503417969, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.514118492603302, + "rewards/margins": 0.37548530101776123, + "rewards/rejected": -0.8896037936210632, "step": 100 }, { - "epoch": 0.6259314456035767, - "grad_norm": 64.72420501708984, + "epoch": 1.25, + "grad_norm": 92.72647857666016, "learning_rate": 4.8795003647426654e-06, - "log_odds_chosen": 0.6622194051742554, - "log_odds_ratio": -0.5436784625053406, - "logits/chosen": 396.576416015625, - "logits/rejected": 409.37408447265625, - "logps/chosen": -1.239157795906067, - "logps/rejected": -1.7216756343841553, - "loss": 2.0086, - "nll_loss": 1.743311882019043, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.6195788979530334, - "rewards/margins": 0.24125893414020538, - "rewards/rejected": -0.8608378171920776, + "log_odds_chosen": 1.2707650661468506, + "log_odds_ratio": -0.3489342927932739, + "logits/chosen": 329.43988037109375, + "logits/rejected": 359.312744140625, + "logps/chosen": -0.9563199877738953, + "logps/rejected": -1.8967196941375732, + "loss": 1.5343, + "nll_loss": 1.4084365367889404, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.47815999388694763, + "rewards/margins": 0.4701998233795166, + "rewards/rejected": -0.9483598470687866, "step": 105 }, { - "epoch": 0.6557377049180327, - "grad_norm": 156.0756072998047, + "epoch": 1.3095238095238095, + "grad_norm": 23.097333908081055, "learning_rate": 4.767312946227961e-06, - "log_odds_chosen": 0.552756667137146, - "log_odds_ratio": -0.5492970943450928, - "logits/chosen": 376.5224304199219, - "logits/rejected": 374.7490234375, - "logps/chosen": -1.258983850479126, - "logps/rejected": -1.6897951364517212, - "loss": 1.8644, - "nll_loss": 1.6051260232925415, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.629491925239563, - "rewards/margins": 0.21540567278862, - "rewards/rejected": -0.8448975682258606, + "log_odds_chosen": 1.0622062683105469, + "log_odds_ratio": -0.40513938665390015, + "logits/chosen": 364.5774841308594, + "logits/rejected": 378.61834716796875, + "logps/chosen": -0.97075355052948, + "logps/rejected": -1.6918100118637085, + "loss": 1.5379, + "nll_loss": 1.3676984310150146, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.48537677526474, + "rewards/margins": 0.36052826046943665, + "rewards/rejected": -0.8459050059318542, "step": 110 }, { - "epoch": 0.6855439642324889, - "grad_norm": 27.666635513305664, + "epoch": 1.369047619047619, + "grad_norm": 29.07915496826172, "learning_rate": 4.662524041201569e-06, - "log_odds_chosen": 0.7742821574211121, - "log_odds_ratio": -0.4874287545681, - "logits/chosen": 401.1915283203125, - "logits/rejected": 406.65716552734375, - "logps/chosen": -1.1674349308013916, - "logps/rejected": -1.7199904918670654, - "loss": 1.8546, - "nll_loss": 1.5520570278167725, - "rewards/accuracies": 0.824999988079071, - "rewards/chosen": -0.5837174654006958, - "rewards/margins": 0.2762778401374817, - "rewards/rejected": -0.8599952459335327, + "log_odds_chosen": 0.8752411603927612, + "log_odds_ratio": -0.4561616778373718, + "logits/chosen": 365.3411560058594, + "logits/rejected": 359.665771484375, + "logps/chosen": -1.142798662185669, + "logps/rejected": -1.8039169311523438, + "loss": 1.4911, + "nll_loss": 1.328161597251892, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.5713993310928345, + "rewards/margins": 0.3305591344833374, + "rewards/rejected": -0.9019584655761719, "step": 115 }, { - "epoch": 0.7153502235469449, - "grad_norm": 39.84739303588867, + "epoch": 1.4285714285714286, + "grad_norm": 25.719493865966797, "learning_rate": 4.564354645876385e-06, - "log_odds_chosen": 0.7211123108863831, - "log_odds_ratio": -0.5134823322296143, - "logits/chosen": 388.77838134765625, - "logits/rejected": 388.0323181152344, - "logps/chosen": -1.1216659545898438, - "logps/rejected": -1.614723801612854, - "loss": 1.8847, - "nll_loss": 1.5851175785064697, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.5608329772949219, - "rewards/margins": 0.24652889370918274, - "rewards/rejected": -0.807361900806427, + "log_odds_chosen": 1.2218338251113892, + "log_odds_ratio": -0.38108527660369873, + "logits/chosen": 393.53240966796875, + "logits/rejected": 404.69671630859375, + "logps/chosen": -0.9683561325073242, + "logps/rejected": -1.8645473718643188, + "loss": 1.5413, + "nll_loss": 1.2556110620498657, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.4841780662536621, + "rewards/margins": 0.4480956196784973, + "rewards/rejected": -0.9322736859321594, "step": 120 }, { - "epoch": 0.7451564828614009, - "grad_norm": 29.12469482421875, + "epoch": 1.4880952380952381, + "grad_norm": 33.33296585083008, "learning_rate": 4.47213595499958e-06, - "log_odds_chosen": 0.3196253776550293, - "log_odds_ratio": -0.6582637429237366, - "logits/chosen": 386.79583740234375, - "logits/rejected": 398.7740783691406, - "logps/chosen": -1.351342797279358, - "logps/rejected": -1.5734379291534424, - "loss": 1.8179, - "nll_loss": 1.5471035242080688, - "rewards/accuracies": 0.574999988079071, - "rewards/chosen": -0.675671398639679, - "rewards/margins": 0.11104752123355865, - "rewards/rejected": -0.7867189645767212, + "log_odds_chosen": 0.8903130292892456, + "log_odds_ratio": -0.4254421591758728, + "logits/chosen": 376.06280517578125, + "logits/rejected": 374.7559814453125, + "logps/chosen": -1.0784931182861328, + "logps/rejected": -1.7109521627426147, + "loss": 1.5507, + "nll_loss": 1.3247863054275513, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.5392465591430664, + "rewards/margins": 0.3162294924259186, + "rewards/rejected": -0.8554760813713074, "step": 125 }, { - "epoch": 0.7749627421758569, - "grad_norm": 38.485130310058594, + "epoch": 1.5476190476190477, + "grad_norm": 30.346759796142578, "learning_rate": 4.385290096535147e-06, - "log_odds_chosen": 0.6995328068733215, - "log_odds_ratio": -0.5530967712402344, - "logits/chosen": 409.95849609375, - "logits/rejected": 397.23114013671875, - "logps/chosen": -1.1655128002166748, - "logps/rejected": -1.6789804697036743, - "loss": 1.8506, - "nll_loss": 1.5426290035247803, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.5827564001083374, - "rewards/margins": 0.2567337453365326, - "rewards/rejected": -0.8394902348518372, + "log_odds_chosen": 1.1133849620819092, + "log_odds_ratio": -0.4015190601348877, + "logits/chosen": 380.5897521972656, + "logits/rejected": 411.142333984375, + "logps/chosen": -1.0903799533843994, + "logps/rejected": -1.9482700824737549, + "loss": 1.5205, + "nll_loss": 1.3193198442459106, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.5451899766921997, + "rewards/margins": 0.42894500494003296, + "rewards/rejected": -0.9741350412368774, "step": 130 }, { - "epoch": 0.8047690014903129, - "grad_norm": 52.03726577758789, + "epoch": 1.6071428571428572, + "grad_norm": 23.75062370300293, "learning_rate": 4.303314829119352e-06, - "log_odds_chosen": 0.42176657915115356, - "log_odds_ratio": -0.6203495264053345, - "logits/chosen": 415.891357421875, - "logits/rejected": 418.27984619140625, - "logps/chosen": -1.4386600255966187, - "logps/rejected": -1.7602298259735107, - "loss": 1.873, - "nll_loss": 1.672498106956482, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.7193300127983093, - "rewards/margins": 0.16078472137451172, - "rewards/rejected": -0.8801149129867554, + "log_odds_chosen": 0.8445339202880859, + "log_odds_ratio": -0.4824402332305908, + "logits/chosen": 383.6838073730469, + "logits/rejected": 385.19677734375, + "logps/chosen": -1.1689434051513672, + "logps/rejected": -1.7730737924575806, + "loss": 1.6065, + "nll_loss": 1.434819221496582, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.5844717025756836, + "rewards/margins": 0.3020651936531067, + "rewards/rejected": -0.8865368962287903, "step": 135 }, { - "epoch": 0.834575260804769, - "grad_norm": 37.35191345214844, + "epoch": 1.6666666666666665, + "grad_norm": 27.057226181030273, "learning_rate": 4.2257712736425835e-06, - "log_odds_chosen": 0.30007433891296387, - "log_odds_ratio": -0.659325897693634, - "logits/chosen": 397.5603942871094, - "logits/rejected": 403.28192138671875, - "logps/chosen": -1.314620852470398, - "logps/rejected": -1.552886962890625, - "loss": 1.8804, - "nll_loss": 1.7158609628677368, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.657310426235199, - "rewards/margins": 0.11913303285837173, - "rewards/rejected": -0.7764434814453125, + "log_odds_chosen": 0.6634833216667175, + "log_odds_ratio": -0.5241434574127197, + "logits/chosen": 355.0412902832031, + "logits/rejected": 345.8267517089844, + "logps/chosen": -0.9373375773429871, + "logps/rejected": -1.3723398447036743, + "loss": 1.5457, + "nll_loss": 1.1873310804367065, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.46866878867149353, + "rewards/margins": 0.21750116348266602, + "rewards/rejected": -0.6861699223518372, "step": 140 }, { - "epoch": 0.8643815201192251, - "grad_norm": 40.69832992553711, + "epoch": 1.7261904761904763, + "grad_norm": 24.420185089111328, "learning_rate": 4.1522739926869985e-06, - "log_odds_chosen": 0.15929968655109406, - "log_odds_ratio": -0.6508444547653198, - "logits/chosen": 392.15386962890625, - "logits/rejected": 395.3309631347656, - "logps/chosen": -1.2985050678253174, - "logps/rejected": -1.4000908136367798, - "loss": 1.8468, - "nll_loss": 1.581002950668335, - "rewards/accuracies": 0.625, - "rewards/chosen": -0.6492525339126587, - "rewards/margins": 0.0507928729057312, - "rewards/rejected": -0.7000454068183899, + "log_odds_chosen": 1.1559978723526, + "log_odds_ratio": -0.4130094647407532, + "logits/chosen": 353.3649597167969, + "logits/rejected": 403.1065368652344, + "logps/chosen": -0.9650250673294067, + "logps/rejected": -1.8172032833099365, + "loss": 1.4955, + "nll_loss": 1.2150856256484985, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.48251253366470337, + "rewards/margins": 0.4260891377925873, + "rewards/rejected": -0.9086016416549683, "step": 145 }, { - "epoch": 0.8941877794336811, - "grad_norm": 33.805145263671875, + "epoch": 1.7857142857142856, + "grad_norm": 19.80653953552246, "learning_rate": 4.082482904638631e-06, - "log_odds_chosen": 0.7655292749404907, - "log_odds_ratio": -0.514534592628479, - "logits/chosen": 398.47711181640625, - "logits/rejected": 415.9359436035156, - "logps/chosen": -1.1850507259368896, - "logps/rejected": -1.7938915491104126, - "loss": 1.7734, - "nll_loss": 1.4344916343688965, - "rewards/accuracies": 0.7749999761581421, - "rewards/chosen": -0.5925253629684448, - "rewards/margins": 0.3044203519821167, - "rewards/rejected": -0.8969457745552063, + "log_odds_chosen": 0.7758156657218933, + "log_odds_ratio": -0.4749727249145508, + "logits/chosen": 381.03961181640625, + "logits/rejected": 387.2470703125, + "logps/chosen": -1.0167145729064941, + "logps/rejected": -1.5349647998809814, + "loss": 1.5271, + "nll_loss": 1.3638825416564941, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.5083572864532471, + "rewards/margins": 0.25912514328956604, + "rewards/rejected": -0.7674823999404907, "step": 150 }, { - "epoch": 0.9239940387481371, - "grad_norm": 21.895387649536133, + "epoch": 1.8452380952380953, + "grad_norm": 16.095684051513672, "learning_rate": 4.016096644512495e-06, - "log_odds_chosen": 0.26020118594169617, - "log_odds_ratio": -0.6326924562454224, - "logits/chosen": 378.7313537597656, - "logits/rejected": 393.6383361816406, - "logps/chosen": -1.2521542310714722, - "logps/rejected": -1.4581738710403442, - "loss": 1.7575, - "nll_loss": 1.3882125616073608, + "log_odds_chosen": 0.729617714881897, + "log_odds_ratio": -0.5468782782554626, + "logits/chosen": 389.2290344238281, + "logits/rejected": 382.7873840332031, + "logps/chosen": -1.1134544610977173, + "logps/rejected": -1.6494137048721313, + "loss": 1.5063, + "nll_loss": 1.3405725955963135, "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.6260771155357361, - "rewards/margins": 0.10300980508327484, - "rewards/rejected": -0.7290869355201721, + "rewards/chosen": -0.5567272305488586, + "rewards/margins": 0.26797956228256226, + "rewards/rejected": -0.8247068524360657, "step": 155 }, { - "epoch": 0.9538002980625931, - "grad_norm": 65.63549041748047, + "epoch": 1.9047619047619047, + "grad_norm": 27.426509857177734, "learning_rate": 3.952847075210474e-06, - "log_odds_chosen": 0.28196483850479126, - "log_odds_ratio": -0.6715171337127686, - "logits/chosen": 386.80755615234375, - "logits/rejected": 431.31146240234375, - "logps/chosen": -1.1752804517745972, - "logps/rejected": -1.395171880722046, - "loss": 1.7333, - "nll_loss": 1.422628402709961, - "rewards/accuracies": 0.550000011920929, - "rewards/chosen": -0.5876402258872986, - "rewards/margins": 0.1099456176161766, - "rewards/rejected": -0.697585940361023, + "log_odds_chosen": 0.8723602294921875, + "log_odds_ratio": -0.5240803956985474, + "logits/chosen": 381.7905578613281, + "logits/rejected": 403.63665771484375, + "logps/chosen": -1.0924057960510254, + "logps/rejected": -1.7700122594833374, + "loss": 1.5276, + "nll_loss": 1.3452767133712769, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.5462028980255127, + "rewards/margins": 0.3388032019138336, + "rewards/rejected": -0.8850061297416687, "step": 160 }, { - "epoch": 0.9836065573770492, - "grad_norm": 31.469694137573242, + "epoch": 1.9642857142857144, + "grad_norm": 26.49502944946289, "learning_rate": 3.892494720807615e-06, - "log_odds_chosen": 0.20551732182502747, - "log_odds_ratio": -0.6562041640281677, - "logits/chosen": 399.196533203125, - "logits/rejected": 412.1044006347656, - "logps/chosen": -1.233426809310913, - "logps/rejected": -1.369274377822876, - "loss": 1.8238, - "nll_loss": 1.516705870628357, - "rewards/accuracies": 0.625, - "rewards/chosen": -0.6167134046554565, - "rewards/margins": 0.06792382895946503, - "rewards/rejected": -0.684637188911438, + "log_odds_chosen": 1.3228471279144287, + "log_odds_ratio": -0.42377227544784546, + "logits/chosen": 396.58697509765625, + "logits/rejected": 413.573974609375, + "logps/chosen": -1.0189117193222046, + "logps/rejected": -2.0456955432891846, + "loss": 1.4803, + "nll_loss": 1.2141263484954834, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.5094558596611023, + "rewards/margins": 0.5133919715881348, + "rewards/rejected": -1.0228477716445923, "step": 165 }, { - "epoch": 0.9955290611028316, - "eval_log_odds_chosen": 0.2615682780742645, - "eval_log_odds_ratio": -0.6631397604942322, - "eval_logits/chosen": 322.6454162597656, - "eval_logits/rejected": 293.2744445800781, - "eval_logps/chosen": -1.0755056142807007, - "eval_logps/rejected": -1.263703465461731, - "eval_loss": 1.8175820112228394, - "eval_nll_loss": 1.4782638549804688, - "eval_rewards/accuracies": 0.5467625856399536, - "eval_rewards/chosen": -0.5377528071403503, - "eval_rewards/margins": 0.09409892559051514, - "eval_rewards/rejected": -0.6318517327308655, - "eval_runtime": 351.5652, - "eval_samples_per_second": 1.573, - "eval_steps_per_second": 0.395, - "step": 167 - }, - { - "epoch": 1.0134128166915053, - "grad_norm": 19.610740661621094, + "epoch": 2.0, + "eval_log_odds_chosen": 0.25614500045776367, + "eval_log_odds_ratio": -0.6718475818634033, + "eval_logits/chosen": 328.0206604003906, + "eval_logits/rejected": 274.3525695800781, + "eval_logps/chosen": -1.0924270153045654, + "eval_logps/rejected": -1.3016821146011353, + "eval_loss": 1.7681158781051636, + "eval_nll_loss": 1.4853813648223877, + "eval_rewards/accuracies": 0.5285714268684387, + "eval_rewards/chosen": -0.5462135076522827, + "eval_rewards/margins": 0.10462753474712372, + "eval_rewards/rejected": -0.6508410573005676, + "eval_runtime": 201.7398, + "eval_samples_per_second": 2.741, + "eval_steps_per_second": 0.347, + "step": 168 + }, + { + "epoch": 2.0238095238095237, + "grad_norm": 20.41162872314453, "learning_rate": 3.834824944236852e-06, - "log_odds_chosen": 0.7168810963630676, - "log_odds_ratio": -0.48998212814331055, - "logits/chosen": 379.9788513183594, - "logits/rejected": 404.4870910644531, - "logps/chosen": -1.0245921611785889, - "logps/rejected": -1.525979995727539, - "loss": 1.6186, - "nll_loss": 1.2430717945098877, - "rewards/accuracies": 0.7749999761581421, - "rewards/chosen": -0.5122960805892944, - "rewards/margins": 0.2506938874721527, - "rewards/rejected": -0.7629899978637695, + "log_odds_chosen": 1.3698937892913818, + "log_odds_ratio": -0.44814401865005493, + "logits/chosen": 386.51275634765625, + "logits/rejected": 390.8349304199219, + "logps/chosen": -1.1337850093841553, + "logps/rejected": -2.192959785461426, + "loss": 1.3382, + "nll_loss": 1.3109896183013916, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.5668925046920776, + "rewards/margins": 0.52958744764328, + "rewards/rejected": -1.096479892730713, "step": 170 }, { - "epoch": 1.0432190760059612, - "grad_norm": 27.745262145996094, + "epoch": 2.0833333333333335, + "grad_norm": 29.53614044189453, "learning_rate": 3.7796447300922724e-06, - "log_odds_chosen": 1.422102689743042, - "log_odds_ratio": -0.3321012854576111, - "logits/chosen": 357.04718017578125, - "logits/rejected": 398.559814453125, - "logps/chosen": -0.7521129846572876, - "logps/rejected": -1.7101812362670898, - "loss": 1.286, - "nll_loss": 1.213651418685913, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": -0.3760564923286438, - "rewards/margins": 0.47903409600257874, - "rewards/rejected": -0.8550906181335449, + "log_odds_chosen": 3.1386218070983887, + "log_odds_ratio": -0.13156357407569885, + "logits/chosen": 374.08306884765625, + "logits/rejected": 370.32257080078125, + "logps/chosen": -0.6971138119697571, + "logps/rejected": -2.928040027618408, + "loss": 0.963, + "nll_loss": 1.0014644861221313, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.34855690598487854, + "rewards/margins": 1.115463137626648, + "rewards/rejected": -1.464020013809204, "step": 175 }, { - "epoch": 1.0730253353204173, - "grad_norm": 20.82083511352539, + "epoch": 2.142857142857143, + "grad_norm": 20.2189884185791, "learning_rate": 3.72677996249965e-06, - "log_odds_chosen": 1.3344449996948242, - "log_odds_ratio": -0.3116641640663147, - "logits/chosen": 356.35601806640625, - "logits/rejected": 331.2690124511719, - "logps/chosen": -0.8246381878852844, - "logps/rejected": -1.7447946071624756, - "loss": 1.3455, - "nll_loss": 1.2647905349731445, - "rewards/accuracies": 0.925000011920929, - "rewards/chosen": -0.4123190939426422, - "rewards/margins": 0.46007823944091797, - "rewards/rejected": -0.8723973035812378, + "log_odds_chosen": 2.8878941535949707, + "log_odds_ratio": -0.10618897527456284, + "logits/chosen": 361.66241455078125, + "logits/rejected": 393.54351806640625, + "logps/chosen": -0.572822093963623, + "logps/rejected": -2.587498664855957, + "loss": 0.9084, + "nll_loss": 0.8843202590942383, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2864110469818115, + "rewards/margins": 1.007338285446167, + "rewards/rejected": -1.2937493324279785, "step": 180 }, { - "epoch": 1.1028315946348732, - "grad_norm": 33.03415298461914, + "epoch": 2.2023809523809526, + "grad_norm": 15.425383567810059, "learning_rate": 3.6760731104690393e-06, - "log_odds_chosen": 1.7977418899536133, - "log_odds_ratio": -0.29093313217163086, - "logits/chosen": 388.28021240234375, - "logits/rejected": 380.4255065917969, - "logps/chosen": -0.8684544563293457, - "logps/rejected": -2.099797487258911, - "loss": 1.2411, - "nll_loss": 1.0944446325302124, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": -0.43422722816467285, - "rewards/margins": 0.6156715750694275, - "rewards/rejected": -1.0498987436294556, + "log_odds_chosen": 2.7572951316833496, + "log_odds_ratio": -0.13107402622699738, + "logits/chosen": 328.209228515625, + "logits/rejected": 318.043701171875, + "logps/chosen": -0.5289679765701294, + "logps/rejected": -2.471531629562378, + "loss": 0.9075, + "nll_loss": 0.8783146142959595, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2644839882850647, + "rewards/margins": 0.9712821245193481, + "rewards/rejected": -1.235765814781189, "step": 185 }, { - "epoch": 1.1326378539493294, - "grad_norm": 48.01770782470703, + "epoch": 2.261904761904762, + "grad_norm": 17.949966430664062, "learning_rate": 3.6273812505500587e-06, - "log_odds_chosen": 1.2517069578170776, - "log_odds_ratio": -0.3731076717376709, - "logits/chosen": 356.5531005859375, - "logits/rejected": 402.0587463378906, - "logps/chosen": -0.8801576495170593, - "logps/rejected": -1.7424513101577759, - "loss": 1.326, - "nll_loss": 1.140998125076294, - "rewards/accuracies": 0.824999988079071, - "rewards/chosen": -0.44007882475852966, - "rewards/margins": 0.4311468005180359, - "rewards/rejected": -0.8712256550788879, + "log_odds_chosen": 2.646030902862549, + "log_odds_ratio": -0.1722700595855713, + "logits/chosen": 373.1665954589844, + "logits/rejected": 363.8042297363281, + "logps/chosen": -0.6163553595542908, + "logps/rejected": -2.3227782249450684, + "loss": 0.8853, + "nll_loss": 0.8921284675598145, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.3081776797771454, + "rewards/margins": 0.8532115817070007, + "rewards/rejected": -1.1613891124725342, "step": 190 }, { - "epoch": 1.1624441132637853, - "grad_norm": 25.167158126831055, + "epoch": 2.3214285714285716, + "grad_norm": 15.254051208496094, "learning_rate": 3.5805743701971648e-06, - "log_odds_chosen": 1.9819135665893555, - "log_odds_ratio": -0.29898396134376526, - "logits/chosen": 382.34100341796875, - "logits/rejected": 395.79425048828125, - "logps/chosen": -0.9413985013961792, - "logps/rejected": -2.448883533477783, - "loss": 1.3195, - "nll_loss": 1.2245818376541138, + "log_odds_chosen": 3.064070463180542, + "log_odds_ratio": -0.11556991189718246, + "logits/chosen": 355.3418884277344, + "logits/rejected": 384.1878967285156, + "logps/chosen": -0.5151618123054504, + "logps/rejected": -2.5838632583618164, + "loss": 0.9013, + "nll_loss": 0.8009279370307922, "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.4706992506980896, - "rewards/margins": 0.7537423968315125, - "rewards/rejected": -1.2244417667388916, + "rewards/chosen": -0.2575809061527252, + "rewards/margins": 1.0343506336212158, + "rewards/rejected": -1.2919316291809082, "step": 195 }, { - "epoch": 1.1922503725782414, - "grad_norm": 29.544389724731445, + "epoch": 2.380952380952381, + "grad_norm": 18.17316246032715, "learning_rate": 3.5355339059327378e-06, - "log_odds_chosen": 2.2445785999298096, - "log_odds_ratio": -0.27407315373420715, - "logits/chosen": 399.3606262207031, - "logits/rejected": 381.97125244140625, - "logps/chosen": -0.7390884757041931, - "logps/rejected": -2.445159435272217, - "loss": 1.3027, - "nll_loss": 1.0836920738220215, - "rewards/accuracies": 0.925000011920929, - "rewards/chosen": -0.36954423785209656, - "rewards/margins": 0.853035569190979, - "rewards/rejected": -1.2225797176361084, + "log_odds_chosen": 3.8410885334014893, + "log_odds_ratio": -0.07899609953165054, + "logits/chosen": 351.03741455078125, + "logits/rejected": 372.4954528808594, + "logps/chosen": -0.4664763808250427, + "logps/rejected": -3.2068772315979004, + "loss": 0.8742, + "nll_loss": 0.7695188522338867, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.23323819041252136, + "rewards/margins": 1.3702003955841064, + "rewards/rejected": -1.6034386157989502, "step": 200 }, { - "epoch": 1.2220566318926975, - "grad_norm": 33.41192626953125, + "epoch": 2.4404761904761907, + "grad_norm": 13.679876327514648, "learning_rate": 3.4921514788478916e-06, - "log_odds_chosen": 2.2366576194763184, - "log_odds_ratio": -0.2573709785938263, - "logits/chosen": 359.3567810058594, - "logits/rejected": 353.2535400390625, - "logps/chosen": -0.7408849596977234, - "logps/rejected": -2.3824286460876465, - "loss": 1.2558, - "nll_loss": 1.1112185716629028, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.3704424798488617, - "rewards/margins": 0.8207718729972839, - "rewards/rejected": -1.1912143230438232, + "log_odds_chosen": 2.8450164794921875, + "log_odds_ratio": -0.16636498272418976, + "logits/chosen": 386.46038818359375, + "logits/rejected": 371.1928405761719, + "logps/chosen": -0.5075671076774597, + "logps/rejected": -2.49824595451355, + "loss": 0.8909, + "nll_loss": 0.7808379530906677, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.25378355383872986, + "rewards/margins": 0.9953393936157227, + "rewards/rejected": -1.249122977256775, "step": 205 }, { - "epoch": 1.2518628912071534, - "grad_norm": 18.833148956298828, + "epoch": 2.5, + "grad_norm": 15.127974510192871, "learning_rate": 3.450327796711771e-06, - "log_odds_chosen": 2.773773431777954, - "log_odds_ratio": -0.24581894278526306, - "logits/chosen": 363.11090087890625, - "logits/rejected": 392.077880859375, - "logps/chosen": -0.7906020879745483, - "logps/rejected": -2.9855308532714844, - "loss": 1.2167, - "nll_loss": 1.0773674249649048, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.39530104398727417, - "rewards/margins": 1.0974645614624023, - "rewards/rejected": -1.4927654266357422, + "log_odds_chosen": 3.1379058361053467, + "log_odds_ratio": -0.0948343575000763, + "logits/chosen": 385.96038818359375, + "logits/rejected": 346.40997314453125, + "logps/chosen": -0.46884965896606445, + "logps/rejected": -2.5977988243103027, + "loss": 0.8884, + "nll_loss": 0.9064348340034485, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.23442482948303223, + "rewards/margins": 1.0644747018814087, + "rewards/rejected": -1.2988994121551514, "step": 210 }, { - "epoch": 1.2816691505216096, - "grad_norm": 32.97148895263672, + "epoch": 2.5595238095238093, + "grad_norm": 14.862520217895508, "learning_rate": 3.409971697352368e-06, - "log_odds_chosen": 2.409407138824463, - "log_odds_ratio": -0.2513101100921631, - "logits/chosen": 386.68804931640625, - "logits/rejected": 369.2494812011719, - "logps/chosen": -0.7668555378913879, - "logps/rejected": -2.570010185241699, - "loss": 1.232, - "nll_loss": 1.1109261512756348, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.38342776894569397, - "rewards/margins": 0.901577353477478, - "rewards/rejected": -1.2850050926208496, + "log_odds_chosen": 3.3548312187194824, + "log_odds_ratio": -0.07949019968509674, + "logits/chosen": 366.4288024902344, + "logits/rejected": 379.4535827636719, + "logps/chosen": -0.48801979422569275, + "logps/rejected": -2.8395187854766846, + "loss": 0.8614, + "nll_loss": 0.8571261167526245, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.24400989711284637, + "rewards/margins": 1.1757495403289795, + "rewards/rejected": -1.4197593927383423, "step": 215 }, { - "epoch": 1.3114754098360657, - "grad_norm": 27.4957218170166, + "epoch": 2.619047619047619, + "grad_norm": 14.53584098815918, "learning_rate": 3.3709993123162106e-06, - "log_odds_chosen": 1.301114797592163, - "log_odds_ratio": -0.34698307514190674, - "logits/chosen": 377.33697509765625, - "logits/rejected": 371.03192138671875, - "logps/chosen": -0.8284207582473755, - "logps/rejected": -1.6063474416732788, - "loss": 1.2242, - "nll_loss": 1.0992552042007446, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": -0.41421037912368774, - "rewards/margins": 0.3889634311199188, - "rewards/rejected": -0.8031737208366394, + "log_odds_chosen": 2.9063503742218018, + "log_odds_ratio": -0.11560215055942535, + "logits/chosen": 353.8319396972656, + "logits/rejected": 356.432373046875, + "logps/chosen": -0.4644307494163513, + "logps/rejected": -2.378437042236328, + "loss": 0.9108, + "nll_loss": 0.7656416893005371, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.23221537470817566, + "rewards/margins": 0.9570032358169556, + "rewards/rejected": -1.189218521118164, "step": 220 }, { - "epoch": 1.3412816691505216, - "grad_norm": 27.05774688720703, + "epoch": 2.678571428571429, + "grad_norm": 17.206817626953125, "learning_rate": 3.3333333333333333e-06, - "log_odds_chosen": 1.0342134237289429, - "log_odds_ratio": -0.3879423439502716, - "logits/chosen": 373.6796875, - "logits/rejected": 366.01123046875, - "logps/chosen": -0.9923228025436401, - "logps/rejected": -1.7060105800628662, - "loss": 1.2664, - "nll_loss": 1.197096586227417, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": -0.49616140127182007, - "rewards/margins": 0.3568439185619354, - "rewards/rejected": -0.8530052900314331, + "log_odds_chosen": 3.093144178390503, + "log_odds_ratio": -0.10332699865102768, + "logits/chosen": 360.0953063964844, + "logits/rejected": 369.03460693359375, + "logps/chosen": -0.6034930944442749, + "logps/rejected": -2.8071255683898926, + "loss": 0.9202, + "nll_loss": 0.8482205271720886, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.30174654722213745, + "rewards/margins": 1.1018160581588745, + "rewards/rejected": -1.4035627841949463, "step": 225 }, { - "epoch": 1.3710879284649775, - "grad_norm": 32.180084228515625, + "epoch": 2.738095238095238, + "grad_norm": 32.90928268432617, "learning_rate": 3.296902366978936e-06, - "log_odds_chosen": 1.9371551275253296, - "log_odds_ratio": -0.265031635761261, - "logits/chosen": 354.0810546875, - "logits/rejected": 372.2239990234375, - "logps/chosen": -0.8565861582756042, - "logps/rejected": -2.277017116546631, - "loss": 1.2422, - "nll_loss": 0.9957939982414246, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": -0.4282930791378021, - "rewards/margins": 0.7102155685424805, - "rewards/rejected": -1.1385085582733154, + "log_odds_chosen": 2.7679336071014404, + "log_odds_ratio": -0.1059746965765953, + "logits/chosen": 395.354736328125, + "logits/rejected": 391.44451904296875, + "logps/chosen": -0.48557600378990173, + "logps/rejected": -2.241939067840576, + "loss": 0.9492, + "nll_loss": 0.863217830657959, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.24278800189495087, + "rewards/margins": 0.8781815767288208, + "rewards/rejected": -1.120969533920288, "step": 230 }, { - "epoch": 1.4008941877794336, - "grad_norm": 20.605178833007812, + "epoch": 2.7976190476190474, + "grad_norm": 19.33934783935547, "learning_rate": 3.2616403652672114e-06, - "log_odds_chosen": 1.8511009216308594, - "log_odds_ratio": -0.2493407279253006, - "logits/chosen": 376.54779052734375, - "logits/rejected": 390.1648254394531, - "logps/chosen": -0.7664806246757507, - "logps/rejected": -2.081376075744629, - "loss": 1.1997, - "nll_loss": 1.0387040376663208, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.38324031233787537, - "rewards/margins": 0.6574475765228271, - "rewards/rejected": -1.0406880378723145, + "log_odds_chosen": 2.7701828479766846, + "log_odds_ratio": -0.13227275013923645, + "logits/chosen": 384.26873779296875, + "logits/rejected": 360.5152893066406, + "logps/chosen": -0.5336109399795532, + "logps/rejected": -2.403452157974243, + "loss": 0.9262, + "nll_loss": 0.8939388394355774, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.2668054699897766, + "rewards/margins": 0.9349204897880554, + "rewards/rejected": -1.2017260789871216, "step": 235 }, { - "epoch": 1.4307004470938898, - "grad_norm": 24.71708106994629, + "epoch": 2.857142857142857, + "grad_norm": 20.41938591003418, "learning_rate": 3.2274861218395142e-06, - "log_odds_chosen": 1.3407671451568604, - "log_odds_ratio": -0.33692649006843567, - "logits/chosen": 401.45428466796875, - "logits/rejected": 407.09161376953125, - "logps/chosen": -0.9054458737373352, - "logps/rejected": -1.8345670700073242, - "loss": 1.3273, - "nll_loss": 1.143675446510315, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": -0.4527229368686676, - "rewards/margins": 0.46456044912338257, - "rewards/rejected": -0.9172835350036621, + "log_odds_chosen": 3.104609251022339, + "log_odds_ratio": -0.09935127198696136, + "logits/chosen": 356.3617248535156, + "logits/rejected": 407.67620849609375, + "logps/chosen": -0.5356149673461914, + "logps/rejected": -2.744302272796631, + "loss": 0.8823, + "nll_loss": 0.7891393899917603, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2678074836730957, + "rewards/margins": 1.1043436527252197, + "rewards/rejected": -1.3721511363983154, "step": 240 }, { - "epoch": 1.4605067064083457, - "grad_norm": 37.8891487121582, + "epoch": 2.9166666666666665, + "grad_norm": 28.204269409179688, "learning_rate": 3.1943828249997e-06, - "log_odds_chosen": 1.8552640676498413, - "log_odds_ratio": -0.272559255361557, - "logits/chosen": 400.0021667480469, - "logits/rejected": 387.4122009277344, - "logps/chosen": -0.7997323274612427, - "logps/rejected": -2.1247220039367676, - "loss": 1.3455, - "nll_loss": 1.303070306777954, - "rewards/accuracies": 0.925000011920929, - "rewards/chosen": -0.39986616373062134, - "rewards/margins": 0.6624947786331177, - "rewards/rejected": -1.0623610019683838, + "log_odds_chosen": 2.208242893218994, + "log_odds_ratio": -0.1713695377111435, + "logits/chosen": 374.45611572265625, + "logits/rejected": 340.09619140625, + "logps/chosen": -0.7715775966644287, + "logps/rejected": -2.332731008529663, + "loss": 0.9365, + "nll_loss": 0.9215513467788696, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.38578879833221436, + "rewards/margins": 0.7805767059326172, + "rewards/rejected": -1.1663655042648315, "step": 245 }, { - "epoch": 1.4903129657228018, - "grad_norm": 35.78718948364258, + "epoch": 2.9761904761904763, + "grad_norm": 30.126291275024414, "learning_rate": 3.1622776601683796e-06, - "log_odds_chosen": 1.5998339653015137, - "log_odds_ratio": -0.3105994760990143, - "logits/chosen": 369.63177490234375, - "logits/rejected": 376.02960205078125, - "logps/chosen": -0.7897557020187378, - "logps/rejected": -1.8593695163726807, - "loss": 1.2365, - "nll_loss": 1.0359935760498047, - "rewards/accuracies": 0.875, - "rewards/chosen": -0.3948778510093689, - "rewards/margins": 0.5348068475723267, - "rewards/rejected": -0.9296847581863403, - "step": 250 - }, - { - "epoch": 1.520119225037258, - "grad_norm": 25.08989143371582, - "learning_rate": 3.131121455425748e-06, - "log_odds_chosen": 1.8597294092178345, - "log_odds_ratio": -0.21304909884929657, - "logits/chosen": 384.16290283203125, - "logits/rejected": 387.0345153808594, - "logps/chosen": -0.7654696702957153, - "logps/rejected": -2.019379138946533, - "loss": 1.2499, - "nll_loss": 1.030097246170044, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.38273483514785767, - "rewards/margins": 0.6269546747207642, - "rewards/rejected": -1.0096895694732666, - "step": 255 - }, - { - "epoch": 1.5499254843517138, - "grad_norm": 21.55389976501465, - "learning_rate": 3.1008683647302113e-06, - "log_odds_chosen": 1.4927784204483032, - "log_odds_ratio": -0.29883819818496704, - "logits/chosen": 362.99444580078125, - "logits/rejected": 404.9141845703125, - "logps/chosen": -0.8609813451766968, - "logps/rejected": -1.9159471988677979, - "loss": 1.2228, - "nll_loss": 1.09743070602417, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": -0.4304906725883484, - "rewards/margins": 0.5274828672409058, - "rewards/rejected": -0.9579735994338989, - "step": 260 - }, - { - "epoch": 1.5797317436661698, - "grad_norm": 19.87468910217285, - "learning_rate": 3.0714755841697565e-06, - "log_odds_chosen": 1.570449709892273, - "log_odds_ratio": -0.3343026041984558, - "logits/chosen": 374.11920166015625, - "logits/rejected": 396.1978454589844, - "logps/chosen": -0.8117960691452026, - "logps/rejected": -1.8302360773086548, - "loss": 1.3153, - "nll_loss": 1.1015130281448364, - "rewards/accuracies": 0.875, - "rewards/chosen": -0.4058980345726013, - "rewards/margins": 0.5092200040817261, - "rewards/rejected": -0.9151180386543274, - "step": 265 - }, - { - "epoch": 1.6095380029806259, - "grad_norm": 20.458053588867188, - "learning_rate": 3.0429030972509227e-06, - "log_odds_chosen": 1.468200922012329, - "log_odds_ratio": -0.3185100853443146, - "logits/chosen": 360.3885192871094, - "logits/rejected": 371.0684509277344, - "logps/chosen": -0.948271632194519, - "logps/rejected": -1.9939063787460327, - "loss": 1.2987, - "nll_loss": 1.2175120115280151, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": -0.4741358160972595, - "rewards/margins": 0.5228174328804016, - "rewards/rejected": -0.9969531893730164, - "step": 270 - }, - { - "epoch": 1.639344262295082, - "grad_norm": 21.885934829711914, - "learning_rate": 3.0151134457776365e-06, - "log_odds_chosen": 1.2957884073257446, - "log_odds_ratio": -0.34294062852859497, - "logits/chosen": 351.11566162109375, - "logits/rejected": 341.6363830566406, - "logps/chosen": -0.816031277179718, - "logps/rejected": -1.6235980987548828, - "loss": 1.3141, - "nll_loss": 1.0874848365783691, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": -0.408015638589859, - "rewards/margins": 0.40378338098526, - "rewards/rejected": -0.8117990493774414, - "step": 275 - }, - { - "epoch": 1.669150521609538, - "grad_norm": 32.71710205078125, - "learning_rate": 2.988071523335984e-06, - "log_odds_chosen": 1.3881560564041138, - "log_odds_ratio": -0.44494813680648804, - "logits/chosen": 399.6920471191406, - "logits/rejected": 387.6843566894531, - "logps/chosen": -0.9040805697441101, - "logps/rejected": -1.8093194961547852, - "loss": 1.3021, - "nll_loss": 1.1397716999053955, - "rewards/accuracies": 0.824999988079071, - "rewards/chosen": -0.45204028487205505, - "rewards/margins": 0.4526194930076599, - "rewards/rejected": -0.9046597480773926, - "step": 280 - }, - { - "epoch": 1.698956780923994, - "grad_norm": 31.200809478759766, - "learning_rate": 2.961744388795462e-06, - "log_odds_chosen": 1.7313816547393799, - "log_odds_ratio": -0.28469711542129517, - "logits/chosen": 359.5746154785156, - "logits/rejected": 362.81494140625, - "logps/chosen": -0.6776639223098755, - "logps/rejected": -1.7751470804214478, - "loss": 1.2089, - "nll_loss": 1.0214917659759521, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": -0.33883196115493774, - "rewards/margins": 0.5487415790557861, - "rewards/rejected": -0.8875735402107239, - "step": 285 - }, - { - "epoch": 1.7287630402384502, - "grad_norm": 17.744766235351562, - "learning_rate": 2.9361010975735177e-06, - "log_odds_chosen": 1.5064774751663208, - "log_odds_ratio": -0.3122694492340088, - "logits/chosen": 376.41033935546875, - "logits/rejected": 415.8057556152344, - "logps/chosen": -0.8741256594657898, - "logps/rejected": -1.9100143909454346, - "loss": 1.259, - "nll_loss": 1.0986948013305664, - "rewards/accuracies": 0.925000011920929, - "rewards/chosen": -0.4370628297328949, - "rewards/margins": 0.5179442167282104, - "rewards/rejected": -0.9550071954727173, - "step": 290 - }, - { - "epoch": 1.758569299552906, - "grad_norm": 22.985361099243164, - "learning_rate": 2.9111125486979104e-06, - "log_odds_chosen": 1.4937174320220947, - "log_odds_ratio": -0.3182430565357208, - "logits/chosen": 353.3531188964844, - "logits/rejected": 398.23382568359375, - "logps/chosen": -0.8985480070114136, - "logps/rejected": -1.8789575099945068, - "loss": 1.2814, - "nll_loss": 1.1132934093475342, - "rewards/accuracies": 0.875, - "rewards/chosen": -0.4492740035057068, - "rewards/margins": 0.4902048110961914, - "rewards/rejected": -0.9394787549972534, - "step": 295 - }, - { - "epoch": 1.788375558867362, - "grad_norm": 25.093547821044922, - "learning_rate": 2.8867513459481293e-06, - "log_odds_chosen": 1.7519241571426392, - "log_odds_ratio": -0.23565292358398438, - "logits/chosen": 396.87640380859375, - "logits/rejected": 374.09942626953125, - "logps/chosen": -0.6939581632614136, - "logps/rejected": -1.8357816934585571, - "loss": 1.201, - "nll_loss": 0.9846609234809875, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.3469790816307068, - "rewards/margins": 0.5709116458892822, - "rewards/rejected": -0.9178908467292786, - "step": 300 - }, - { - "epoch": 1.8181818181818183, - "grad_norm": 22.51293182373047, - "learning_rate": 2.862991671569341e-06, - "log_odds_chosen": 1.3752100467681885, - "log_odds_ratio": -0.39724329113960266, - "logits/chosen": 392.15093994140625, - "logits/rejected": 400.69677734375, - "logps/chosen": -0.9685919880867004, - "logps/rejected": -2.020549774169922, - "loss": 1.2557, - "nll_loss": 1.2231824398040771, - "rewards/accuracies": 0.875, - "rewards/chosen": -0.4842959940433502, - "rewards/margins": 0.5259788036346436, - "rewards/rejected": -1.010274887084961, - "step": 305 - }, - { - "epoch": 1.8479880774962743, - "grad_norm": 18.64189910888672, - "learning_rate": 2.839809171235324e-06, - "log_odds_chosen": 1.9540401697158813, - "log_odds_ratio": -0.28014156222343445, - "logits/chosen": 374.20074462890625, - "logits/rejected": 382.771240234375, - "logps/chosen": -0.823870837688446, - "logps/rejected": -2.250458002090454, - "loss": 1.3098, - "nll_loss": 1.1509441137313843, - "rewards/accuracies": 0.925000011920929, - "rewards/chosen": -0.411935418844223, - "rewards/margins": 0.7132935523986816, - "rewards/rejected": -1.125229001045227, - "step": 310 - }, - { - "epoch": 1.8777943368107302, - "grad_norm": 20.656198501586914, - "learning_rate": 2.817180849095055e-06, - "log_odds_chosen": 0.9874370694160461, - "log_odds_ratio": -0.43443575501441956, - "logits/chosen": 348.67279052734375, - "logits/rejected": 367.7172546386719, - "logps/chosen": -1.063990831375122, - "logps/rejected": -1.7773240804672241, - "loss": 1.3159, - "nll_loss": 1.2891852855682373, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.531995415687561, - "rewards/margins": 0.3566665053367615, - "rewards/rejected": -0.8886620402336121, - "step": 315 - }, - { - "epoch": 1.9076005961251863, - "grad_norm": 22.823558807373047, - "learning_rate": 2.7950849718747376e-06, - "log_odds_chosen": 1.9457191228866577, - "log_odds_ratio": -0.23255281150341034, - "logits/chosen": 369.242431640625, - "logits/rejected": 389.74346923828125, - "logps/chosen": -0.7030061483383179, - "logps/rejected": -2.0039026737213135, - "loss": 1.1894, - "nll_loss": 1.0204839706420898, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.35150307416915894, - "rewards/margins": 0.650448203086853, - "rewards/rejected": -1.0019513368606567, - "step": 320 - }, - { - "epoch": 1.9374068554396424, - "grad_norm": 27.562149047851562, - "learning_rate": 2.773500981126146e-06, - "log_odds_chosen": 1.998150110244751, - "log_odds_ratio": -0.24417133629322052, - "logits/chosen": 365.95684814453125, - "logits/rejected": 397.9959411621094, - "logps/chosen": -0.7909399271011353, - "logps/rejected": -2.2285313606262207, - "loss": 1.179, - "nll_loss": 1.0059034824371338, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.3954699635505676, - "rewards/margins": 0.7187958359718323, - "rewards/rejected": -1.1142656803131104, - "step": 325 - }, - { - "epoch": 1.9672131147540983, - "grad_norm": 27.540767669677734, - "learning_rate": 2.752409412815902e-06, - "log_odds_chosen": 1.38077712059021, - "log_odds_ratio": -0.3091193735599518, - "logits/chosen": 361.24969482421875, - "logits/rejected": 369.6480407714844, - "logps/chosen": -0.8252070546150208, - "logps/rejected": -1.7504304647445679, - "loss": 1.2534, - "nll_loss": 0.9596601724624634, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.4126035273075104, - "rewards/margins": 0.46261176466941833, - "rewards/rejected": -0.8752152323722839, - "step": 330 - }, - { - "epoch": 1.9970193740685542, - "grad_norm": 35.79298782348633, - "learning_rate": 2.7317918235407652e-06, - "log_odds_chosen": 1.0996582508087158, - "log_odds_ratio": -0.4153285622596741, - "logits/chosen": 389.44366455078125, - "logits/rejected": 379.0640563964844, - "logps/chosen": -1.0918314456939697, - "logps/rejected": -1.8735624551773071, - "loss": 1.3092, - "nll_loss": 1.2989994287490845, - "rewards/accuracies": 0.7749999761581421, - "rewards/chosen": -0.5459157228469849, - "rewards/margins": 0.3908655047416687, - "rewards/rejected": -0.9367812275886536, - "step": 335 - }, - { - "epoch": 1.9970193740685542, - "eval_log_odds_chosen": 0.32236072421073914, - "eval_log_odds_ratio": -0.6637296676635742, - "eval_logits/chosen": 309.3900146484375, - "eval_logits/rejected": 279.0658874511719, - "eval_logps/chosen": -1.040497064590454, - "eval_logps/rejected": -1.2617474794387817, - "eval_loss": 1.7559822797775269, - "eval_nll_loss": 1.4054137468338013, - "eval_rewards/accuracies": 0.5323740839958191, - "eval_rewards/chosen": -0.520248532295227, - "eval_rewards/margins": 0.11062520742416382, - "eval_rewards/rejected": -0.6308737397193909, - "eval_runtime": 351.6864, - "eval_samples_per_second": 1.572, - "eval_steps_per_second": 0.395, - "step": 335 - }, - { - "epoch": 2.0268256333830106, - "grad_norm": 22.68450927734375, - "learning_rate": 2.711630722733202e-06, - "log_odds_chosen": 3.2875800132751465, - "log_odds_ratio": -0.10148422420024872, - "logits/chosen": 390.20111083984375, - "logits/rejected": 362.7218017578125, - "logps/chosen": -0.4825616478919983, - "logps/rejected": -2.6374728679656982, - "loss": 0.7684, - "nll_loss": 0.8495180010795593, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.24128082394599915, - "rewards/margins": 1.0774555206298828, - "rewards/rejected": -1.3187364339828491, - "step": 340 - }, - { - "epoch": 2.0566318926974665, - "grad_norm": 12.678057670593262, - "learning_rate": 2.691909510290828e-06, - "log_odds_chosen": 3.9607760906219482, - "log_odds_ratio": -0.06652864068746567, - "logits/chosen": 340.1753234863281, - "logits/rejected": 337.1167907714844, - "logps/chosen": -0.44624167680740356, - "logps/rejected": -3.1436822414398193, - "loss": 0.6642, - "nll_loss": 0.6589840650558472, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.22312083840370178, - "rewards/margins": 1.3487205505371094, - "rewards/rejected": -1.5718411207199097, - "step": 345 - }, - { - "epoch": 2.0864381520119224, - "grad_norm": 18.899171829223633, - "learning_rate": 2.6726124191242444e-06, - "log_odds_chosen": 3.980642318725586, - "log_odds_ratio": -0.04676336795091629, - "logits/chosen": 340.1651916503906, - "logits/rejected": 373.43426513671875, - "logps/chosen": -0.43081822991371155, - "logps/rejected": -3.3248093128204346, - "loss": 0.6728, - "nll_loss": 0.6396235227584839, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.21540911495685577, - "rewards/margins": 1.446995496749878, - "rewards/rejected": -1.6624046564102173, - "step": 350 - }, - { - "epoch": 2.1162444113263787, - "grad_norm": 15.860565185546875, - "learning_rate": 2.6537244621713765e-06, - "log_odds_chosen": 3.6839728355407715, - "log_odds_ratio": -0.07649212330579758, - "logits/chosen": 345.269287109375, - "logits/rejected": 359.37823486328125, - "logps/chosen": -0.45684829354286194, - "logps/rejected": -3.046020984649658, - "loss": 0.666, - "nll_loss": 0.7428454160690308, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.22842414677143097, - "rewards/margins": 1.294586420059204, - "rewards/rejected": -1.523010492324829, - "step": 355 - }, - { - "epoch": 2.1460506706408347, - "grad_norm": 13.459833145141602, - "learning_rate": 2.6352313834736496e-06, - "log_odds_chosen": 4.002976417541504, - "log_odds_ratio": -0.04872741550207138, - "logits/chosen": 340.22161865234375, - "logits/rejected": 384.5272521972656, - "logps/chosen": -0.433218777179718, - "logps/rejected": -3.1645781993865967, - "loss": 0.6602, - "nll_loss": 0.6463421583175659, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.216609388589859, - "rewards/margins": 1.3656798601150513, - "rewards/rejected": -1.5822890996932983, - "step": 360 - }, - { - "epoch": 2.1758569299552906, - "grad_norm": 14.115008354187012, - "learning_rate": 2.6171196129510684e-06, - "log_odds_chosen": 3.177072763442993, - "log_odds_ratio": -0.0845123901963234, - "logits/chosen": 326.1507568359375, - "logits/rejected": 306.5514831542969, - "logps/chosen": -0.41812819242477417, - "logps/rejected": -2.4115400314331055, - "loss": 0.6679, - "nll_loss": 0.6390963196754456, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.20906409621238708, - "rewards/margins": 0.9967058300971985, - "rewards/rejected": -1.2057700157165527, - "step": 365 - }, - { - "epoch": 2.2056631892697465, - "grad_norm": 15.614830017089844, - "learning_rate": 2.599376224550182e-06, - "log_odds_chosen": 3.3914806842803955, - "log_odds_ratio": -0.08955641835927963, - "logits/chosen": 287.53955078125, - "logits/rejected": 306.90667724609375, - "logps/chosen": -0.44138556718826294, - "logps/rejected": -2.749290943145752, - "loss": 0.6969, - "nll_loss": 0.7107062935829163, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.22069278359413147, - "rewards/margins": 1.153952717781067, - "rewards/rejected": -1.374645471572876, - "step": 370 - }, - { - "epoch": 2.235469448584203, - "grad_norm": 15.579936027526855, - "learning_rate": 2.5819888974716113e-06, - "log_odds_chosen": 3.3506603240966797, - "log_odds_ratio": -0.09304364025592804, - "logits/chosen": 339.9989929199219, - "logits/rejected": 357.97528076171875, - "logps/chosen": -0.5137821435928345, - "logps/rejected": -2.7970046997070312, - "loss": 0.6856, - "nll_loss": 0.7090636491775513, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.25689107179641724, - "rewards/margins": 1.1416113376617432, - "rewards/rejected": -1.3985023498535156, - "step": 375 - }, - { - "epoch": 2.2652757078986587, - "grad_norm": 17.770183563232422, - "learning_rate": 2.564945880212886e-06, - "log_odds_chosen": 3.6279399394989014, - "log_odds_ratio": -0.04611171409487724, - "logits/chosen": 343.6501159667969, - "logits/rejected": 322.5646057128906, - "logps/chosen": -0.35303473472595215, - "logps/rejected": -2.671903610229492, - "loss": 0.6689, - "nll_loss": 0.6217206120491028, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.17651736736297607, - "rewards/margins": 1.1594343185424805, - "rewards/rejected": -1.335951805114746, - "step": 380 - }, - { - "epoch": 2.2950819672131146, - "grad_norm": 12.41324520111084, - "learning_rate": 2.5482359571881276e-06, - "log_odds_chosen": 3.9699618816375732, - "log_odds_ratio": -0.04786146804690361, - "logits/chosen": 341.5869445800781, - "logits/rejected": 333.04608154296875, - "logps/chosen": -0.3405749499797821, - "logps/rejected": -2.8487496376037598, - "loss": 0.6364, - "nll_loss": 0.5744936466217041, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.17028747498989105, - "rewards/margins": 1.254087209701538, - "rewards/rejected": -1.4243748188018799, - "step": 385 - }, - { - "epoch": 2.3248882265275705, - "grad_norm": 14.415823936462402, - "learning_rate": 2.5318484177091667e-06, - "log_odds_chosen": 3.405970335006714, - "log_odds_ratio": -0.05260590463876724, - "logits/chosen": 360.901611328125, - "logits/rejected": 378.63616943359375, - "logps/chosen": -0.5018097162246704, - "logps/rejected": -2.8674509525299072, - "loss": 0.7074, - "nll_loss": 0.6968181729316711, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.2509048581123352, - "rewards/margins": 1.1828205585479736, - "rewards/rejected": -1.4337254762649536, - "step": 390 - }, - { - "epoch": 2.354694485842027, - "grad_norm": 11.383499145507812, - "learning_rate": 2.515773027133138e-06, - "log_odds_chosen": 3.9227728843688965, - "log_odds_ratio": -0.05491284653544426, - "logits/chosen": 349.3078308105469, - "logits/rejected": 332.0811462402344, - "logps/chosen": -0.32619425654411316, - "logps/rejected": -2.7826080322265625, - "loss": 0.6264, - "nll_loss": 0.5774807929992676, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.16309712827205658, - "rewards/margins": 1.2282068729400635, - "rewards/rejected": -1.3913040161132812, - "step": 395 - }, - { - "epoch": 2.384500745156483, - "grad_norm": 15.452503204345703, - "learning_rate": 2.5e-06, - "log_odds_chosen": 3.7418270111083984, - "log_odds_ratio": -0.08289723098278046, - "logits/chosen": 353.244873046875, - "logits/rejected": 371.33941650390625, - "logps/chosen": -0.4247261583805084, - "logps/rejected": -3.0048041343688965, - "loss": 0.6678, - "nll_loss": 0.5866397619247437, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.2123630791902542, - "rewards/margins": 1.2900387048721313, - "rewards/rejected": -1.5024020671844482, - "step": 400 - }, - { - "epoch": 2.4143070044709387, - "grad_norm": 14.744851112365723, - "learning_rate": 2.484519974999767e-06, - "log_odds_chosen": 4.066527366638184, - "log_odds_ratio": -0.07095837593078613, - "logits/chosen": 405.4268493652344, - "logits/rejected": 363.4831237792969, - "logps/chosen": -0.4019082188606262, - "logps/rejected": -3.0571980476379395, - "loss": 0.6775, - "nll_loss": 0.6186262965202332, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.2009541094303131, - "rewards/margins": 1.3276448249816895, - "rewards/rejected": -1.5285990238189697, - "step": 405 - }, - { - "epoch": 2.444113263785395, - "grad_norm": 15.641300201416016, - "learning_rate": 2.4693239916239746e-06, - "log_odds_chosen": 3.6895904541015625, - "log_odds_ratio": -0.08013327419757843, - "logits/chosen": 352.7126770019531, - "logits/rejected": 362.0486145019531, - "logps/chosen": -0.4512965679168701, - "logps/rejected": -2.8881852626800537, - "loss": 0.6612, - "nll_loss": 0.6566362380981445, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.22564828395843506, - "rewards/margins": 1.2184442281723022, - "rewards/rejected": -1.4440926313400269, - "step": 410 - }, - { - "epoch": 2.473919523099851, - "grad_norm": 13.112720489501953, - "learning_rate": 2.4544034683690802e-06, - "log_odds_chosen": 4.032966613769531, - "log_odds_ratio": -0.03849425166845322, - "logits/chosen": 352.24176025390625, - "logits/rejected": 376.4973449707031, - "logps/chosen": -0.38949456810951233, - "logps/rejected": -3.1059412956237793, - "loss": 0.6593, - "nll_loss": 0.5457557439804077, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.19474728405475616, - "rewards/margins": 1.35822331905365, - "rewards/rejected": -1.5529706478118896, - "step": 415 - }, - { - "epoch": 2.503725782414307, - "grad_norm": 13.079460144042969, - "learning_rate": 2.4397501823713327e-06, - "log_odds_chosen": 3.7700283527374268, - "log_odds_ratio": -0.06273230165243149, - "logits/chosen": 351.4734802246094, - "logits/rejected": 317.71209716796875, - "logps/chosen": -0.3789517283439636, - "logps/rejected": -2.9251832962036133, - "loss": 0.6409, - "nll_loss": 0.7009339928627014, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.1894758641719818, - "rewards/margins": 1.273115634918213, - "rewards/rejected": -1.4625916481018066, - "step": 420 - }, - { - "epoch": 2.533532041728763, - "grad_norm": 20.27842903137207, - "learning_rate": 2.4253562503633297e-06, - "log_odds_chosen": 4.24957799911499, - "log_odds_ratio": -0.032464753836393356, - "logits/chosen": 348.3805847167969, - "logits/rejected": 337.718994140625, - "logps/chosen": -0.3897190988063812, - "logps/rejected": -3.382542848587036, - "loss": 0.6474, - "nll_loss": 0.6459823250770569, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.1948595494031906, - "rewards/margins": 1.4964120388031006, - "rewards/rejected": -1.691271424293518, - "step": 425 - }, - { - "epoch": 2.563338301043219, - "grad_norm": 14.640429496765137, - "learning_rate": 2.411214110852061e-06, - "log_odds_chosen": 4.611870765686035, - "log_odds_ratio": -0.03490515798330307, - "logits/chosen": 347.74298095703125, - "logits/rejected": 351.59912109375, - "logps/chosen": -0.3038939833641052, - "logps/rejected": -3.4484775066375732, - "loss": 0.6441, - "nll_loss": 0.5448944568634033, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.1519469916820526, - "rewards/margins": 1.572291612625122, - "rewards/rejected": -1.7242387533187866, - "step": 430 - }, - { - "epoch": 2.593144560357675, - "grad_norm": 15.337629318237305, - "learning_rate": 2.3973165074269213e-06, - "log_odds_chosen": 3.7105846405029297, - "log_odds_ratio": -0.04743606597185135, - "logits/chosen": 360.7786560058594, - "logits/rejected": 318.19854736328125, - "logps/chosen": -0.36348485946655273, - "logps/rejected": -2.7734932899475098, - "loss": 0.6432, - "nll_loss": 0.5634604692459106, + "log_odds_chosen": 2.9035000801086426, + "log_odds_ratio": -0.11840321123600006, + "logits/chosen": 376.98828125, + "logits/rejected": 380.9736328125, + "logps/chosen": -0.48586076498031616, + "logps/rejected": -2.4321203231811523, + "loss": 0.9109, + "nll_loss": 0.7184505462646484, "rewards/accuracies": 1.0, - "rewards/chosen": -0.18174242973327637, - "rewards/margins": 1.205004096031189, - "rewards/rejected": -1.3867466449737549, - "step": 435 - }, - { - "epoch": 2.6229508196721314, - "grad_norm": 13.271897315979004, - "learning_rate": 2.3836564731139807e-06, - "log_odds_chosen": 3.826213836669922, - "log_odds_ratio": -0.04420660808682442, - "logits/chosen": 342.50848388671875, - "logits/rejected": 348.86456298828125, - "logps/chosen": -0.30872535705566406, - "logps/rejected": -2.6104750633239746, - "loss": 0.666, - "nll_loss": 0.6212347745895386, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.15436267852783203, - "rewards/margins": 1.1508748531341553, - "rewards/rejected": -1.3052375316619873, - "step": 440 - }, - { - "epoch": 2.6527570789865873, - "grad_norm": 17.46467399597168, - "learning_rate": 2.3702273156998867e-06, - "log_odds_chosen": 4.261384963989258, - "log_odds_ratio": -0.03544224798679352, - "logits/chosen": 323.58251953125, - "logits/rejected": 354.87847900390625, - "logps/chosen": -0.4285755753517151, - "logps/rejected": -3.4066290855407715, - "loss": 0.6866, - "nll_loss": 0.6935943961143494, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.21428778767585754, - "rewards/margins": 1.4890267848968506, - "rewards/rejected": -1.7033145427703857, - "step": 445 - }, - { - "epoch": 2.682563338301043, - "grad_norm": 12.280491828918457, - "learning_rate": 2.357022603955159e-06, - "log_odds_chosen": 3.510754108428955, - "log_odds_ratio": -0.05934901908040047, - "logits/chosen": 356.21588134765625, - "logits/rejected": 349.2935485839844, - "logps/chosen": -0.4579055905342102, - "logps/rejected": -2.802351474761963, - "loss": 0.6947, - "nll_loss": 0.6444963216781616, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.2289527952671051, - "rewards/margins": 1.1722228527069092, - "rewards/rejected": -1.4011757373809814, - "step": 450 - }, - { - "epoch": 2.712369597615499, - "grad_norm": 14.349300384521484, - "learning_rate": 2.3440361546924774e-06, - "log_odds_chosen": 3.9128804206848145, - "log_odds_ratio": -0.05345267057418823, - "logits/chosen": 379.7029724121094, - "logits/rejected": 353.4925842285156, - "logps/chosen": -0.4496696889400482, - "logps/rejected": -3.064148426055908, - "loss": 0.7279, - "nll_loss": 0.6569377779960632, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.2248348444700241, - "rewards/margins": 1.307239294052124, - "rewards/rejected": -1.532074213027954, - "step": 455 - }, - { - "epoch": 2.742175856929955, - "grad_norm": 13.55716609954834, - "learning_rate": 2.3312620206007847e-06, - "log_odds_chosen": 4.291790962219238, - "log_odds_ratio": -0.044444940984249115, - "logits/chosen": 374.377197265625, - "logits/rejected": 386.43414306640625, - "logps/chosen": -0.3812227249145508, - "logps/rejected": -3.4395527839660645, - "loss": 0.6847, - "nll_loss": 0.7294474840164185, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.1906113624572754, - "rewards/margins": 1.5291650295257568, - "rewards/rejected": -1.7197763919830322, - "step": 460 - }, - { - "epoch": 2.7719821162444114, - "grad_norm": 13.988215446472168, - "learning_rate": 2.3186944788008413e-06, - "log_odds_chosen": 4.281071186065674, - "log_odds_ratio": -0.04669572785496712, - "logits/chosen": 359.35101318359375, - "logits/rejected": 352.8810729980469, - "logps/chosen": -0.30659520626068115, - "logps/rejected": -3.1890709400177, - "loss": 0.6956, - "nll_loss": 0.6471156477928162, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.15329760313034058, - "rewards/margins": 1.4412376880645752, - "rewards/rejected": -1.59453547000885, - "step": 465 - }, - { - "epoch": 2.8017883755588673, - "grad_norm": 16.967275619506836, - "learning_rate": 2.3063280200722128e-06, - "log_odds_chosen": 3.476628065109253, - "log_odds_ratio": -0.07849601656198502, - "logits/chosen": 363.3095703125, - "logits/rejected": 323.81842041015625, - "logps/chosen": -0.43367767333984375, - "logps/rejected": -2.7612318992614746, - "loss": 0.6725, - "nll_loss": 0.6623119711875916, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.21683883666992188, - "rewards/margins": 1.1637769937515259, - "rewards/rejected": -1.3806159496307373, - "step": 470 - }, - { - "epoch": 2.8315946348733236, - "grad_norm": 19.491451263427734, - "learning_rate": 2.2941573387056174e-06, - "log_odds_chosen": 3.7874722480773926, - "log_odds_ratio": -0.04359927773475647, - "logits/chosen": 332.71844482421875, - "logits/rejected": 350.5658264160156, - "logps/chosen": -0.39931654930114746, - "logps/rejected": -2.8959929943084717, - "loss": 0.6421, - "nll_loss": 0.5615902543067932, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.19965827465057373, - "rewards/margins": 1.248338222503662, - "rewards/rejected": -1.4479964971542358, - "step": 475 - }, - { - "epoch": 2.8614008941877795, - "grad_norm": 18.62973976135254, - "learning_rate": 2.2821773229381924e-06, - "log_odds_chosen": 4.226737976074219, - "log_odds_ratio": -0.04461986571550369, - "logits/chosen": 344.9051208496094, - "logits/rejected": 380.61822509765625, - "logps/chosen": -0.3830781877040863, - "logps/rejected": -3.3659965991973877, - "loss": 0.6189, - "nll_loss": 0.5661858320236206, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.19153909385204315, - "rewards/margins": 1.4914592504501343, - "rewards/rejected": -1.6829982995986938, - "step": 480 - }, - { - "epoch": 2.8912071535022354, - "grad_norm": 14.639021873474121, - "learning_rate": 2.270383045932499e-06, - "log_odds_chosen": 3.5930495262145996, - "log_odds_ratio": -0.06206268072128296, - "logits/chosen": 341.22650146484375, - "logits/rejected": 361.0518493652344, - "logps/chosen": -0.48062896728515625, - "logps/rejected": -2.968130111694336, - "loss": 0.6645, - "nll_loss": 0.6335129737854004, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.24031448364257812, - "rewards/margins": 1.2437504529953003, - "rewards/rejected": -1.484065055847168, - "step": 485 - }, - { - "epoch": 2.9210134128166914, - "grad_norm": 13.041991233825684, - "learning_rate": 2.2587697572631284e-06, - "log_odds_chosen": 3.418093204498291, - "log_odds_ratio": -0.06181482598185539, - "logits/chosen": 360.23419189453125, - "logits/rejected": 314.44183349609375, - "logps/chosen": -0.4381326138973236, - "logps/rejected": -2.6530632972717285, - "loss": 0.7257, - "nll_loss": 0.6311744451522827, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.2190663069486618, - "rewards/margins": 1.107465386390686, - "rewards/rejected": -1.3265316486358643, - "step": 490 - }, - { - "epoch": 2.9508196721311473, - "grad_norm": 13.917710304260254, - "learning_rate": 2.2473328748774737e-06, - "log_odds_chosen": 3.9481112957000732, - "log_odds_ratio": -0.07351940125226974, - "logits/chosen": 354.31781005859375, - "logits/rejected": 374.8128356933594, - "logps/chosen": -0.5134202837944031, - "logps/rejected": -3.2178916931152344, - "loss": 0.6495, - "nll_loss": 0.6444075703620911, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.25671014189720154, - "rewards/margins": 1.3522356748580933, - "rewards/rejected": -1.6089458465576172, - "step": 495 - }, - { - "epoch": 2.9806259314456036, - "grad_norm": 13.279515266418457, - "learning_rate": 2.23606797749979e-06, - "log_odds_chosen": 3.631924867630005, - "log_odds_ratio": -0.05388236045837402, - "logits/chosen": 363.3906555175781, - "logits/rejected": 350.91546630859375, - "logps/chosen": -0.3526715636253357, - "logps/rejected": -2.695366382598877, - "loss": 0.6827, - "nll_loss": 0.5351300835609436, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.17633578181266785, - "rewards/margins": 1.1713473796844482, - "rewards/rejected": -1.3476831912994385, - "step": 500 - }, - { - "epoch": 2.9865871833084947, - "eval_log_odds_chosen": 0.42297032475471497, - "eval_log_odds_ratio": -0.6477048993110657, - "eval_logits/chosen": 282.00848388671875, - "eval_logits/rejected": 247.6262664794922, - "eval_logps/chosen": -1.2038370370864868, - "eval_logps/rejected": -1.5014722347259521, - "eval_loss": 1.9079890251159668, - "eval_nll_loss": 1.5545235872268677, - "eval_rewards/accuracies": 0.6258992552757263, - "eval_rewards/chosen": -0.6019185185432434, - "eval_rewards/margins": 0.14881758391857147, - "eval_rewards/rejected": -0.7507361173629761, - "eval_runtime": 351.9872, - "eval_samples_per_second": 1.571, - "eval_steps_per_second": 0.395, - "step": 501 + "rewards/chosen": -0.24293038249015808, + "rewards/margins": 0.9731297492980957, + "rewards/rejected": -1.2160601615905762, + "step": 250 }, { - "epoch": 2.9865871833084947, - "step": 501, + "epoch": 3.0, + "eval_log_odds_chosen": 0.4396049678325653, + "eval_log_odds_ratio": -0.6420542597770691, + "eval_logits/chosen": 301.1214599609375, + "eval_logits/rejected": 240.39068603515625, + "eval_logps/chosen": -1.19857656955719, + "eval_logps/rejected": -1.5204962491989136, + "eval_loss": 1.8577181100845337, + "eval_nll_loss": 1.5532194375991821, + "eval_rewards/accuracies": 0.6142857074737549, + "eval_rewards/chosen": -0.599288284778595, + "eval_rewards/margins": 0.16095994412899017, + "eval_rewards/rejected": -0.7602481245994568, + "eval_runtime": 201.7642, + "eval_samples_per_second": 2.741, + "eval_steps_per_second": 0.347, + "step": 252 + }, + { + "epoch": 3.0, + "step": 252, "total_flos": 0.0, - "train_loss": 1.686337627812536, - "train_runtime": 31593.308, - "train_samples_per_second": 0.509, - "train_steps_per_second": 0.016 + "train_loss": 2.2429193542117165, + "train_runtime": 13126.4564, + "train_samples_per_second": 1.226, + "train_steps_per_second": 0.019 } ], "logging_steps": 5, - "max_steps": 501, + "max_steps": 252, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500,