{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9977426636568849, "eval_steps": 500, "global_step": 221, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "grad_norm": 3.3125, "learning_rate": 4.9747829807701e-06, "log_odds_chosen": 0.29668617248535156, "log_odds_ratio": -0.6379951238632202, "logits/chosen": -3.2007384300231934, "logits/rejected": -3.1898930072784424, "logps/chosen": -0.8916305303573608, "logps/rejected": -1.0567858219146729, "loss": 0.6627, "nll_loss": 0.5468634366989136, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.1783261001110077, "rewards/margins": 0.033031076192855835, "rewards/rejected": -0.2113572061061859, "step": 10 }, { "epoch": 0.09, "grad_norm": 3.234375, "learning_rate": 4.89964064152747e-06, "log_odds_chosen": 0.3181908428668976, "log_odds_ratio": -0.6044929623603821, "logits/chosen": -3.215482711791992, "logits/rejected": -3.215388059616089, "logps/chosen": -0.8658155202865601, "logps/rejected": -1.0561679601669312, "loss": 0.6162, "nll_loss": 0.4879694879055023, "rewards/accuracies": 0.625, "rewards/chosen": -0.17316310107707977, "rewards/margins": 0.03807050734758377, "rewards/rejected": -0.21123358607292175, "step": 20 }, { "epoch": 0.14, "grad_norm": 3.3125, "learning_rate": 4.7760888749230414e-06, "log_odds_chosen": 0.3909495174884796, "log_odds_ratio": -0.6076517701148987, "logits/chosen": -3.2152676582336426, "logits/rejected": -3.202721357345581, "logps/chosen": -0.8426260948181152, "logps/rejected": -1.061783790588379, "loss": 0.6037, "nll_loss": 0.4774637222290039, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.16852520406246185, "rewards/margins": 0.04383154585957527, "rewards/rejected": -0.21235676109790802, "step": 30 }, { "epoch": 0.18, "grad_norm": 2.96875, "learning_rate": 4.6066201667762944e-06, "log_odds_chosen": 0.3192257285118103, "log_odds_ratio": -0.6141721606254578, "logits/chosen": -3.200482130050659, "logits/rejected": -3.1900203227996826, "logps/chosen": -0.8548553586006165, "logps/rejected": -1.034092903137207, "loss": 0.6009, "nll_loss": 0.4767337441444397, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.17097108066082, "rewards/margins": 0.0358474999666214, "rewards/rejected": -0.2068185806274414, "step": 40 }, { "epoch": 0.23, "grad_norm": 2.84375, "learning_rate": 4.3946533136249926e-06, "log_odds_chosen": 0.3027415871620178, "log_odds_ratio": -0.6166602969169617, "logits/chosen": -3.206148147583008, "logits/rejected": -3.1972875595092773, "logps/chosen": -0.8349093198776245, "logps/rejected": -1.0070207118988037, "loss": 0.5918, "nll_loss": 0.4716118276119232, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.16698187589645386, "rewards/margins": 0.03442227095365524, "rewards/rejected": -0.2014041393995285, "step": 50 }, { "epoch": 0.27, "grad_norm": 3.09375, "learning_rate": 4.1444644532387485e-06, "log_odds_chosen": 0.48492059111595154, "log_odds_ratio": -0.5473419427871704, "logits/chosen": -3.209733247756958, "logits/rejected": -3.202362537384033, "logps/chosen": -0.8019172549247742, "logps/rejected": -1.0743623971939087, "loss": 0.5974, "nll_loss": 0.476468950510025, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.16038347780704498, "rewards/margins": 0.05448903515934944, "rewards/rejected": -0.21487247943878174, "step": 60 }, { "epoch": 0.32, "grad_norm": 3.28125, "learning_rate": 3.861100799460336e-06, "log_odds_chosen": 0.331600546836853, "log_odds_ratio": -0.6023644208908081, "logits/chosen": -3.1663670539855957, "logits/rejected": -3.150857925415039, "logps/chosen": -0.8181543350219727, "logps/rejected": -0.9955471754074097, "loss": 0.603, "nll_loss": 0.4812262952327728, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.163630872964859, "rewards/margins": 0.03547856956720352, "rewards/rejected": -0.19910944998264313, "step": 70 }, { "epoch": 0.36, "grad_norm": 3.328125, "learning_rate": 3.550278821654866e-06, "log_odds_chosen": 0.477538526058197, "log_odds_ratio": -0.5380920171737671, "logits/chosen": -3.194234848022461, "logits/rejected": -3.1841280460357666, "logps/chosen": -0.8073774576187134, "logps/rejected": -1.0695488452911377, "loss": 0.5619, "nll_loss": 0.47237372398376465, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.16147547960281372, "rewards/margins": 0.05243431776762009, "rewards/rejected": -0.2139098197221756, "step": 80 }, { "epoch": 0.41, "grad_norm": 3.390625, "learning_rate": 3.218268922855452e-06, "log_odds_chosen": 0.3934742212295532, "log_odds_ratio": -0.598767101764679, "logits/chosen": -3.1895227432250977, "logits/rejected": -3.173553943634033, "logps/chosen": -0.8475399017333984, "logps/rejected": -1.0798330307006836, "loss": 0.6021, "nll_loss": 0.4789814054965973, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.16950799524784088, "rewards/margins": 0.046458613127470016, "rewards/rejected": -0.2159666121006012, "step": 90 }, { "epoch": 0.45, "grad_norm": 2.921875, "learning_rate": 2.871768943064129e-06, "log_odds_chosen": 0.44187504053115845, "log_odds_ratio": -0.5802451372146606, "logits/chosen": -3.1786134243011475, "logits/rejected": -3.1717491149902344, "logps/chosen": -0.8232837915420532, "logps/rejected": -1.0571515560150146, "loss": 0.5994, "nll_loss": 0.49746760725975037, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.16465675830841064, "rewards/margins": 0.046773575246334076, "rewards/rejected": -0.21143031120300293, "step": 100 }, { "epoch": 0.5, "grad_norm": 3.75, "learning_rate": 2.517769039603744e-06, "log_odds_chosen": 0.45674100518226624, "log_odds_ratio": -0.5514575242996216, "logits/chosen": -3.192737102508545, "logits/rejected": -3.182710647583008, "logps/chosen": -0.7476301789283752, "logps/rejected": -0.9913870692253113, "loss": 0.5818, "nll_loss": 0.4433298707008362, "rewards/accuracies": 0.75, "rewards/chosen": -0.14952602982521057, "rewards/margins": 0.04875140264630318, "rewards/rejected": -0.19827742874622345, "step": 110 }, { "epoch": 0.54, "grad_norm": 3.34375, "learning_rate": 2.163410670372652e-06, "log_odds_chosen": 0.4035864770412445, "log_odds_ratio": -0.6150745153427124, "logits/chosen": -3.193054676055908, "logits/rejected": -3.178786277770996, "logps/chosen": -0.816441535949707, "logps/rejected": -1.0266228914260864, "loss": 0.5753, "nll_loss": 0.44871068000793457, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.16328832507133484, "rewards/margins": 0.042036257684230804, "rewards/rejected": -0.20532457530498505, "step": 120 }, { "epoch": 0.59, "grad_norm": 3.078125, "learning_rate": 1.8158425248197931e-06, "log_odds_chosen": 0.3830532431602478, "log_odds_ratio": -0.5974889993667603, "logits/chosen": -3.196091890335083, "logits/rejected": -3.1858651638031006, "logps/chosen": -0.8129725456237793, "logps/rejected": -1.0201164484024048, "loss": 0.5808, "nll_loss": 0.4634559154510498, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.1625945270061493, "rewards/margins": 0.041428789496421814, "rewards/rejected": -0.20402328670024872, "step": 130 }, { "epoch": 0.63, "grad_norm": 3.375, "learning_rate": 1.482076309033254e-06, "log_odds_chosen": 0.6047395467758179, "log_odds_ratio": -0.5417571663856506, "logits/chosen": -3.196877956390381, "logits/rejected": -3.181436061859131, "logps/chosen": -0.7740004062652588, "logps/rejected": -1.068457007408142, "loss": 0.5764, "nll_loss": 0.5040202140808105, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.15480007231235504, "rewards/margins": 0.058891307562589645, "rewards/rejected": -0.21369138360023499, "step": 140 }, { "epoch": 0.68, "grad_norm": 2.9375, "learning_rate": 1.1688452942784592e-06, "log_odds_chosen": 0.44741934537887573, "log_odds_ratio": -0.5705805420875549, "logits/chosen": -3.1841094493865967, "logits/rejected": -3.182647705078125, "logps/chosen": -0.8209434747695923, "logps/rejected": -1.067769169807434, "loss": 0.5718, "nll_loss": 0.4761679768562317, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.1641887128353119, "rewards/margins": 0.04936511814594269, "rewards/rejected": -0.21355381608009338, "step": 150 }, { "epoch": 0.72, "grad_norm": 3.171875, "learning_rate": 8.824684825733865e-07, "log_odds_chosen": 0.3429742455482483, "log_odds_ratio": -0.6090758442878723, "logits/chosen": -3.207231044769287, "logits/rejected": -3.193406581878662, "logps/chosen": -0.8023210763931274, "logps/rejected": -0.9921668767929077, "loss": 0.5897, "nll_loss": 0.4722462594509125, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.16046421229839325, "rewards/margins": 0.03796914964914322, "rewards/rejected": -0.19843336939811707, "step": 160 }, { "epoch": 0.77, "grad_norm": 4.25, "learning_rate": 6.28723129572247e-07, "log_odds_chosen": 0.3038043677806854, "log_odds_ratio": -0.6364859342575073, "logits/chosen": -3.2201812267303467, "logits/rejected": -3.2117972373962402, "logps/chosen": -0.7899866104125977, "logps/rejected": -0.9470660090446472, "loss": 0.614, "nll_loss": 0.47518712282180786, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.15799733996391296, "rewards/margins": 0.03141586109995842, "rewards/rejected": -0.18941320478916168, "step": 170 }, { "epoch": 0.81, "grad_norm": 4.0, "learning_rate": 4.127281964319446e-07, "log_odds_chosen": 0.42332348227500916, "log_odds_ratio": -0.5784670114517212, "logits/chosen": -3.194354295730591, "logits/rejected": -3.1853244304656982, "logps/chosen": -0.7676048278808594, "logps/rejected": -0.984086811542511, "loss": 0.5862, "nll_loss": 0.4467201232910156, "rewards/accuracies": 0.6875, "rewards/chosen": -0.15352095663547516, "rewards/margins": 0.04329640045762062, "rewards/rejected": -0.19681736826896667, "step": 180 }, { "epoch": 0.86, "grad_norm": 3.15625, "learning_rate": 2.388410818585263e-07, "log_odds_chosen": 0.47039803862571716, "log_odds_ratio": -0.5836545825004578, "logits/chosen": -3.205718994140625, "logits/rejected": -3.188554286956787, "logps/chosen": -0.7774848937988281, "logps/rejected": -1.0028108358383179, "loss": 0.5743, "nll_loss": 0.4325433671474457, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.1554969847202301, "rewards/margins": 0.045065198093652725, "rewards/rejected": -0.20056216418743134, "step": 190 }, { "epoch": 0.9, "grad_norm": 2.9375, "learning_rate": 1.1056971762161584e-07, "log_odds_chosen": 0.2785649299621582, "log_odds_ratio": -0.643887460231781, "logits/chosen": -3.1912004947662354, "logits/rejected": -3.1815104484558105, "logps/chosen": -0.87229984998703, "logps/rejected": -1.022983193397522, "loss": 0.5796, "nll_loss": 0.4835774004459381, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.17445996403694153, "rewards/margins": 0.030136678367853165, "rewards/rejected": -0.20459666848182678, "step": 200 }, { "epoch": 0.95, "grad_norm": 3.03125, "learning_rate": 3.050180088809973e-08, "log_odds_chosen": 0.30176714062690735, "log_odds_ratio": -0.6279340386390686, "logits/chosen": -3.187382221221924, "logits/rejected": -3.172898054122925, "logps/chosen": -0.842138946056366, "logps/rejected": -1.0073583126068115, "loss": 0.5913, "nll_loss": 0.4963778555393219, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.16842779517173767, "rewards/margins": 0.03304388374090195, "rewards/rejected": -0.20147165656089783, "step": 210 }, { "epoch": 0.99, "grad_norm": 3.34375, "learning_rate": 2.525910147516131e-10, "log_odds_chosen": 0.4221881031990051, "log_odds_ratio": -0.5777419805526733, "logits/chosen": -3.206446886062622, "logits/rejected": -3.1952741146087646, "logps/chosen": -0.7956362962722778, "logps/rejected": -1.0130151510238647, "loss": 0.6099, "nll_loss": 0.4894731640815735, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.15912725031375885, "rewards/margins": 0.04347577691078186, "rewards/rejected": -0.2026030272245407, "step": 220 }, { "epoch": 1.0, "step": 221, "total_flos": 0.0, "train_loss": 0.5939142045931579, "train_runtime": 4430.1575, "train_samples_per_second": 3.195, "train_steps_per_second": 0.05 } ], "logging_steps": 10, "max_steps": 221, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }