{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9865871833084947, "eval_steps": 500, "global_step": 501, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.029806259314456036, "grad_norm": 1762.8857421875, "learning_rate": 2.5000000000000004e-07, "log_odds_chosen": -0.22333388030529022, "log_odds_ratio": -1.0081762075424194, "logits/chosen": 204.30679321289062, "logits/rejected": 202.9920654296875, "logps/chosen": -14.826652526855469, "logps/rejected": -14.603320121765137, "loss": 14.961, "nll_loss": 14.546102523803711, "rewards/accuracies": 0.3499999940395355, "rewards/chosen": -0.7413326501846313, "rewards/margins": -0.011166660115122795, "rewards/rejected": -0.7301660776138306, "step": 5 }, { "epoch": 0.05961251862891207, "grad_norm": 1195.5567626953125, "learning_rate": 5.000000000000001e-07, "log_odds_chosen": 0.25514093041419983, "log_odds_ratio": -0.770182192325592, "logits/chosen": 219.4593505859375, "logits/rejected": 223.51095581054688, "logps/chosen": -12.235333442687988, "logps/rejected": -12.489803314208984, "loss": 12.6124, "nll_loss": 12.337944984436035, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.6117666363716125, "rewards/margins": 0.012723559513688087, "rewards/rejected": -0.6244901418685913, "step": 10 }, { "epoch": 0.08941877794336811, "grad_norm": 721.7440185546875, "learning_rate": 7.5e-07, "log_odds_chosen": 0.04993244633078575, "log_odds_ratio": -0.7743036150932312, "logits/chosen": 281.7969055175781, "logits/rejected": 260.814453125, "logps/chosen": -7.967254638671875, "logps/rejected": -8.01715087890625, "loss": 8.2807, "nll_loss": 7.958427429199219, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.3983627259731293, "rewards/margins": 0.0024948143400251865, "rewards/rejected": -0.4008575975894928, "step": 15 }, { "epoch": 0.11922503725782414, "grad_norm": 213.13336181640625, "learning_rate": 1.0000000000000002e-06, "log_odds_chosen": -0.1490481197834015, "log_odds_ratio": -0.95225590467453, "logits/chosen": 280.4493103027344, "logits/rejected": 274.66717529296875, "logps/chosen": -5.374236583709717, "logps/rejected": -5.226569175720215, "loss": 5.4432, "nll_loss": 5.450861930847168, "rewards/accuracies": 0.375, "rewards/chosen": -0.2687118351459503, "rewards/margins": -0.007383383810520172, "rewards/rejected": -0.26132842898368835, "step": 20 }, { "epoch": 0.14903129657228018, "grad_norm": 154.36373901367188, "learning_rate": 1.25e-06, "log_odds_chosen": -0.05349766090512276, "log_odds_ratio": -0.8921065330505371, "logits/chosen": 297.8148193359375, "logits/rejected": 307.04766845703125, "logps/chosen": -3.2826087474823, "logps/rejected": -3.2111122608184814, "loss": 3.5, "nll_loss": 3.3887104988098145, "rewards/accuracies": 0.5, "rewards/chosen": -0.16413041949272156, "rewards/margins": -0.0035748339723795652, "rewards/rejected": -0.16055560111999512, "step": 25 }, { "epoch": 0.17883755588673622, "grad_norm": 80.20259094238281, "learning_rate": 1.5e-06, "log_odds_chosen": -0.07229617983102798, "log_odds_ratio": -0.8916282653808594, "logits/chosen": 345.52191162109375, "logits/rejected": 374.13287353515625, "logps/chosen": -2.6274566650390625, "logps/rejected": -2.530172348022461, "loss": 2.5601, "nll_loss": 2.645339012145996, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.1313728392124176, "rewards/margins": -0.004864226561039686, "rewards/rejected": -0.12650862336158752, "step": 30 }, { "epoch": 0.20864381520119224, "grad_norm": 41.495731353759766, "learning_rate": 1.75e-06, "log_odds_chosen": 0.1673038899898529, "log_odds_ratio": -0.7395197153091431, "logits/chosen": 379.2995300292969, "logits/rejected": 367.61065673828125, "logps/chosen": -1.7991399765014648, "logps/rejected": -1.9078947305679321, "loss": 2.1231, "nll_loss": 1.9985812902450562, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.08995698392391205, "rewards/margins": 0.0054377405904233456, "rewards/rejected": -0.09539473056793213, "step": 35 }, { "epoch": 0.23845007451564829, "grad_norm": 57.26367950439453, "learning_rate": 2.0000000000000003e-06, "log_odds_chosen": 0.02127310074865818, "log_odds_ratio": -0.7780741453170776, "logits/chosen": 371.747802734375, "logits/rejected": 370.3223571777344, "logps/chosen": -1.6784114837646484, "logps/rejected": -1.6915397644042969, "loss": 1.9474, "nll_loss": 2.0377304553985596, "rewards/accuracies": 0.5, "rewards/chosen": -0.08392057567834854, "rewards/margins": 0.0006564242066815495, "rewards/rejected": -0.08457700163125992, "step": 40 }, { "epoch": 0.26825633383010433, "grad_norm": 48.953094482421875, "learning_rate": 2.25e-06, "log_odds_chosen": 0.06037778779864311, "log_odds_ratio": -0.7294493317604065, "logits/chosen": 385.0721740722656, "logits/rejected": 395.3931884765625, "logps/chosen": -1.5469728708267212, "logps/rejected": -1.5890170335769653, "loss": 1.8679, "nll_loss": 1.742649793624878, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.07734864205121994, "rewards/margins": 0.0021022059954702854, "rewards/rejected": -0.07945084571838379, "step": 45 }, { "epoch": 0.29806259314456035, "grad_norm": 85.16621398925781, "learning_rate": 2.5e-06, "log_odds_chosen": 0.22148697078227997, "log_odds_ratio": -0.6563897728919983, "logits/chosen": 395.87554931640625, "logits/rejected": 417.33563232421875, "logps/chosen": -1.4042726755142212, "logps/rejected": -1.5677330493927002, "loss": 1.8511, "nll_loss": 1.8633716106414795, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.07021363079547882, "rewards/margins": 0.008173028007149696, "rewards/rejected": -0.07838664948940277, "step": 50 }, { "epoch": 0.32786885245901637, "grad_norm": 36.78052520751953, "learning_rate": 2.7500000000000004e-06, "log_odds_chosen": 0.04750330001115799, "log_odds_ratio": -0.7403008341789246, "logits/chosen": 383.05865478515625, "logits/rejected": 376.47137451171875, "logps/chosen": -1.4311497211456299, "logps/rejected": -1.4584500789642334, "loss": 1.8524, "nll_loss": 1.9031813144683838, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.07155750691890717, "rewards/margins": 0.0013650130713358521, "rewards/rejected": -0.07292251288890839, "step": 55 }, { "epoch": 0.35767511177347244, "grad_norm": 43.11362838745117, "learning_rate": 3e-06, "log_odds_chosen": 0.15154634416103363, "log_odds_ratio": -0.6628466844558716, "logits/chosen": 388.72491455078125, "logits/rejected": 380.75030517578125, "logps/chosen": -1.324789047241211, "logps/rejected": -1.4295395612716675, "loss": 1.6907, "nll_loss": 1.754913568496704, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.06623945385217667, "rewards/margins": 0.005237526725977659, "rewards/rejected": -0.07147698104381561, "step": 60 }, { "epoch": 0.38748137108792846, "grad_norm": 29.449420928955078, "learning_rate": 3.2500000000000002e-06, "log_odds_chosen": 0.0873890295624733, "log_odds_ratio": -0.710555911064148, "logits/chosen": 387.2967834472656, "logits/rejected": 388.5743103027344, "logps/chosen": -1.249342679977417, "logps/rejected": -1.2920448780059814, "loss": 1.5953, "nll_loss": 1.5086474418640137, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.06246713548898697, "rewards/margins": 0.0021351135801523924, "rewards/rejected": -0.06460224092006683, "step": 65 }, { "epoch": 0.4172876304023845, "grad_norm": 66.8738784790039, "learning_rate": 3.5e-06, "log_odds_chosen": 0.049095284193754196, "log_odds_ratio": -0.7218947410583496, "logits/chosen": 375.4095153808594, "logits/rejected": 383.84027099609375, "logps/chosen": -1.3798081874847412, "logps/rejected": -1.4165852069854736, "loss": 1.632, "nll_loss": 1.642600655555725, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.06899039447307587, "rewards/margins": 0.001838861615397036, "rewards/rejected": -0.07082925736904144, "step": 70 }, { "epoch": 0.44709388971684055, "grad_norm": 24.510610580444336, "learning_rate": 3.7500000000000005e-06, "log_odds_chosen": 0.21395280957221985, "log_odds_ratio": -0.6359378099441528, "logits/chosen": 395.4688415527344, "logits/rejected": 382.9261169433594, "logps/chosen": -1.1935937404632568, "logps/rejected": -1.337820291519165, "loss": 1.5629, "nll_loss": 1.5003348588943481, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.05967969447374344, "rewards/margins": 0.00721132755279541, "rewards/rejected": -0.06689102202653885, "step": 75 }, { "epoch": 0.47690014903129657, "grad_norm": 30.089900970458984, "learning_rate": 4.000000000000001e-06, "log_odds_chosen": 0.20370396971702576, "log_odds_ratio": -0.6502530574798584, "logits/chosen": 382.20904541015625, "logits/rejected": 403.7727355957031, "logps/chosen": -1.17880117893219, "logps/rejected": -1.3107407093048096, "loss": 1.5995, "nll_loss": 1.6122217178344727, "rewards/accuracies": 0.625, "rewards/chosen": -0.058940064162015915, "rewards/margins": 0.006596976425498724, "rewards/rejected": -0.06553704291582108, "step": 80 }, { "epoch": 0.5067064083457526, "grad_norm": 165.75381469726562, "learning_rate": 4.25e-06, "log_odds_chosen": 0.07357416301965714, "log_odds_ratio": -0.8076593279838562, "logits/chosen": 408.95843505859375, "logits/rejected": 394.03826904296875, "logps/chosen": -1.4526355266571045, "logps/rejected": -1.4595062732696533, "loss": 1.6746, "nll_loss": 1.7690614461898804, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07263178378343582, "rewards/margins": 0.0003435421676840633, "rewards/rejected": -0.07297532260417938, "step": 85 }, { "epoch": 0.5365126676602087, "grad_norm": 45.735618591308594, "learning_rate": 4.5e-06, "log_odds_chosen": 0.5337249040603638, "log_odds_ratio": -0.5693989396095276, "logits/chosen": 402.0947570800781, "logits/rejected": 416.75689697265625, "logps/chosen": -1.3862842321395874, "logps/rejected": -1.796555757522583, "loss": 1.5211, "nll_loss": 1.5622494220733643, "rewards/accuracies": 0.625, "rewards/chosen": -0.06931421905755997, "rewards/margins": 0.020513568073511124, "rewards/rejected": -0.0898277759552002, "step": 90 }, { "epoch": 0.5663189269746647, "grad_norm": 43.20003890991211, "learning_rate": 4.75e-06, "log_odds_chosen": 0.18776021897792816, "log_odds_ratio": -0.6678361892700195, "logits/chosen": 367.4861145019531, "logits/rejected": 380.6282958984375, "logps/chosen": -1.1577775478363037, "logps/rejected": -1.240468978881836, "loss": 1.5718, "nll_loss": 1.4726136922836304, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05788888409733772, "rewards/margins": 0.004134564660489559, "rewards/rejected": -0.06202344968914986, "step": 95 }, { "epoch": 0.5961251862891207, "grad_norm": 48.09437561035156, "learning_rate": 5e-06, "log_odds_chosen": 0.23021917045116425, "log_odds_ratio": -0.6669245958328247, "logits/chosen": 398.15692138671875, "logits/rejected": 436.06280517578125, "logps/chosen": -1.3762584924697876, "logps/rejected": -1.5756226778030396, "loss": 1.6621, "nll_loss": 1.676337480545044, "rewards/accuracies": 0.625, "rewards/chosen": -0.06881292164325714, "rewards/margins": 0.009968215599656105, "rewards/rejected": -0.0787811428308487, "step": 100 }, { "epoch": 0.6259314456035767, "grad_norm": 27.461023330688477, "learning_rate": 4.8795003647426654e-06, "log_odds_chosen": 0.25321143865585327, "log_odds_ratio": -0.6335381269454956, "logits/chosen": 394.9198303222656, "logits/rejected": 407.670166015625, "logps/chosen": -1.1359978914260864, "logps/rejected": -1.282949686050415, "loss": 1.5569, "nll_loss": 1.5841158628463745, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.05679989606142044, "rewards/margins": 0.007347588427364826, "rewards/rejected": -0.0641474798321724, "step": 105 }, { "epoch": 0.6557377049180327, "grad_norm": 58.5862922668457, "learning_rate": 4.767312946227961e-06, "log_odds_chosen": 0.17413778603076935, "log_odds_ratio": -0.6657994985580444, "logits/chosen": 372.2387390136719, "logits/rejected": 370.97259521484375, "logps/chosen": -1.1112958192825317, "logps/rejected": -1.2337472438812256, "loss": 1.5196, "nll_loss": 1.5138860940933228, "rewards/accuracies": 0.625, "rewards/chosen": -0.05556480213999748, "rewards/margins": 0.00612256396561861, "rewards/rejected": -0.06168735772371292, "step": 110 }, { "epoch": 0.6855439642324889, "grad_norm": 25.225566864013672, "learning_rate": 4.662524041201569e-06, "log_odds_chosen": 0.2932291030883789, "log_odds_ratio": -0.6261448264122009, "logits/chosen": 398.36285400390625, "logits/rejected": 405.1409912109375, "logps/chosen": -0.9624778032302856, "logps/rejected": -1.100894570350647, "loss": 1.4976, "nll_loss": 1.4066407680511475, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.04812389984726906, "rewards/margins": 0.0069208345375955105, "rewards/rejected": -0.05504472926259041, "step": 115 }, { "epoch": 0.7153502235469449, "grad_norm": 25.138811111450195, "learning_rate": 4.564354645876385e-06, "log_odds_chosen": 0.30031102895736694, "log_odds_ratio": -0.6141648292541504, "logits/chosen": 381.42999267578125, "logits/rejected": 381.4985656738281, "logps/chosen": -1.05239999294281, "logps/rejected": -1.2082456350326538, "loss": 1.5521, "nll_loss": 1.5355098247528076, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.05262000486254692, "rewards/margins": 0.007792273070663214, "rewards/rejected": -0.06041227653622627, "step": 120 }, { "epoch": 0.7451564828614009, "grad_norm": 19.848705291748047, "learning_rate": 4.47213595499958e-06, "log_odds_chosen": 0.05417771264910698, "log_odds_ratio": -0.7723890542984009, "logits/chosen": 375.4615173339844, "logits/rejected": 388.3155517578125, "logps/chosen": -1.1864535808563232, "logps/rejected": -1.1864855289459229, "loss": 1.4682, "nll_loss": 1.473937749862671, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.05932268500328064, "rewards/margins": 1.5988014183676569e-06, "rewards/rejected": -0.05932428315281868, "step": 125 }, { "epoch": 0.7749627421758569, "grad_norm": 30.878917694091797, "learning_rate": 4.385290096535147e-06, "log_odds_chosen": 0.1284504234790802, "log_odds_ratio": -0.6890888214111328, "logits/chosen": 400.09014892578125, "logits/rejected": 389.0010070800781, "logps/chosen": -1.1370112895965576, "logps/rejected": -1.1725587844848633, "loss": 1.5141, "nll_loss": 1.4747650623321533, "rewards/accuracies": 0.5, "rewards/chosen": -0.05685057118535042, "rewards/margins": 0.0017773698782548308, "rewards/rejected": -0.05862794071435928, "step": 130 }, { "epoch": 0.8047690014903129, "grad_norm": 34.69911575317383, "learning_rate": 4.303314829119352e-06, "log_odds_chosen": 0.07419878244400024, "log_odds_ratio": -0.7176602482795715, "logits/chosen": 412.095703125, "logits/rejected": 414.66827392578125, "logps/chosen": -1.1232882738113403, "logps/rejected": -1.1864019632339478, "loss": 1.5359, "nll_loss": 1.5837700366973877, "rewards/accuracies": 0.5, "rewards/chosen": -0.05616441369056702, "rewards/margins": 0.0031556878238916397, "rewards/rejected": -0.059320103377103806, "step": 135 }, { "epoch": 0.834575260804769, "grad_norm": 33.93345642089844, "learning_rate": 4.2257712736425835e-06, "log_odds_chosen": -0.04845789074897766, "log_odds_ratio": -0.7893471121788025, "logits/chosen": 398.22607421875, "logits/rejected": 404.393798828125, "logps/chosen": -1.119332194328308, "logps/rejected": -1.0812653303146362, "loss": 1.5122, "nll_loss": 1.6213722229003906, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.05596661567687988, "rewards/margins": -0.0019033461576327682, "rewards/rejected": -0.05406326800584793, "step": 140 }, { "epoch": 0.8643815201192251, "grad_norm": 22.562604904174805, "learning_rate": 4.1522739926869985e-06, "log_odds_chosen": -0.06688841432332993, "log_odds_ratio": -0.7556332349777222, "logits/chosen": 395.27984619140625, "logits/rejected": 398.4122009277344, "logps/chosen": -1.2002326250076294, "logps/rejected": -1.1435927152633667, "loss": 1.5121, "nll_loss": 1.514585256576538, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.06001163646578789, "rewards/margins": -0.0028319929260760546, "rewards/rejected": -0.05717964097857475, "step": 145 }, { "epoch": 0.8941877794336811, "grad_norm": 38.268333435058594, "learning_rate": 4.082482904638631e-06, "log_odds_chosen": 0.3597918152809143, "log_odds_ratio": -0.5650432705879211, "logits/chosen": 401.6814270019531, "logits/rejected": 418.9139709472656, "logps/chosen": -1.0605757236480713, "logps/rejected": -1.296025037765503, "loss": 1.4755, "nll_loss": 1.387669324874878, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.053028784692287445, "rewards/margins": 0.011772466823458672, "rewards/rejected": -0.06480124592781067, "step": 150 }, { "epoch": 0.9239940387481371, "grad_norm": 35.649044036865234, "learning_rate": 4.016096644512495e-06, "log_odds_chosen": 0.11360454559326172, "log_odds_ratio": -0.6917680501937866, "logits/chosen": 380.48785400390625, "logits/rejected": 395.10772705078125, "logps/chosen": -1.1738497018814087, "logps/rejected": -1.2541792392730713, "loss": 1.4352, "nll_loss": 1.3315799236297607, "rewards/accuracies": 0.625, "rewards/chosen": -0.058692485094070435, "rewards/margins": 0.004016467835754156, "rewards/rejected": -0.06270895153284073, "step": 155 }, { "epoch": 0.9538002980625931, "grad_norm": 37.8629035949707, "learning_rate": 3.952847075210474e-06, "log_odds_chosen": 0.04191911593079567, "log_odds_ratio": -0.7673999071121216, "logits/chosen": 384.6130065917969, "logits/rejected": 430.66485595703125, "logps/chosen": -1.0005159378051758, "logps/rejected": -1.0551975965499878, "loss": 1.408, "nll_loss": 1.3416965007781982, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.05002579838037491, "rewards/margins": 0.00273408112116158, "rewards/rejected": -0.05275987833738327, "step": 160 }, { "epoch": 0.9836065573770492, "grad_norm": 19.95792007446289, "learning_rate": 3.892494720807615e-06, "log_odds_chosen": 0.05066202953457832, "log_odds_ratio": -0.7182776927947998, "logits/chosen": 395.8006591796875, "logits/rejected": 408.99554443359375, "logps/chosen": -1.0879595279693604, "logps/rejected": -1.125816822052002, "loss": 1.436, "nll_loss": 1.3948609828948975, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.05439797043800354, "rewards/margins": 0.0018928736681118608, "rewards/rejected": -0.056290846318006516, "step": 165 }, { "epoch": 0.9955290611028316, "eval_log_odds_chosen": 0.1983117312192917, "eval_log_odds_ratio": -0.6895310282707214, "eval_logits/chosen": 318.3812255859375, "eval_logits/rejected": 288.9291687011719, "eval_logps/chosen": -1.0157941579818726, "eval_logps/rejected": -1.1419692039489746, "eval_loss": 1.467863917350769, "eval_nll_loss": 1.4121437072753906, "eval_rewards/accuracies": 0.5467625856399536, "eval_rewards/chosen": -0.05078971013426781, "eval_rewards/margins": 0.006308753043413162, "eval_rewards/rejected": -0.05709846317768097, "eval_runtime": 112.1639, "eval_samples_per_second": 4.93, "eval_steps_per_second": 1.239, "step": 167 }, { "epoch": 1.0134128166915053, "grad_norm": 16.564281463623047, "learning_rate": 3.834824944236852e-06, "log_odds_chosen": 0.39181432127952576, "log_odds_ratio": -0.5932676196098328, "logits/chosen": 378.3958435058594, "logits/rejected": 403.1106262207031, "logps/chosen": -0.9357401132583618, "logps/rejected": -1.1598111391067505, "loss": 1.2992, "nll_loss": 1.1567914485931396, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.04678700119256973, "rewards/margins": 0.011203557252883911, "rewards/rejected": -0.05799056217074394, "step": 170 }, { "epoch": 1.0432190760059612, "grad_norm": 24.491374969482422, "learning_rate": 3.7796447300922724e-06, "log_odds_chosen": 0.8750826120376587, "log_odds_ratio": -0.42914777994155884, "logits/chosen": 358.5318603515625, "logits/rejected": 399.3114929199219, "logps/chosen": -0.6476485133171082, "logps/rejected": -1.1458537578582764, "loss": 1.0769, "nll_loss": 1.1138975620269775, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.03238242492079735, "rewards/margins": 0.02491025999188423, "rewards/rejected": -0.05729268863797188, "step": 175 }, { "epoch": 1.0730253353204173, "grad_norm": 22.750883102416992, "learning_rate": 3.72677996249965e-06, "log_odds_chosen": 0.8333228826522827, "log_odds_ratio": -0.43526044487953186, "logits/chosen": 354.4723205566406, "logits/rejected": 329.74591064453125, "logps/chosen": -0.789750874042511, "logps/rejected": -1.287760853767395, "loss": 1.132, "nll_loss": 1.2151093482971191, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03948754072189331, "rewards/margins": 0.02490049973130226, "rewards/rejected": -0.06438804417848587, "step": 180 }, { "epoch": 1.1028315946348732, "grad_norm": 20.229358673095703, "learning_rate": 3.6760731104690393e-06, "log_odds_chosen": 1.0057324171066284, "log_odds_ratio": -0.3837296664714813, "logits/chosen": 384.34808349609375, "logits/rejected": 376.38800048828125, "logps/chosen": -0.6548343896865845, "logps/rejected": -1.1811447143554688, "loss": 1.0221, "nll_loss": 0.9857061505317688, "rewards/accuracies": 0.875, "rewards/chosen": -0.032741717994213104, "rewards/margins": 0.026315515860915184, "rewards/rejected": -0.05905723571777344, "step": 185 }, { "epoch": 1.1326378539493294, "grad_norm": 18.751834869384766, "learning_rate": 3.6273812505500587e-06, "log_odds_chosen": 0.6209810972213745, "log_odds_ratio": -0.5106909275054932, "logits/chosen": 358.50823974609375, "logits/rejected": 404.4180603027344, "logps/chosen": -0.7595417499542236, "logps/rejected": -1.1261508464813232, "loss": 1.0914, "nll_loss": 1.0129649639129639, "rewards/accuracies": 0.75, "rewards/chosen": -0.03797708824276924, "rewards/margins": 0.018330451101064682, "rewards/rejected": -0.05630754306912422, "step": 190 }, { "epoch": 1.1624441132637853, "grad_norm": 20.339866638183594, "learning_rate": 3.5805743701971648e-06, "log_odds_chosen": 0.8648549914360046, "log_odds_ratio": -0.40149006247520447, "logits/chosen": 381.13031005859375, "logits/rejected": 395.5570983886719, "logps/chosen": -0.8033710718154907, "logps/rejected": -1.2736122608184814, "loss": 1.1227, "nll_loss": 1.1343204975128174, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.04016854614019394, "rewards/margins": 0.02351205423474312, "rewards/rejected": -0.06368060410022736, "step": 195 }, { "epoch": 1.1922503725782414, "grad_norm": 20.895063400268555, "learning_rate": 3.5355339059327378e-06, "log_odds_chosen": 0.9302545785903931, "log_odds_ratio": -0.4023068845272064, "logits/chosen": 408.6002197265625, "logits/rejected": 393.536865234375, "logps/chosen": -0.7376815676689148, "logps/rejected": -1.2836555242538452, "loss": 1.0834, "nll_loss": 1.019555687904358, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.03688408061861992, "rewards/margins": 0.02729869820177555, "rewards/rejected": -0.06418277323246002, "step": 200 }, { "epoch": 1.2220566318926975, "grad_norm": 21.968069076538086, "learning_rate": 3.4921514788478916e-06, "log_odds_chosen": 1.1145693063735962, "log_odds_ratio": -0.38622182607650757, "logits/chosen": 364.79913330078125, "logits/rejected": 359.30718994140625, "logps/chosen": -0.6945966482162476, "logps/rejected": -1.2616204023361206, "loss": 1.0621, "nll_loss": 1.079245686531067, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.03472983092069626, "rewards/margins": 0.028351187705993652, "rewards/rejected": -0.06308101862668991, "step": 205 }, { "epoch": 1.2518628912071534, "grad_norm": 19.83363914489746, "learning_rate": 3.450327796711771e-06, "log_odds_chosen": 1.1763904094696045, "log_odds_ratio": -0.34168320894241333, "logits/chosen": 371.95068359375, "logits/rejected": 400.94305419921875, "logps/chosen": -0.6090874075889587, "logps/rejected": -1.2537710666656494, "loss": 1.0413, "nll_loss": 0.9631906747817993, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.030454367399215698, "rewards/margins": 0.03223418444395065, "rewards/rejected": -0.06268856674432755, "step": 210 }, { "epoch": 1.2816691505216096, "grad_norm": 22.73797035217285, "learning_rate": 3.409971697352368e-06, "log_odds_chosen": 1.0536540746688843, "log_odds_ratio": -0.3665863871574402, "logits/chosen": 392.6047058105469, "logits/rejected": 377.4068603515625, "logps/chosen": -0.7370086908340454, "logps/rejected": -1.3404157161712646, "loss": 1.0487, "nll_loss": 1.0565564632415771, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.03685043752193451, "rewards/margins": 0.030170351266860962, "rewards/rejected": -0.06702078878879547, "step": 215 }, { "epoch": 1.3114754098360657, "grad_norm": 13.09876537322998, "learning_rate": 3.3709993123162106e-06, "log_odds_chosen": 0.7300616502761841, "log_odds_ratio": -0.4766615033149719, "logits/chosen": 384.1726989746094, "logits/rejected": 378.66851806640625, "logps/chosen": -0.7808234691619873, "logps/rejected": -1.1460365056991577, "loss": 1.0819, "nll_loss": 1.038731336593628, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.0390411801636219, "rewards/margins": 0.0182606503367424, "rewards/rejected": -0.057301826775074005, "step": 220 }, { "epoch": 1.3412816691505216, "grad_norm": 29.453706741333008, "learning_rate": 3.3333333333333333e-06, "log_odds_chosen": 0.4621034562587738, "log_odds_ratio": -0.5440367460250854, "logits/chosen": 385.5031433105469, "logits/rejected": 378.17987060546875, "logps/chosen": -0.8730419278144836, "logps/rejected": -1.139762043952942, "loss": 1.0496, "nll_loss": 1.1089845895767212, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.04365209490060806, "rewards/margins": 0.01333601027727127, "rewards/rejected": -0.056988101452589035, "step": 225 }, { "epoch": 1.3710879284649775, "grad_norm": 24.137882232666016, "learning_rate": 3.296902366978936e-06, "log_odds_chosen": 1.0880992412567139, "log_odds_ratio": -0.37469881772994995, "logits/chosen": 356.7733459472656, "logits/rejected": 376.2106628417969, "logps/chosen": -0.6375613808631897, "logps/rejected": -1.2090116739273071, "loss": 1.0368, "nll_loss": 0.927442729473114, "rewards/accuracies": 0.875, "rewards/chosen": -0.031878065317869186, "rewards/margins": 0.02857252024114132, "rewards/rejected": -0.06045059114694595, "step": 230 }, { "epoch": 1.4008941877794336, "grad_norm": 19.043012619018555, "learning_rate": 3.2616403652672114e-06, "log_odds_chosen": 1.1069047451019287, "log_odds_ratio": -0.39715421199798584, "logits/chosen": 377.45684814453125, "logits/rejected": 391.23175048828125, "logps/chosen": -0.6500628590583801, "logps/rejected": -1.3308535814285278, "loss": 1.0109, "nll_loss": 0.9406328201293945, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.032503142952919006, "rewards/margins": 0.034039538353681564, "rewards/rejected": -0.06654268503189087, "step": 235 }, { "epoch": 1.4307004470938898, "grad_norm": 18.564252853393555, "learning_rate": 3.2274861218395142e-06, "log_odds_chosen": 0.8188554048538208, "log_odds_ratio": -0.4366012513637543, "logits/chosen": 400.0711364746094, "logits/rejected": 406.6979675292969, "logps/chosen": -0.7228484153747559, "logps/rejected": -1.1837232112884521, "loss": 1.0716, "nll_loss": 1.032801866531372, "rewards/accuracies": 0.875, "rewards/chosen": -0.03614242747426033, "rewards/margins": 0.023043744266033173, "rewards/rejected": -0.05918616056442261, "step": 240 }, { "epoch": 1.4605067064083457, "grad_norm": 13.215555191040039, "learning_rate": 3.1943828249997e-06, "log_odds_chosen": 0.9353200793266296, "log_odds_ratio": -0.4173661172389984, "logits/chosen": 397.68170166015625, "logits/rejected": 386.11883544921875, "logps/chosen": -0.6454007029533386, "logps/rejected": -1.1329607963562012, "loss": 1.0931, "nll_loss": 1.0978925228118896, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.03227003663778305, "rewards/margins": 0.024378007277846336, "rewards/rejected": -0.056648045778274536, "step": 245 }, { "epoch": 1.4903129657228018, "grad_norm": 15.847436904907227, "learning_rate": 3.1622776601683796e-06, "log_odds_chosen": 1.0629552602767944, "log_odds_ratio": -0.4346255660057068, "logits/chosen": 370.0399475097656, "logits/rejected": 377.7971496582031, "logps/chosen": -0.6677332520484924, "logps/rejected": -1.2528654336929321, "loss": 0.9948, "nll_loss": 0.9116696119308472, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.03338665887713432, "rewards/margins": 0.029256608337163925, "rewards/rejected": -0.06264327466487885, "step": 250 }, { "epoch": 1.520119225037258, "grad_norm": 20.606616973876953, "learning_rate": 3.131121455425748e-06, "log_odds_chosen": 1.0881011486053467, "log_odds_ratio": -0.33976244926452637, "logits/chosen": 390.563720703125, "logits/rejected": 393.47064208984375, "logps/chosen": -0.6047049760818481, "logps/rejected": -1.1917129755020142, "loss": 1.0504, "nll_loss": 0.9429427981376648, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.030235249549150467, "rewards/margins": 0.0293504036962986, "rewards/rejected": -0.05958564952015877, "step": 255 }, { "epoch": 1.5499254843517138, "grad_norm": 35.40441131591797, "learning_rate": 3.1008683647302113e-06, "log_odds_chosen": 0.8506741523742676, "log_odds_ratio": -0.4449694752693176, "logits/chosen": 372.16888427734375, "logits/rejected": 413.76153564453125, "logps/chosen": -0.8014513254165649, "logps/rejected": -1.3543529510498047, "loss": 1.0248, "nll_loss": 1.0251777172088623, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.040072567760944366, "rewards/margins": 0.027645081281661987, "rewards/rejected": -0.06771764904260635, "step": 260 }, { "epoch": 1.5797317436661698, "grad_norm": 13.316988945007324, "learning_rate": 3.0714755841697565e-06, "log_odds_chosen": 1.0472757816314697, "log_odds_ratio": -0.4307102560997009, "logits/chosen": 383.9051513671875, "logits/rejected": 406.1117248535156, "logps/chosen": -0.6818675398826599, "logps/rejected": -1.2686574459075928, "loss": 1.1204, "nll_loss": 1.0089762210845947, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.034093379974365234, "rewards/margins": 0.029339497908949852, "rewards/rejected": -0.06343287974596024, "step": 265 }, { "epoch": 1.6095380029806259, "grad_norm": 17.495107650756836, "learning_rate": 3.0429030972509227e-06, "log_odds_chosen": 0.9306485056877136, "log_odds_ratio": -0.4013773798942566, "logits/chosen": 370.3818054199219, "logits/rejected": 381.3802490234375, "logps/chosen": -0.7612948417663574, "logps/rejected": -1.283376932144165, "loss": 1.0864, "nll_loss": 1.1147105693817139, "rewards/accuracies": 0.875, "rewards/chosen": -0.03806474059820175, "rewards/margins": 0.026104098185896873, "rewards/rejected": -0.06416884064674377, "step": 270 }, { "epoch": 1.639344262295082, "grad_norm": 14.062923431396484, "learning_rate": 3.0151134457776365e-06, "log_odds_chosen": 0.8347261548042297, "log_odds_ratio": -0.4390513002872467, "logits/chosen": 361.4908752441406, "logits/rejected": 350.319091796875, "logps/chosen": -0.6371272802352905, "logps/rejected": -1.0568915605545044, "loss": 1.0712, "nll_loss": 0.9875114560127258, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.031856365501880646, "rewards/margins": 0.020988214761018753, "rewards/rejected": -0.0528445765376091, "step": 275 }, { "epoch": 1.669150521609538, "grad_norm": 14.235246658325195, "learning_rate": 2.988071523335984e-06, "log_odds_chosen": 0.8683498501777649, "log_odds_ratio": -0.5000298619270325, "logits/chosen": 403.158935546875, "logits/rejected": 391.2458190917969, "logps/chosen": -0.6794577240943909, "logps/rejected": -1.190443754196167, "loss": 1.0475, "nll_loss": 1.049759864807129, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.033972885459661484, "rewards/margins": 0.025549303740262985, "rewards/rejected": -0.05952219292521477, "step": 280 }, { "epoch": 1.698956780923994, "grad_norm": 14.518519401550293, "learning_rate": 2.961744388795462e-06, "log_odds_chosen": 0.9579475522041321, "log_odds_ratio": -0.3945266008377075, "logits/chosen": 368.3428649902344, "logits/rejected": 374.80645751953125, "logps/chosen": -0.6118819117546082, "logps/rejected": -1.1229194402694702, "loss": 0.9917, "nll_loss": 0.9298090934753418, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.030594095587730408, "rewards/margins": 0.025551876053214073, "rewards/rejected": -0.05614597350358963, "step": 285 }, { "epoch": 1.7287630402384502, "grad_norm": 16.039731979370117, "learning_rate": 2.9361010975735177e-06, "log_odds_chosen": 0.8852699398994446, "log_odds_ratio": -0.41907158493995667, "logits/chosen": 385.8910217285156, "logits/rejected": 422.97454833984375, "logps/chosen": -0.8015801310539246, "logps/rejected": -1.3009235858917236, "loss": 1.0643, "nll_loss": 1.0100016593933105, "rewards/accuracies": 0.875, "rewards/chosen": -0.04007900878787041, "rewards/margins": 0.02496717870235443, "rewards/rejected": -0.06504618376493454, "step": 290 }, { "epoch": 1.758569299552906, "grad_norm": 17.417320251464844, "learning_rate": 2.9111125486979104e-06, "log_odds_chosen": 0.8097723722457886, "log_odds_ratio": -0.4489704966545105, "logits/chosen": 363.5550231933594, "logits/rejected": 407.45367431640625, "logps/chosen": -0.7277875542640686, "logps/rejected": -1.1767876148223877, "loss": 1.0644, "nll_loss": 1.0175808668136597, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03638937696814537, "rewards/margins": 0.022450000047683716, "rewards/rejected": -0.058839380741119385, "step": 295 }, { "epoch": 1.788375558867362, "grad_norm": 22.727943420410156, "learning_rate": 2.8867513459481293e-06, "log_odds_chosen": 1.2782224416732788, "log_odds_ratio": -0.3165340721607208, "logits/chosen": 403.18780517578125, "logits/rejected": 379.86224365234375, "logps/chosen": -0.6022372245788574, "logps/rejected": -1.2621891498565674, "loss": 1.0012, "nll_loss": 0.9228881597518921, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.03011186420917511, "rewards/margins": 0.032997600734233856, "rewards/rejected": -0.06310946494340897, "step": 300 }, { "epoch": 1.8181818181818183, "grad_norm": 13.393155097961426, "learning_rate": 2.862991671569341e-06, "log_odds_chosen": 0.5560621619224548, "log_odds_ratio": -0.5250486135482788, "logits/chosen": 394.03631591796875, "logits/rejected": 403.3617858886719, "logps/chosen": -0.9106165170669556, "logps/rejected": -1.2179043292999268, "loss": 1.0386, "nll_loss": 1.1626732349395752, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.045530833303928375, "rewards/margins": 0.015364391729235649, "rewards/rejected": -0.060895223170518875, "step": 305 }, { "epoch": 1.8479880774962743, "grad_norm": 14.096085548400879, "learning_rate": 2.839809171235324e-06, "log_odds_chosen": 1.0126060247421265, "log_odds_ratio": -0.4341171383857727, "logits/chosen": 378.22705078125, "logits/rejected": 388.7279357910156, "logps/chosen": -0.6974117159843445, "logps/rejected": -1.3275178670883179, "loss": 1.0991, "nll_loss": 1.0783545970916748, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.034870583564043045, "rewards/margins": 0.03150530904531479, "rewards/rejected": -0.06637589633464813, "step": 310 }, { "epoch": 1.8777943368107302, "grad_norm": 15.438323974609375, "learning_rate": 2.817180849095055e-06, "log_odds_chosen": 0.4888283610343933, "log_odds_ratio": -0.5892666578292847, "logits/chosen": 354.91192626953125, "logits/rejected": 373.19049072265625, "logps/chosen": -1.0054099559783936, "logps/rejected": -1.3448001146316528, "loss": 1.0997, "nll_loss": 1.2546958923339844, "rewards/accuracies": 0.625, "rewards/chosen": -0.05027049034833908, "rewards/margins": 0.01696951314806938, "rewards/rejected": -0.06724000722169876, "step": 315 }, { "epoch": 1.9076005961251863, "grad_norm": 15.382440567016602, "learning_rate": 2.7950849718747376e-06, "log_odds_chosen": 1.0956491231918335, "log_odds_ratio": -0.3748942017555237, "logits/chosen": 376.21466064453125, "logits/rejected": 396.38897705078125, "logps/chosen": -0.6471365690231323, "logps/rejected": -1.257728934288025, "loss": 0.986, "nll_loss": 0.9363555908203125, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.032356828451156616, "rewards/margins": 0.03052961453795433, "rewards/rejected": -0.06288645416498184, "step": 320 }, { "epoch": 1.9374068554396424, "grad_norm": 36.433021545410156, "learning_rate": 2.773500981126146e-06, "log_odds_chosen": 1.154837965965271, "log_odds_ratio": -0.362586110830307, "logits/chosen": 373.2748107910156, "logits/rejected": 404.8694152832031, "logps/chosen": -0.705539882183075, "logps/rejected": -1.3716325759887695, "loss": 1.0139, "nll_loss": 0.9342381358146667, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.03527699410915375, "rewards/margins": 0.03330463916063309, "rewards/rejected": -0.06858162581920624, "step": 325 }, { "epoch": 1.9672131147540983, "grad_norm": 20.0263671875, "learning_rate": 2.752409412815902e-06, "log_odds_chosen": 0.8623636960983276, "log_odds_ratio": -0.414236456155777, "logits/chosen": 370.912841796875, "logits/rejected": 377.9576721191406, "logps/chosen": -0.7194432020187378, "logps/rejected": -1.2195098400115967, "loss": 1.0256, "nll_loss": 0.8793627023696899, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.03597215935587883, "rewards/margins": 0.025003332644701004, "rewards/rejected": -0.060975492000579834, "step": 330 }, { "epoch": 1.9970193740685542, "grad_norm": 24.618507385253906, "learning_rate": 2.7317918235407652e-06, "log_odds_chosen": 0.5057398080825806, "log_odds_ratio": -0.5592184662818909, "logits/chosen": 395.17340087890625, "logits/rejected": 387.1885986328125, "logps/chosen": -0.9086158871650696, "logps/rejected": -1.1841217279434204, "loss": 1.1098, "nll_loss": 1.2389247417449951, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.04543079435825348, "rewards/margins": 0.013775287196040154, "rewards/rejected": -0.05920607969164848, "step": 335 }, { "epoch": 1.9970193740685542, "eval_log_odds_chosen": 0.21048486232757568, "eval_log_odds_ratio": -0.7227855920791626, "eval_logits/chosen": 315.02960205078125, "eval_logits/rejected": 286.43115234375, "eval_logps/chosen": -1.0353137254714966, "eval_logps/rejected": -1.1580623388290405, "eval_loss": 1.4451346397399902, "eval_nll_loss": 1.3838590383529663, "eval_rewards/accuracies": 0.5467625856399536, "eval_rewards/chosen": -0.05176568776369095, "eval_rewards/margins": 0.006137436721473932, "eval_rewards/rejected": -0.05790312588214874, "eval_runtime": 112.1251, "eval_samples_per_second": 4.932, "eval_steps_per_second": 1.24, "step": 335 }, { "epoch": 2.0268256333830106, "grad_norm": 17.44247817993164, "learning_rate": 2.711630722733202e-06, "log_odds_chosen": 1.980719804763794, "log_odds_ratio": -0.21638807654380798, "logits/chosen": 392.9175109863281, "logits/rejected": 369.302490234375, "logps/chosen": -0.39937111735343933, "logps/rejected": -1.395355224609375, "loss": 0.6343, "nll_loss": 0.7234522700309753, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.019968556240200996, "rewards/margins": 0.04979920759797096, "rewards/rejected": -0.06976776570081711, "step": 340 }, { "epoch": 2.0566318926974665, "grad_norm": 11.428772926330566, "learning_rate": 2.691909510290828e-06, "log_odds_chosen": 2.5441951751708984, "log_odds_ratio": -0.12063421308994293, "logits/chosen": 354.2935485839844, "logits/rejected": 359.0185852050781, "logps/chosen": -0.3628384470939636, "logps/rejected": -1.6579961776733398, "loss": 0.5571, "nll_loss": 0.5666171312332153, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.01814192347228527, "rewards/margins": 0.06475789844989777, "rewards/rejected": -0.08289982378482819, "step": 345 }, { "epoch": 2.0864381520119224, "grad_norm": 13.283677101135254, "learning_rate": 2.6726124191242444e-06, "log_odds_chosen": 2.592142343521118, "log_odds_ratio": -0.11488159000873566, "logits/chosen": 353.8732604980469, "logits/rejected": 388.585693359375, "logps/chosen": -0.3672012686729431, "logps/rejected": -1.8615690469741821, "loss": 0.5687, "nll_loss": 0.5486581921577454, "rewards/accuracies": 1.0, "rewards/chosen": -0.018360063433647156, "rewards/margins": 0.07471838593482971, "rewards/rejected": -0.09307844936847687, "step": 350 }, { "epoch": 2.1162444113263787, "grad_norm": 12.212410926818848, "learning_rate": 2.6537244621713765e-06, "log_odds_chosen": 2.209368944168091, "log_odds_ratio": -0.15512482821941376, "logits/chosen": 352.80633544921875, "logits/rejected": 371.6228942871094, "logps/chosen": -0.3736402690410614, "logps/rejected": -1.5454914569854736, "loss": 0.5485, "nll_loss": 0.609760582447052, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.01868201419711113, "rewards/margins": 0.058592550456523895, "rewards/rejected": -0.07727457582950592, "step": 355 }, { "epoch": 2.1460506706408347, "grad_norm": 12.874505043029785, "learning_rate": 2.6352313834736496e-06, "log_odds_chosen": 2.694078207015991, "log_odds_ratio": -0.11345534026622772, "logits/chosen": 355.081787109375, "logits/rejected": 400.65533447265625, "logps/chosen": -0.3401663899421692, "logps/rejected": -1.6482181549072266, "loss": 0.5505, "nll_loss": 0.5371311902999878, "rewards/accuracies": 1.0, "rewards/chosen": -0.01700832135975361, "rewards/margins": 0.06540258973836899, "rewards/rejected": -0.08241091668605804, "step": 360 }, { "epoch": 2.1758569299552906, "grad_norm": 12.150455474853516, "learning_rate": 2.6171196129510684e-06, "log_odds_chosen": 2.1292691230773926, "log_odds_ratio": -0.15649950504302979, "logits/chosen": 340.80157470703125, "logits/rejected": 330.2677001953125, "logps/chosen": -0.3447723984718323, "logps/rejected": -1.3634696006774902, "loss": 0.5401, "nll_loss": 0.5159801840782166, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.017238620668649673, "rewards/margins": 0.05093486234545708, "rewards/rejected": -0.06817348301410675, "step": 365 }, { "epoch": 2.2056631892697465, "grad_norm": 15.934440612792969, "learning_rate": 2.599376224550182e-06, "log_odds_chosen": 2.0337166786193848, "log_odds_ratio": -0.19345471262931824, "logits/chosen": 315.1424560546875, "logits/rejected": 338.2904968261719, "logps/chosen": -0.3659020662307739, "logps/rejected": -1.4170308113098145, "loss": 0.5707, "nll_loss": 0.5888785719871521, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.018295101821422577, "rewards/margins": 0.052556443959474564, "rewards/rejected": -0.07085154205560684, "step": 370 }, { "epoch": 2.235469448584203, "grad_norm": 13.303545951843262, "learning_rate": 2.5819888974716113e-06, "log_odds_chosen": 1.9749561548233032, "log_odds_ratio": -0.1846763789653778, "logits/chosen": 365.7724304199219, "logits/rejected": 387.26141357421875, "logps/chosen": -0.42183151841163635, "logps/rejected": -1.4507567882537842, "loss": 0.6027, "nll_loss": 0.5997955203056335, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.021091576665639877, "rewards/margins": 0.05144626647233963, "rewards/rejected": -0.07253783941268921, "step": 375 }, { "epoch": 2.2652757078986587, "grad_norm": 18.135498046875, "learning_rate": 2.564945880212886e-06, "log_odds_chosen": 2.311295509338379, "log_odds_ratio": -0.12876024842262268, "logits/chosen": 364.0061950683594, "logits/rejected": 350.2301330566406, "logps/chosen": -0.29145348072052, "logps/rejected": -1.3336101770401, "loss": 0.5545, "nll_loss": 0.5340723991394043, "rewards/accuracies": 1.0, "rewards/chosen": -0.014572675339877605, "rewards/margins": 0.05210784077644348, "rewards/rejected": -0.06668051332235336, "step": 380 }, { "epoch": 2.2950819672131146, "grad_norm": 10.94619369506836, "learning_rate": 2.5482359571881276e-06, "log_odds_chosen": 2.5354793071746826, "log_odds_ratio": -0.115506611764431, "logits/chosen": 353.3926696777344, "logits/rejected": 348.86944580078125, "logps/chosen": -0.2818690240383148, "logps/rejected": -1.487006425857544, "loss": 0.5179, "nll_loss": 0.476929247379303, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.014093451201915741, "rewards/margins": 0.06025686860084534, "rewards/rejected": -0.07435031235218048, "step": 385 }, { "epoch": 2.3248882265275705, "grad_norm": 12.89717960357666, "learning_rate": 2.5318484177091667e-06, "log_odds_chosen": 2.246914863586426, "log_odds_ratio": -0.13051298260688782, "logits/chosen": 370.3692626953125, "logits/rejected": 393.1583557128906, "logps/chosen": -0.37999650835990906, "logps/rejected": -1.5727269649505615, "loss": 0.5955, "nll_loss": 0.6084927320480347, "rewards/accuracies": 1.0, "rewards/chosen": -0.018999826163053513, "rewards/margins": 0.05963651463389397, "rewards/rejected": -0.07863634079694748, "step": 390 }, { "epoch": 2.354694485842027, "grad_norm": 9.882362365722656, "learning_rate": 2.515773027133138e-06, "log_odds_chosen": 2.3919968605041504, "log_odds_ratio": -0.13801579177379608, "logits/chosen": 369.07232666015625, "logits/rejected": 362.56475830078125, "logps/chosen": -0.2836388051509857, "logps/rejected": -1.353062391281128, "loss": 0.5206, "nll_loss": 0.473809152841568, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.014181938953697681, "rewards/margins": 0.05347117781639099, "rewards/rejected": -0.0676531195640564, "step": 395 }, { "epoch": 2.384500745156483, "grad_norm": 20.866735458374023, "learning_rate": 2.5e-06, "log_odds_chosen": 2.305642604827881, "log_odds_ratio": -0.17361058294773102, "logits/chosen": 367.1854553222656, "logits/rejected": 388.62860107421875, "logps/chosen": -0.37132248282432556, "logps/rejected": -1.6480903625488281, "loss": 0.5804, "nll_loss": 0.5412487387657166, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.018566126003861427, "rewards/margins": 0.0638383999466896, "rewards/rejected": -0.08240451663732529, "step": 400 }, { "epoch": 2.4143070044709387, "grad_norm": 17.410255432128906, "learning_rate": 2.484519974999767e-06, "log_odds_chosen": 2.341656446456909, "log_odds_ratio": -0.18742091953754425, "logits/chosen": 417.4825744628906, "logits/rejected": 384.49346923828125, "logps/chosen": -0.38954219222068787, "logps/rejected": -1.552782416343689, "loss": 0.5795, "nll_loss": 0.5449979305267334, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.019477110356092453, "rewards/margins": 0.05816201493144035, "rewards/rejected": -0.07763911783695221, "step": 405 }, { "epoch": 2.444113263785395, "grad_norm": 11.311455726623535, "learning_rate": 2.4693239916239746e-06, "log_odds_chosen": 2.352574586868286, "log_odds_ratio": -0.18433162569999695, "logits/chosen": 365.95965576171875, "logits/rejected": 380.1703186035156, "logps/chosen": -0.37695974111557007, "logps/rejected": -1.5367991924285889, "loss": 0.5696, "nll_loss": 0.5719352960586548, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.018847983330488205, "rewards/margins": 0.05799197405576706, "rewards/rejected": -0.07683996111154556, "step": 410 }, { "epoch": 2.473919523099851, "grad_norm": 11.967494010925293, "learning_rate": 2.4544034683690802e-06, "log_odds_chosen": 2.2503182888031006, "log_odds_ratio": -0.15851208567619324, "logits/chosen": 364.34222412109375, "logits/rejected": 394.3598327636719, "logps/chosen": -0.3465135991573334, "logps/rejected": -1.4553066492080688, "loss": 0.5766, "nll_loss": 0.5365554690361023, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.01732568070292473, "rewards/margins": 0.05543965846300125, "rewards/rejected": -0.07276533544063568, "step": 415 }, { "epoch": 2.503725782414307, "grad_norm": 11.675920486450195, "learning_rate": 2.4397501823713327e-06, "log_odds_chosen": 2.0490882396698, "log_odds_ratio": -0.1818782538175583, "logits/chosen": 367.0909423828125, "logits/rejected": 343.985107421875, "logps/chosen": -0.36017632484436035, "logps/rejected": -1.39711594581604, "loss": 0.554, "nll_loss": 0.6418091654777527, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.018008816987276077, "rewards/margins": 0.05184697359800339, "rewards/rejected": -0.06985578685998917, "step": 420 }, { "epoch": 2.533532041728763, "grad_norm": 11.233902931213379, "learning_rate": 2.4253562503633297e-06, "log_odds_chosen": 2.5332672595977783, "log_odds_ratio": -0.10215308517217636, "logits/chosen": 365.8087463378906, "logits/rejected": 362.74371337890625, "logps/chosen": -0.3472338318824768, "logps/rejected": -1.7049144506454468, "loss": 0.5363, "nll_loss": 0.5403138399124146, "rewards/accuracies": 1.0, "rewards/chosen": -0.01736168935894966, "rewards/margins": 0.06788404285907745, "rewards/rejected": -0.08524572849273682, "step": 425 }, { "epoch": 2.563338301043219, "grad_norm": 16.26917266845703, "learning_rate": 2.411214110852061e-06, "log_odds_chosen": 2.512302875518799, "log_odds_ratio": -0.1274806559085846, "logits/chosen": 365.8606262207031, "logits/rejected": 377.60894775390625, "logps/chosen": -0.30852970480918884, "logps/rejected": -1.5747673511505127, "loss": 0.551, "nll_loss": 0.5144289135932922, "rewards/accuracies": 1.0, "rewards/chosen": -0.015426484867930412, "rewards/margins": 0.06331188976764679, "rewards/rejected": -0.07873837649822235, "step": 430 }, { "epoch": 2.593144560357675, "grad_norm": 13.473649024963379, "learning_rate": 2.3973165074269213e-06, "log_odds_chosen": 2.2823190689086914, "log_odds_ratio": -0.1513710767030716, "logits/chosen": 372.6357421875, "logits/rejected": 341.8959045410156, "logps/chosen": -0.3947034776210785, "logps/rejected": -1.5539586544036865, "loss": 0.5703, "nll_loss": 0.5524027943611145, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.019735176116228104, "rewards/margins": 0.05796275660395622, "rewards/rejected": -0.07769793272018433, "step": 435 }, { "epoch": 2.6229508196721314, "grad_norm": 15.039813041687012, "learning_rate": 2.3836564731139807e-06, "log_odds_chosen": 2.4606306552886963, "log_odds_ratio": -0.11453738063573837, "logits/chosen": 356.7464599609375, "logits/rejected": 367.7265930175781, "logps/chosen": -0.27872234582901, "logps/rejected": -1.4566452503204346, "loss": 0.5883, "nll_loss": 0.5446338653564453, "rewards/accuracies": 1.0, "rewards/chosen": -0.0139361172914505, "rewards/margins": 0.05889614298939705, "rewards/rejected": -0.07283225655555725, "step": 440 }, { "epoch": 2.6527570789865873, "grad_norm": 13.054855346679688, "learning_rate": 2.3702273156998867e-06, "log_odds_chosen": 2.510906219482422, "log_odds_ratio": -0.11371259391307831, "logits/chosen": 337.3484191894531, "logits/rejected": 373.2784423828125, "logps/chosen": -0.35476621985435486, "logps/rejected": -1.7162315845489502, "loss": 0.5632, "nll_loss": 0.5669391751289368, "rewards/accuracies": 1.0, "rewards/chosen": -0.017738312482833862, "rewards/margins": 0.06807325780391693, "rewards/rejected": -0.08581157773733139, "step": 445 }, { "epoch": 2.682563338301043, "grad_norm": 12.158041954040527, "learning_rate": 2.357022603955159e-06, "log_odds_chosen": 2.407587766647339, "log_odds_ratio": -0.11502983421087265, "logits/chosen": 363.87554931640625, "logits/rejected": 364.67071533203125, "logps/chosen": -0.4218372404575348, "logps/rejected": -1.7667124271392822, "loss": 0.5905, "nll_loss": 0.5684647560119629, "rewards/accuracies": 1.0, "rewards/chosen": -0.02109185978770256, "rewards/margins": 0.06724376231431961, "rewards/rejected": -0.08833561837673187, "step": 450 }, { "epoch": 2.712369597615499, "grad_norm": 14.808917045593262, "learning_rate": 2.3440361546924774e-06, "log_odds_chosen": 2.5720152854919434, "log_odds_ratio": -0.1182328313589096, "logits/chosen": 389.94683837890625, "logits/rejected": 369.15606689453125, "logps/chosen": -0.3745032250881195, "logps/rejected": -1.6708095073699951, "loss": 0.6099, "nll_loss": 0.5873192548751831, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.018725162371993065, "rewards/margins": 0.06481531262397766, "rewards/rejected": -0.08354047685861588, "step": 455 }, { "epoch": 2.742175856929955, "grad_norm": 15.351086616516113, "learning_rate": 2.3312620206007847e-06, "log_odds_chosen": 2.6212141513824463, "log_odds_ratio": -0.12157906591892242, "logits/chosen": 381.9286193847656, "logits/rejected": 401.04998779296875, "logps/chosen": -0.336896151304245, "logps/rejected": -1.7683613300323486, "loss": 0.5888, "nll_loss": 0.6308404207229614, "rewards/accuracies": 1.0, "rewards/chosen": -0.01684480905532837, "rewards/margins": 0.07157325744628906, "rewards/rejected": -0.08841806650161743, "step": 460 }, { "epoch": 2.7719821162444114, "grad_norm": 13.619884490966797, "learning_rate": 2.3186944788008413e-06, "log_odds_chosen": 2.487888813018799, "log_odds_ratio": -0.1321084201335907, "logits/chosen": 378.2283630371094, "logits/rejected": 382.45391845703125, "logps/chosen": -0.3096372187137604, "logps/rejected": -1.5240898132324219, "loss": 0.5702, "nll_loss": 0.5487266778945923, "rewards/accuracies": 1.0, "rewards/chosen": -0.015481861308217049, "rewards/margins": 0.060722626745700836, "rewards/rejected": -0.07620447874069214, "step": 465 }, { "epoch": 2.8017883755588673, "grad_norm": 11.559633255004883, "learning_rate": 2.3063280200722128e-06, "log_odds_chosen": 2.1641154289245605, "log_odds_ratio": -0.19840756058692932, "logits/chosen": 382.61077880859375, "logits/rejected": 354.3682556152344, "logps/chosen": -0.408609539270401, "logps/rejected": -1.5154647827148438, "loss": 0.5838, "nll_loss": 0.5971536636352539, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.02043047733604908, "rewards/margins": 0.05534275621175766, "rewards/rejected": -0.07577323168516159, "step": 470 }, { "epoch": 2.8315946348733236, "grad_norm": 12.24728012084961, "learning_rate": 2.2941573387056174e-06, "log_odds_chosen": 2.448145866394043, "log_odds_ratio": -0.14108145236968994, "logits/chosen": 352.58197021484375, "logits/rejected": 376.586181640625, "logps/chosen": -0.3962209224700928, "logps/rejected": -1.6542632579803467, "loss": 0.5532, "nll_loss": 0.5462762713432312, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.01981104537844658, "rewards/margins": 0.06290213763713837, "rewards/rejected": -0.08271317183971405, "step": 475 }, { "epoch": 2.8614008941877795, "grad_norm": 11.175488471984863, "learning_rate": 2.2821773229381924e-06, "log_odds_chosen": 2.349735736846924, "log_odds_ratio": -0.11864028871059418, "logits/chosen": 361.24639892578125, "logits/rejected": 402.2587585449219, "logps/chosen": -0.33937591314315796, "logps/rejected": -1.5452320575714111, "loss": 0.5077, "nll_loss": 0.4929002821445465, "rewards/accuracies": 1.0, "rewards/chosen": -0.01696879416704178, "rewards/margins": 0.060292817652225494, "rewards/rejected": -0.07726161181926727, "step": 480 }, { "epoch": 2.8912071535022354, "grad_norm": 12.026611328125, "learning_rate": 2.270383045932499e-06, "log_odds_chosen": 2.4791646003723145, "log_odds_ratio": -0.12428289651870728, "logits/chosen": 358.9771728515625, "logits/rejected": 380.42901611328125, "logps/chosen": -0.3796696364879608, "logps/rejected": -1.7137501239776611, "loss": 0.5514, "nll_loss": 0.5423077344894409, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.01898348517715931, "rewards/margins": 0.06670401990413666, "rewards/rejected": -0.08568750321865082, "step": 485 }, { "epoch": 2.9210134128166914, "grad_norm": 12.008419036865234, "learning_rate": 2.2587697572631284e-06, "log_odds_chosen": 2.308088541030884, "log_odds_ratio": -0.1636713743209839, "logits/chosen": 370.170654296875, "logits/rejected": 335.97857666015625, "logps/chosen": -0.41963282227516174, "logps/rejected": -1.61661696434021, "loss": 0.6122, "nll_loss": 0.5415998697280884, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.020981641486287117, "rewards/margins": 0.05984921008348465, "rewards/rejected": -0.08083084970712662, "step": 490 }, { "epoch": 2.9508196721311473, "grad_norm": 11.31982135772705, "learning_rate": 2.2473328748774737e-06, "log_odds_chosen": 2.167809247970581, "log_odds_ratio": -0.174642875790596, "logits/chosen": 364.75048828125, "logits/rejected": 393.1929626464844, "logps/chosen": -0.4049316346645355, "logps/rejected": -1.4748752117156982, "loss": 0.5603, "nll_loss": 0.5929296612739563, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.020246583968400955, "rewards/margins": 0.05349717289209366, "rewards/rejected": -0.07374376058578491, "step": 495 }, { "epoch": 2.9806259314456036, "grad_norm": 13.445329666137695, "learning_rate": 2.23606797749979e-06, "log_odds_chosen": 2.3928182125091553, "log_odds_ratio": -0.14649812877178192, "logits/chosen": 374.05535888671875, "logits/rejected": 372.2560119628906, "logps/chosen": -0.34778839349746704, "logps/rejected": -1.5306968688964844, "loss": 0.5921, "nll_loss": 0.5048509836196899, "rewards/accuracies": 1.0, "rewards/chosen": -0.017389420419931412, "rewards/margins": 0.059145428240299225, "rewards/rejected": -0.07653484493494034, "step": 500 }, { "epoch": 2.9865871833084947, "eval_log_odds_chosen": 0.28559842705726624, "eval_log_odds_ratio": -0.6970076560974121, "eval_logits/chosen": 297.1682434082031, "eval_logits/rejected": 268.0281982421875, "eval_logps/chosen": -1.1085351705551147, "eval_logps/rejected": -1.2919707298278809, "eval_loss": 1.5517091751098633, "eval_nll_loss": 1.4855411052703857, "eval_rewards/accuracies": 0.5611510872840881, "eval_rewards/chosen": -0.055426761507987976, "eval_rewards/margins": 0.009171773679554462, "eval_rewards/rejected": -0.06459853798151016, "eval_runtime": 112.1561, "eval_samples_per_second": 4.931, "eval_steps_per_second": 1.239, "step": 501 }, { "epoch": 2.9865871833084947, "step": 501, "total_flos": 0.0, "train_loss": 1.4570662823027956, "train_runtime": 13599.7579, "train_samples_per_second": 1.183, "train_steps_per_second": 0.037 } ], "logging_steps": 5, "max_steps": 501, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }