{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.998691442030882, "eval_steps": 500, "global_step": 477, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.010468463752944255, "grad_norm": 22.33847861457317, "learning_rate": 5.208333333333333e-08, "logits/chosen": -2.7707886695861816, "logits/rejected": -2.7283411026000977, "logps/chosen": -1.0281651020050049, "logps/rejected": -1.1735057830810547, "loss": 1.7068, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -1.0281651020050049, "rewards/margins": 0.14534088969230652, "rewards/rejected": -1.1735057830810547, "step": 5 }, { "epoch": 0.02093692750588851, "grad_norm": 19.58286489214486, "learning_rate": 1.0416666666666667e-07, "logits/chosen": -2.7509849071502686, "logits/rejected": -2.725268840789795, "logps/chosen": -0.9945869445800781, "logps/rejected": -1.070472002029419, "loss": 1.7055, "rewards/accuracies": 0.5, "rewards/chosen": -0.9945869445800781, "rewards/margins": 0.07588515430688858, "rewards/rejected": -1.070472002029419, "step": 10 }, { "epoch": 0.031405391258832765, "grad_norm": 20.875691978403097, "learning_rate": 1.5624999999999999e-07, "logits/chosen": -2.731562852859497, "logits/rejected": -2.698035478591919, "logps/chosen": -0.9806415438652039, "logps/rejected": -1.1409623622894287, "loss": 1.6533, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.9806415438652039, "rewards/margins": 0.1603206843137741, "rewards/rejected": -1.1409623622894287, "step": 15 }, { "epoch": 0.04187385501177702, "grad_norm": 18.447110198802093, "learning_rate": 2.0833333333333333e-07, "logits/chosen": -2.804091453552246, "logits/rejected": -2.711153268814087, "logps/chosen": -0.9976784586906433, "logps/rejected": -1.0985018014907837, "loss": 1.6778, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.9976784586906433, "rewards/margins": 0.10082335770130157, "rewards/rejected": -1.0985018014907837, "step": 20 }, { "epoch": 0.05234231876472128, "grad_norm": 17.24388250585097, "learning_rate": 2.604166666666667e-07, "logits/chosen": -2.800624132156372, "logits/rejected": -2.777677536010742, "logps/chosen": -0.977279007434845, "logps/rejected": -1.0392786264419556, "loss": 1.6818, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -0.977279007434845, "rewards/margins": 0.061999619007110596, "rewards/rejected": -1.0392786264419556, "step": 25 }, { "epoch": 0.06281078251766553, "grad_norm": 17.54927195272229, "learning_rate": 3.1249999999999997e-07, "logits/chosen": -2.774305820465088, "logits/rejected": -2.734163284301758, "logps/chosen": -0.8836237192153931, "logps/rejected": -0.9883272051811218, "loss": 1.7148, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.8836237192153931, "rewards/margins": 0.10470354557037354, "rewards/rejected": -0.9883272051811218, "step": 30 }, { "epoch": 0.07327924627060979, "grad_norm": 19.597204446648743, "learning_rate": 3.645833333333333e-07, "logits/chosen": -2.7799019813537598, "logits/rejected": -2.6970131397247314, "logps/chosen": -0.9105981588363647, "logps/rejected": -1.032915711402893, "loss": 1.6819, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.9105981588363647, "rewards/margins": 0.12231750786304474, "rewards/rejected": -1.032915711402893, "step": 35 }, { "epoch": 0.08374771002355404, "grad_norm": 17.382375782510174, "learning_rate": 4.1666666666666667e-07, "logits/chosen": -2.7961745262145996, "logits/rejected": -2.7064435482025146, "logps/chosen": -0.8972692489624023, "logps/rejected": -1.1690887212753296, "loss": 1.5799, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.8972692489624023, "rewards/margins": 0.27181947231292725, "rewards/rejected": -1.1690887212753296, "step": 40 }, { "epoch": 0.0942161737764983, "grad_norm": 20.280760501242256, "learning_rate": 4.6874999999999996e-07, "logits/chosen": -2.792088031768799, "logits/rejected": -2.748499870300293, "logps/chosen": -0.9211010932922363, "logps/rejected": -1.065104365348816, "loss": 1.5989, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.9211010932922363, "rewards/margins": 0.14400319755077362, "rewards/rejected": -1.065104365348816, "step": 45 }, { "epoch": 0.10468463752944256, "grad_norm": 24.965329955439582, "learning_rate": 4.999731868769026e-07, "logits/chosen": -2.7186241149902344, "logits/rejected": -2.6570615768432617, "logps/chosen": -0.9831829071044922, "logps/rejected": -1.2431915998458862, "loss": 1.5992, "rewards/accuracies": 0.625, "rewards/chosen": -0.9831829071044922, "rewards/margins": 0.2600088119506836, "rewards/rejected": -1.2431915998458862, "step": 50 }, { "epoch": 0.11515310128238682, "grad_norm": 20.11572375657384, "learning_rate": 4.996716052911017e-07, "logits/chosen": -2.6839053630828857, "logits/rejected": -2.6501219272613525, "logps/chosen": -1.0190128087997437, "logps/rejected": -1.2308984994888306, "loss": 1.6076, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -1.0190128087997437, "rewards/margins": 0.21188561618328094, "rewards/rejected": -1.2308984994888306, "step": 55 }, { "epoch": 0.12562156503533106, "grad_norm": 23.21505244201069, "learning_rate": 4.990353313429303e-07, "logits/chosen": -2.7174434661865234, "logits/rejected": -2.6702983379364014, "logps/chosen": -1.0736011266708374, "logps/rejected": -1.2630236148834229, "loss": 1.5192, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.0736011266708374, "rewards/margins": 0.18942244350910187, "rewards/rejected": -1.2630236148834229, "step": 60 }, { "epoch": 0.1360900287882753, "grad_norm": 23.25106134127669, "learning_rate": 4.980652179769217e-07, "logits/chosen": -2.6810200214385986, "logits/rejected": -2.5684618949890137, "logps/chosen": -1.0137274265289307, "logps/rejected": -1.498494267463684, "loss": 1.5546, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.0137274265289307, "rewards/margins": 0.4847669005393982, "rewards/rejected": -1.498494267463684, "step": 65 }, { "epoch": 0.14655849254121958, "grad_norm": 27.792367557668214, "learning_rate": 4.967625656594781e-07, "logits/chosen": -2.530665636062622, "logits/rejected": -2.4802050590515137, "logps/chosen": -1.1090538501739502, "logps/rejected": -1.3939735889434814, "loss": 1.5395, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.1090538501739502, "rewards/margins": 0.28491973876953125, "rewards/rejected": -1.3939735889434814, "step": 70 }, { "epoch": 0.15702695629416383, "grad_norm": 31.80202351677776, "learning_rate": 4.951291206355559e-07, "logits/chosen": -2.56596302986145, "logits/rejected": -2.4995336532592773, "logps/chosen": -1.0895880460739136, "logps/rejected": -1.4386751651763916, "loss": 1.5145, "rewards/accuracies": 0.65625, "rewards/chosen": -1.0895880460739136, "rewards/margins": 0.3490869700908661, "rewards/rejected": -1.4386751651763916, "step": 75 }, { "epoch": 0.16749542004710807, "grad_norm": 24.649835098743253, "learning_rate": 4.93167072587771e-07, "logits/chosen": -2.5505566596984863, "logits/rejected": -2.499436140060425, "logps/chosen": -1.066165804862976, "logps/rejected": -1.3876395225524902, "loss": 1.5557, "rewards/accuracies": 0.65625, "rewards/chosen": -1.066165804862976, "rewards/margins": 0.3214736878871918, "rewards/rejected": -1.3876395225524902, "step": 80 }, { "epoch": 0.17796388380005235, "grad_norm": 36.00370335117123, "learning_rate": 4.908790517010636e-07, "logits/chosen": -2.5619356632232666, "logits/rejected": -2.5041964054107666, "logps/chosen": -1.0125417709350586, "logps/rejected": -1.3010904788970947, "loss": 1.5994, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -1.0125417709350586, "rewards/margins": 0.2885487377643585, "rewards/rejected": -1.3010904788970947, "step": 85 }, { "epoch": 0.1884323475529966, "grad_norm": 24.146045834642553, "learning_rate": 4.882681251368548e-07, "logits/chosen": -2.4920132160186768, "logits/rejected": -2.465853691101074, "logps/chosen": -1.0736846923828125, "logps/rejected": -1.3465595245361328, "loss": 1.5492, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.0736846923828125, "rewards/margins": 0.27287474274635315, "rewards/rejected": -1.3465595245361328, "step": 90 }, { "epoch": 0.19890081130594087, "grad_norm": 28.073142968648483, "learning_rate": 4.853377929214243e-07, "logits/chosen": -2.5122618675231934, "logits/rejected": -2.4115467071533203, "logps/chosen": -1.0956408977508545, "logps/rejected": -1.3623732328414917, "loss": 1.5645, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -1.0956408977508545, "rewards/margins": 0.26673227548599243, "rewards/rejected": -1.3623732328414917, "step": 95 }, { "epoch": 0.2093692750588851, "grad_norm": 31.739558342111017, "learning_rate": 4.820919832540181e-07, "logits/chosen": -2.4314770698547363, "logits/rejected": -2.3331286907196045, "logps/chosen": -1.1069271564483643, "logps/rejected": -1.4718701839447021, "loss": 1.5661, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -1.1069271564483643, "rewards/margins": 0.3649430274963379, "rewards/rejected": -1.4718701839447021, "step": 100 }, { "epoch": 0.21983773881182936, "grad_norm": 30.25619066044505, "learning_rate": 4.785350472409791e-07, "logits/chosen": -2.374175548553467, "logits/rejected": -2.345848321914673, "logps/chosen": -1.1067227125167847, "logps/rejected": -1.5379221439361572, "loss": 1.4913, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.1067227125167847, "rewards/margins": 0.43119925260543823, "rewards/rejected": -1.5379221439361572, "step": 105 }, { "epoch": 0.23030620256477363, "grad_norm": 37.413658539880686, "learning_rate": 4.7467175306295647e-07, "logits/chosen": -2.3754255771636963, "logits/rejected": -2.298557996749878, "logps/chosen": -1.2783015966415405, "logps/rejected": -1.6868267059326172, "loss": 1.4914, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.2783015966415405, "rewards/margins": 0.4085250496864319, "rewards/rejected": -1.6868267059326172, "step": 110 }, { "epoch": 0.24077466631771788, "grad_norm": 30.692988045641766, "learning_rate": 4.70507279583015e-07, "logits/chosen": -2.295192241668701, "logits/rejected": -2.2188549041748047, "logps/chosen": -1.2483012676239014, "logps/rejected": -1.789584755897522, "loss": 1.422, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -1.2483012676239014, "rewards/margins": 0.541283369064331, "rewards/rejected": -1.789584755897522, "step": 115 }, { "epoch": 0.2512431300706621, "grad_norm": 30.61675572148456, "learning_rate": 4.6604720940421207e-07, "logits/chosen": -2.2375972270965576, "logits/rejected": -2.188506603240967, "logps/chosen": -1.2778130769729614, "logps/rejected": -1.6632368564605713, "loss": 1.5023, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.2778130769729614, "rewards/margins": 0.3854238986968994, "rewards/rejected": -1.6632368564605713, "step": 120 }, { "epoch": 0.26171159382360637, "grad_norm": 27.143222504991545, "learning_rate": 4.612975213859487e-07, "logits/chosen": -2.2686073780059814, "logits/rejected": -2.240734815597534, "logps/chosen": -1.2156823873519897, "logps/rejected": -1.5097521543502808, "loss": 1.5016, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.2156823873519897, "rewards/margins": 0.2940698564052582, "rewards/rejected": -1.5097521543502808, "step": 125 }, { "epoch": 0.2721800575765506, "grad_norm": 28.662390853422256, "learning_rate": 4.5626458262912735e-07, "logits/chosen": -2.1936447620391846, "logits/rejected": -2.1442556381225586, "logps/chosen": -1.2718418836593628, "logps/rejected": -1.5303620100021362, "loss": 1.4909, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -1.2718418836593628, "rewards/margins": 0.25852006673812866, "rewards/rejected": -1.5303620100021362, "step": 130 }, { "epoch": 0.2826485213294949, "grad_norm": 44.28036575523435, "learning_rate": 4.5095513994085974e-07, "logits/chosen": -2.1270031929016113, "logits/rejected": -2.0248324871063232, "logps/chosen": -1.3885653018951416, "logps/rejected": -2.0145115852355957, "loss": 1.4337, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -1.3885653018951416, "rewards/margins": 0.6259465217590332, "rewards/rejected": -2.0145115852355957, "step": 135 }, { "epoch": 0.29311698508243916, "grad_norm": 36.60577846523631, "learning_rate": 4.453763107901675e-07, "logits/chosen": -2.1079370975494385, "logits/rejected": -2.0384957790374756, "logps/chosen": -1.5038011074066162, "logps/rejected": -1.9894378185272217, "loss": 1.4093, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -1.5038011074066162, "rewards/margins": 0.48563677072525024, "rewards/rejected": -1.9894378185272217, "step": 140 }, { "epoch": 0.3035854488353834, "grad_norm": 28.487957762616777, "learning_rate": 4.395355737667985e-07, "logits/chosen": -2.0164198875427246, "logits/rejected": -1.9161510467529297, "logps/chosen": -1.5493210554122925, "logps/rejected": -2.018698215484619, "loss": 1.451, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.5493210554122925, "rewards/margins": 0.4693775177001953, "rewards/rejected": -2.018698215484619, "step": 145 }, { "epoch": 0.31405391258832765, "grad_norm": 41.06239885865088, "learning_rate": 4.3344075855595097e-07, "logits/chosen": -1.9734079837799072, "logits/rejected": -1.9259965419769287, "logps/chosen": -1.4789108037948608, "logps/rejected": -1.9842697381973267, "loss": 1.3495, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -1.4789108037948608, "rewards/margins": 0.5053588151931763, "rewards/rejected": -1.9842697381973267, "step": 150 }, { "epoch": 0.3245223763412719, "grad_norm": 48.830011945922514, "learning_rate": 4.271000354423425e-07, "logits/chosen": -1.8841253519058228, "logits/rejected": -1.8244024515151978, "logps/chosen": -1.7202045917510986, "logps/rejected": -2.2293524742126465, "loss": 1.3858, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.7202045917510986, "rewards/margins": 0.5091480016708374, "rewards/rejected": -2.2293524742126465, "step": 155 }, { "epoch": 0.33499084009421615, "grad_norm": 58.39848893185564, "learning_rate": 4.2052190435769554e-07, "logits/chosen": -1.7686011791229248, "logits/rejected": -1.7215473651885986, "logps/chosen": -1.7806098461151123, "logps/rejected": -2.337874174118042, "loss": 1.3384, "rewards/accuracies": 0.71875, "rewards/chosen": -1.7806098461151123, "rewards/margins": 0.5572644472122192, "rewards/rejected": -2.337874174118042, "step": 160 }, { "epoch": 0.34545930384716045, "grad_norm": 39.19480392174481, "learning_rate": 4.137151834863213e-07, "logits/chosen": -1.7493757009506226, "logits/rejected": -1.6483466625213623, "logps/chosen": -1.6905717849731445, "logps/rejected": -2.2200164794921875, "loss": 1.3194, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -1.6905717849731445, "rewards/margins": 0.5294445753097534, "rewards/rejected": -2.2200164794921875, "step": 165 }, { "epoch": 0.3559277676001047, "grad_norm": 43.29327849827671, "learning_rate": 4.0668899744407567e-07, "logits/chosen": -1.7271289825439453, "logits/rejected": -1.6169135570526123, "logps/chosen": -1.7594356536865234, "logps/rejected": -2.3671681880950928, "loss": 1.4272, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.7594356536865234, "rewards/margins": 0.6077327132225037, "rewards/rejected": -2.3671681880950928, "step": 170 }, { "epoch": 0.36639623135304894, "grad_norm": 51.0545099532057, "learning_rate": 3.994527650465352e-07, "logits/chosen": -1.6730191707611084, "logits/rejected": -1.5045350790023804, "logps/chosen": -1.7539478540420532, "logps/rejected": -2.3421220779418945, "loss": 1.4166, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.7539478540420532, "rewards/margins": 0.5881742835044861, "rewards/rejected": -2.3421220779418945, "step": 175 }, { "epoch": 0.3768646951059932, "grad_norm": 39.83826698098843, "learning_rate": 3.920161866827889e-07, "logits/chosen": -1.5326063632965088, "logits/rejected": -1.4709655046463013, "logps/chosen": -1.8253322839736938, "logps/rejected": -2.5203299522399902, "loss": 1.327, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -1.8253322839736938, "rewards/margins": 0.6949977278709412, "rewards/rejected": -2.5203299522399902, "step": 180 }, { "epoch": 0.38733315885893743, "grad_norm": 54.751515663792574, "learning_rate": 3.8438923131177237e-07, "logits/chosen": -1.6529709100723267, "logits/rejected": -1.547048807144165, "logps/chosen": -2.0327372550964355, "logps/rejected": -2.799630880355835, "loss": 1.3481, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -2.0327372550964355, "rewards/margins": 0.7668935656547546, "rewards/rejected": -2.799630880355835, "step": 185 }, { "epoch": 0.39780162261188173, "grad_norm": 55.01879734493922, "learning_rate": 3.765821230985757e-07, "logits/chosen": -1.7503044605255127, "logits/rejected": -1.7193371057510376, "logps/chosen": -2.0020930767059326, "logps/rejected": -2.541438341140747, "loss": 1.4457, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.0020930767059326, "rewards/margins": 0.5393451452255249, "rewards/rejected": -2.541438341140747, "step": 190 }, { "epoch": 0.408270086364826, "grad_norm": 56.01695405727775, "learning_rate": 3.6860532770864005e-07, "logits/chosen": -1.6532561779022217, "logits/rejected": -1.5889365673065186, "logps/chosen": -2.2126450538635254, "logps/rejected": -2.833956241607666, "loss": 1.3837, "rewards/accuracies": 0.71875, "rewards/chosen": -2.2126450538635254, "rewards/margins": 0.6213110685348511, "rewards/rejected": -2.833956241607666, "step": 195 }, { "epoch": 0.4187385501177702, "grad_norm": 53.693962657799496, "learning_rate": 3.604695382782159e-07, "logits/chosen": -1.5952329635620117, "logits/rejected": -1.5742504596710205, "logps/chosen": -2.298196315765381, "logps/rejected": -2.9012575149536133, "loss": 1.3376, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -2.298196315765381, "rewards/margins": 0.6030609607696533, "rewards/rejected": -2.9012575149536133, "step": 200 }, { "epoch": 0.42920701387071447, "grad_norm": 64.50276884709064, "learning_rate": 3.5218566107988867e-07, "logits/chosen": -1.3948825597763062, "logits/rejected": -1.3095804452896118, "logps/chosen": -2.4898180961608887, "logps/rejected": -3.191925525665283, "loss": 1.2815, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -2.4898180961608887, "rewards/margins": 0.7021073698997498, "rewards/rejected": -3.191925525665283, "step": 205 }, { "epoch": 0.4396754776236587, "grad_norm": 84.53595036055246, "learning_rate": 3.4376480090239047e-07, "logits/chosen": -1.3462097644805908, "logits/rejected": -1.2632777690887451, "logps/chosen": -2.999131679534912, "logps/rejected": -3.611553192138672, "loss": 1.3852, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -2.999131679534912, "rewards/margins": 0.6124216318130493, "rewards/rejected": -3.611553192138672, "step": 210 }, { "epoch": 0.45014394137660296, "grad_norm": 46.66563883333029, "learning_rate": 3.3521824616429284e-07, "logits/chosen": -1.3600023984909058, "logits/rejected": -1.2664874792099, "logps/chosen": -2.6604790687561035, "logps/rejected": -3.3327317237854004, "loss": 1.286, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -2.6604790687561035, "rewards/margins": 0.6722527742385864, "rewards/rejected": -3.3327317237854004, "step": 215 }, { "epoch": 0.46061240512954726, "grad_norm": 52.30084280147944, "learning_rate": 3.265574537815398e-07, "logits/chosen": -1.328802466392517, "logits/rejected": -1.1486713886260986, "logps/chosen": -2.2492258548736572, "logps/rejected": -3.048722743988037, "loss": 1.2381, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -2.2492258548736572, "rewards/margins": 0.7994968295097351, "rewards/rejected": -3.048722743988037, "step": 220 }, { "epoch": 0.4710808688824915, "grad_norm": 59.5983284817177, "learning_rate": 3.1779403380910425e-07, "logits/chosen": -1.2022250890731812, "logits/rejected": -0.9333807229995728, "logps/chosen": -2.3682289123535156, "logps/rejected": -3.2058632373809814, "loss": 1.2818, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -2.3682289123535156, "rewards/margins": 0.8376340866088867, "rewards/rejected": -3.2058632373809814, "step": 225 }, { "epoch": 0.48154933263543576, "grad_norm": 57.79020776140021, "learning_rate": 3.0893973387735683e-07, "logits/chosen": -0.9690738916397095, "logits/rejected": -0.6763086915016174, "logps/chosen": -2.678581714630127, "logps/rejected": -3.652540922164917, "loss": 1.1691, "rewards/accuracies": 0.78125, "rewards/chosen": -2.678581714630127, "rewards/margins": 0.9739594459533691, "rewards/rejected": -3.652540922164917, "step": 230 }, { "epoch": 0.49201779638838, "grad_norm": 68.31261443984954, "learning_rate": 3.000064234440111e-07, "logits/chosen": -0.7249783277511597, "logits/rejected": -0.4497829079627991, "logps/chosen": -2.929415225982666, "logps/rejected": -3.6940486431121826, "loss": 1.219, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -2.929415225982666, "rewards/margins": 0.7646334171295166, "rewards/rejected": -3.6940486431121826, "step": 235 }, { "epoch": 0.5024862601413242, "grad_norm": 58.51770384505626, "learning_rate": 2.910060778827554e-07, "logits/chosen": -0.6646004915237427, "logits/rejected": -0.4312285780906677, "logps/chosen": -2.8067305088043213, "logps/rejected": -3.5952727794647217, "loss": 1.2149, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -2.8067305088043213, "rewards/margins": 0.7885428071022034, "rewards/rejected": -3.5952727794647217, "step": 240 }, { "epoch": 0.5129547238942685, "grad_norm": 77.9188973058883, "learning_rate": 2.8195076242990116e-07, "logits/chosen": -0.8215748071670532, "logits/rejected": -0.6058939695358276, "logps/chosen": -3.0396523475646973, "logps/rejected": -3.7490572929382324, "loss": 1.3009, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -3.0396523475646973, "rewards/margins": 0.7094049453735352, "rewards/rejected": -3.7490572929382324, "step": 245 }, { "epoch": 0.5234231876472127, "grad_norm": 63.40691188463422, "learning_rate": 2.7285261601056697e-07, "logits/chosen": -0.9382761716842651, "logits/rejected": -0.729052722454071, "logps/chosen": -2.595768451690674, "logps/rejected": -3.3881466388702393, "loss": 1.3493, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -2.595768451690674, "rewards/margins": 0.792378306388855, "rewards/rejected": -3.3881466388702393, "step": 250 }, { "epoch": 0.533891651400157, "grad_norm": 48.6873954516319, "learning_rate": 2.6372383496608186e-07, "logits/chosen": -0.9191045761108398, "logits/rejected": -0.6374125480651855, "logps/chosen": -2.6228957176208496, "logps/rejected": -3.4444642066955566, "loss": 1.1933, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.6228957176208496, "rewards/margins": 0.8215683698654175, "rewards/rejected": -3.4444642066955566, "step": 255 }, { "epoch": 0.5443601151531012, "grad_norm": 58.54153465550475, "learning_rate": 2.5457665670441937e-07, "logits/chosen": -1.0062066316604614, "logits/rejected": -0.7524072527885437, "logps/chosen": -2.226409435272217, "logps/rejected": -3.2910056114196777, "loss": 1.2637, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -2.226409435272217, "rewards/margins": 1.0645958185195923, "rewards/rejected": -3.2910056114196777, "step": 260 }, { "epoch": 0.5548285789060455, "grad_norm": 56.27740231168225, "learning_rate": 2.454233432955807e-07, "logits/chosen": -0.9759531021118164, "logits/rejected": -0.7592412233352661, "logps/chosen": -2.4869279861450195, "logps/rejected": -3.270657777786255, "loss": 1.1763, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.4869279861450195, "rewards/margins": 0.7837298512458801, "rewards/rejected": -3.270657777786255, "step": 265 }, { "epoch": 0.5652970426589898, "grad_norm": 79.16628471833214, "learning_rate": 2.3627616503391812e-07, "logits/chosen": -0.7810020446777344, "logits/rejected": -0.563759446144104, "logps/chosen": -2.9826416969299316, "logps/rejected": -3.696744918823242, "loss": 1.2307, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.9826416969299316, "rewards/margins": 0.7141033411026001, "rewards/rejected": -3.696744918823242, "step": 270 }, { "epoch": 0.575765506411934, "grad_norm": 67.9940381335306, "learning_rate": 2.2714738398943308e-07, "logits/chosen": -0.6166077256202698, "logits/rejected": -0.255386084318161, "logps/chosen": -3.5992627143859863, "logps/rejected": -4.62840461730957, "loss": 1.2213, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -3.5992627143859863, "rewards/margins": 1.029141902923584, "rewards/rejected": -4.62840461730957, "step": 275 }, { "epoch": 0.5862339701648783, "grad_norm": 77.77171483631982, "learning_rate": 2.1804923757009882e-07, "logits/chosen": -0.4341735243797302, "logits/rejected": -0.08086974173784256, "logps/chosen": -3.569580554962158, "logps/rejected": -4.482806205749512, "loss": 1.2253, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -3.569580554962158, "rewards/margins": 0.9132259488105774, "rewards/rejected": -4.482806205749512, "step": 280 }, { "epoch": 0.5967024339178225, "grad_norm": 78.23503462159874, "learning_rate": 2.089939221172446e-07, "logits/chosen": -0.4769046902656555, "logits/rejected": -0.28695839643478394, "logps/chosen": -3.2708067893981934, "logps/rejected": -4.225908279418945, "loss": 1.2069, "rewards/accuracies": 0.75, "rewards/chosen": -3.2708067893981934, "rewards/margins": 0.955101490020752, "rewards/rejected": -4.225908279418945, "step": 285 }, { "epoch": 0.6071708976707668, "grad_norm": 60.888870909608286, "learning_rate": 1.9999357655598891e-07, "logits/chosen": -0.629644513130188, "logits/rejected": -0.3126750886440277, "logps/chosen": -3.1302852630615234, "logps/rejected": -4.096064567565918, "loss": 1.1961, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -3.1302852630615234, "rewards/margins": 0.9657794833183289, "rewards/rejected": -4.096064567565918, "step": 290 }, { "epoch": 0.6176393614237111, "grad_norm": 69.95748727333331, "learning_rate": 1.9106026612264315e-07, "logits/chosen": -0.6755684614181519, "logits/rejected": -0.30471256375312805, "logps/chosen": -2.978564739227295, "logps/rejected": -3.7914657592773438, "loss": 1.2554, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.978564739227295, "rewards/margins": 0.8129006624221802, "rewards/rejected": -3.7914657592773438, "step": 295 }, { "epoch": 0.6281078251766553, "grad_norm": 70.38289921062774, "learning_rate": 1.8220596619089573e-07, "logits/chosen": -0.5232299566268921, "logits/rejected": -0.24573859572410583, "logps/chosen": -2.9739651679992676, "logps/rejected": -3.8225860595703125, "loss": 1.2417, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.9739651679992676, "rewards/margins": 0.8486205339431763, "rewards/rejected": -3.8225860595703125, "step": 300 }, { "epoch": 0.6385762889295996, "grad_norm": 72.4502519540176, "learning_rate": 1.7344254621846017e-07, "logits/chosen": -0.25070467591285706, "logits/rejected": 0.04965158551931381, "logps/chosen": -3.1987884044647217, "logps/rejected": -4.245509147644043, "loss": 1.1282, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -3.1987884044647217, "rewards/margins": 1.046720266342163, "rewards/rejected": -4.245509147644043, "step": 305 }, { "epoch": 0.6490447526825438, "grad_norm": 74.82154328000124, "learning_rate": 1.647817538357072e-07, "logits/chosen": -0.21263869106769562, "logits/rejected": -0.07299887388944626, "logps/chosen": -3.0970096588134766, "logps/rejected": -4.266674995422363, "loss": 1.1228, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -3.0970096588134766, "rewards/margins": 1.1696654558181763, "rewards/rejected": -4.266674995422363, "step": 310 }, { "epoch": 0.6595132164354881, "grad_norm": 104.80383724137711, "learning_rate": 1.562351990976095e-07, "logits/chosen": -0.2823619246482849, "logits/rejected": 0.06330037117004395, "logps/chosen": -3.170875072479248, "logps/rejected": -4.283278465270996, "loss": 1.1544, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -3.170875072479248, "rewards/margins": 1.1124036312103271, "rewards/rejected": -4.283278465270996, "step": 315 }, { "epoch": 0.6699816801884323, "grad_norm": 77.84225661924857, "learning_rate": 1.478143389201113e-07, "logits/chosen": -0.15583737194538116, "logits/rejected": -0.0702785775065422, "logps/chosen": -3.0332465171813965, "logps/rejected": -3.9285430908203125, "loss": 1.2128, "rewards/accuracies": 0.75, "rewards/chosen": -3.0332465171813965, "rewards/margins": 0.8952968716621399, "rewards/rejected": -3.9285430908203125, "step": 320 }, { "epoch": 0.6804501439413766, "grad_norm": 82.4050976791357, "learning_rate": 1.3953046172178413e-07, "logits/chosen": -0.19347061216831207, "logits/rejected": 0.27237311005592346, "logps/chosen": -2.916917324066162, "logps/rejected": -3.891824245452881, "loss": 1.1941, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.916917324066162, "rewards/margins": 0.9749069213867188, "rewards/rejected": -3.891824245452881, "step": 325 }, { "epoch": 0.6909186076943209, "grad_norm": 74.25892962169908, "learning_rate": 1.3139467229135998e-07, "logits/chosen": -0.2500423491001129, "logits/rejected": 0.1266798973083496, "logps/chosen": -2.7360358238220215, "logps/rejected": -3.647095203399658, "loss": 1.2222, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -2.7360358238220215, "rewards/margins": 0.9110593795776367, "rewards/rejected": -3.647095203399658, "step": 330 }, { "epoch": 0.7013870714472651, "grad_norm": 68.6281193094518, "learning_rate": 1.2341787690142435e-07, "logits/chosen": -0.4035646915435791, "logits/rejected": -0.033549416810274124, "logps/chosen": -2.763370990753174, "logps/rejected": -3.6889405250549316, "loss": 1.2685, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -2.763370990753174, "rewards/margins": 0.9255691766738892, "rewards/rejected": -3.6889405250549316, "step": 335 }, { "epoch": 0.7118555352002094, "grad_norm": 64.3244472190874, "learning_rate": 1.1561076868822755e-07, "logits/chosen": -0.36889219284057617, "logits/rejected": -0.027694886550307274, "logps/chosen": -2.9837875366210938, "logps/rejected": -3.666727066040039, "loss": 1.2109, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -2.9837875366210938, "rewards/margins": 0.6829396486282349, "rewards/rejected": -3.666727066040039, "step": 340 }, { "epoch": 0.7223239989531536, "grad_norm": 67.8756323193863, "learning_rate": 1.0798381331721107e-07, "logits/chosen": -0.3677563965320587, "logits/rejected": -0.05767295882105827, "logps/chosen": -2.810889959335327, "logps/rejected": -3.620894193649292, "loss": 1.1996, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.810889959335327, "rewards/margins": 0.8100040555000305, "rewards/rejected": -3.620894193649292, "step": 345 }, { "epoch": 0.7327924627060979, "grad_norm": 75.08030338233884, "learning_rate": 1.0054723495346482e-07, "logits/chosen": -0.2898016571998596, "logits/rejected": -0.137650728225708, "logps/chosen": -2.9614298343658447, "logps/rejected": -3.739614963531494, "loss": 1.2604, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -2.9614298343658447, "rewards/margins": 0.7781847715377808, "rewards/rejected": -3.739614963531494, "step": 350 }, { "epoch": 0.7432609264590422, "grad_norm": 74.74858163258845, "learning_rate": 9.331100255592436e-08, "logits/chosen": -0.2886708378791809, "logits/rejected": 0.029823053628206253, "logps/chosen": -3.0250728130340576, "logps/rejected": -4.2197394371032715, "loss": 1.1354, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -3.0250728130340576, "rewards/margins": 1.194666862487793, "rewards/rejected": -4.2197394371032715, "step": 355 }, { "epoch": 0.7537293902119864, "grad_norm": 70.24673714433773, "learning_rate": 8.628481651367875e-08, "logits/chosen": -0.40003472566604614, "logits/rejected": -0.07148544490337372, "logps/chosen": -3.224444627761841, "logps/rejected": -3.998364210128784, "loss": 1.1942, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -3.224444627761841, "rewards/margins": 0.7739196419715881, "rewards/rejected": -3.998364210128784, "step": 360 }, { "epoch": 0.7641978539649307, "grad_norm": 99.18869677889904, "learning_rate": 7.947809564230445e-08, "logits/chosen": -0.20483890175819397, "logits/rejected": 0.11580769717693329, "logps/chosen": -3.247260570526123, "logps/rejected": -4.369873046875, "loss": 1.2124, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -3.247260570526123, "rewards/margins": 1.122612714767456, "rewards/rejected": -4.369873046875, "step": 365 }, { "epoch": 0.7746663177178749, "grad_norm": 91.27768360001403, "learning_rate": 7.289996455765748e-08, "logits/chosen": -0.2206389605998993, "logits/rejected": 0.08472562581300735, "logps/chosen": -3.4089035987854004, "logps/rejected": -4.553833961486816, "loss": 1.1347, "rewards/accuracies": 0.78125, "rewards/chosen": -3.4089035987854004, "rewards/margins": 1.1449302434921265, "rewards/rejected": -4.553833961486816, "step": 370 }, { "epoch": 0.7851347814708192, "grad_norm": 76.20144058868142, "learning_rate": 6.655924144404906e-08, "logits/chosen": -0.2008267343044281, "logits/rejected": -0.1611969918012619, "logps/chosen": -3.3004608154296875, "logps/rejected": -4.173122406005859, "loss": 1.1354, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -3.3004608154296875, "rewards/margins": 0.8726619482040405, "rewards/rejected": -4.173122406005859, "step": 375 }, { "epoch": 0.7956032452237635, "grad_norm": 74.59904017410364, "learning_rate": 6.046442623320145e-08, "logits/chosen": -0.1321212500333786, "logits/rejected": -0.01653924025595188, "logps/chosen": -3.2044310569763184, "logps/rejected": -4.154183864593506, "loss": 1.1999, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -3.2044310569763184, "rewards/margins": 0.9497528076171875, "rewards/rejected": -4.154183864593506, "step": 380 }, { "epoch": 0.8060717089767077, "grad_norm": 72.89488713559955, "learning_rate": 5.4623689209832484e-08, "logits/chosen": -0.2955227494239807, "logits/rejected": -0.03774283826351166, "logps/chosen": -3.0728917121887207, "logps/rejected": -4.051039695739746, "loss": 1.1963, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -3.0728917121887207, "rewards/margins": 0.9781482815742493, "rewards/rejected": -4.051039695739746, "step": 385 }, { "epoch": 0.816540172729652, "grad_norm": 67.63752264326851, "learning_rate": 4.904486005914027e-08, "logits/chosen": -0.45038795471191406, "logits/rejected": -0.09373664855957031, "logps/chosen": -3.1700501441955566, "logps/rejected": -4.2985382080078125, "loss": 1.1136, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -3.1700501441955566, "rewards/margins": 1.1284879446029663, "rewards/rejected": -4.2985382080078125, "step": 390 }, { "epoch": 0.8270086364825961, "grad_norm": 74.45872340513813, "learning_rate": 4.373541737087263e-08, "logits/chosen": -0.49037042260169983, "logits/rejected": -0.09940163046121597, "logps/chosen": -3.04799222946167, "logps/rejected": -4.031525135040283, "loss": 1.1544, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -3.04799222946167, "rewards/margins": 0.9835329055786133, "rewards/rejected": -4.031525135040283, "step": 395 }, { "epoch": 0.8374771002355405, "grad_norm": 78.72742631851618, "learning_rate": 3.8702478614051345e-08, "logits/chosen": -0.32979053258895874, "logits/rejected": -0.04894006997346878, "logps/chosen": -3.051708698272705, "logps/rejected": -4.071520805358887, "loss": 1.2236, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -3.051708698272705, "rewards/margins": 1.0198121070861816, "rewards/rejected": -4.071520805358887, "step": 400 }, { "epoch": 0.8479455639884846, "grad_norm": 84.34790591849121, "learning_rate": 3.3952790595787986e-08, "logits/chosen": -0.38359755277633667, "logits/rejected": -0.2002539187669754, "logps/chosen": -3.3085391521453857, "logps/rejected": -4.132696151733398, "loss": 1.1961, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -3.3085391521453857, "rewards/margins": 0.8241568803787231, "rewards/rejected": -4.132696151733398, "step": 405 }, { "epoch": 0.8584140277414289, "grad_norm": 73.80761771512901, "learning_rate": 2.9492720416985e-08, "logits/chosen": -0.3136187195777893, "logits/rejected": -0.1394844651222229, "logps/chosen": -3.2027900218963623, "logps/rejected": -4.078015327453613, "loss": 1.2319, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -3.2027900218963623, "rewards/margins": 0.8752256631851196, "rewards/rejected": -4.078015327453613, "step": 410 }, { "epoch": 0.8688824914943732, "grad_norm": 55.42101250348971, "learning_rate": 2.5328246937043525e-08, "logits/chosen": -0.3292819857597351, "logits/rejected": 0.0064947158098220825, "logps/chosen": -3.1560277938842773, "logps/rejected": -4.08798360824585, "loss": 1.1358, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -3.1560277938842773, "rewards/margins": 0.9319561123847961, "rewards/rejected": -4.08798360824585, "step": 415 }, { "epoch": 0.8793509552473174, "grad_norm": 67.75077864144087, "learning_rate": 2.1464952759020856e-08, "logits/chosen": -0.36769989132881165, "logits/rejected": -0.17563530802726746, "logps/chosen": -3.0870118141174316, "logps/rejected": -3.987121105194092, "loss": 1.2481, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -3.0870118141174316, "rewards/margins": 0.9001096487045288, "rewards/rejected": -3.987121105194092, "step": 420 }, { "epoch": 0.8898194190002617, "grad_norm": 73.97322762411653, "learning_rate": 1.7908016745981856e-08, "logits/chosen": -0.3043641448020935, "logits/rejected": -0.044486187398433685, "logps/chosen": -3.249401569366455, "logps/rejected": -4.1566572189331055, "loss": 1.1883, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -3.249401569366455, "rewards/margins": 0.9072558283805847, "rewards/rejected": -4.1566572189331055, "step": 425 }, { "epoch": 0.9002878827532059, "grad_norm": 62.93268497369251, "learning_rate": 1.4662207078575684e-08, "logits/chosen": -0.3604608178138733, "logits/rejected": -0.05810839682817459, "logps/chosen": -3.14021635055542, "logps/rejected": -4.223752975463867, "loss": 1.1337, "rewards/accuracies": 0.78125, "rewards/chosen": -3.14021635055542, "rewards/margins": 1.0835367441177368, "rewards/rejected": -4.223752975463867, "step": 430 }, { "epoch": 0.9107563465061502, "grad_norm": 72.41742830338748, "learning_rate": 1.1731874863145142e-08, "logits/chosen": -0.3878116011619568, "logits/rejected": -0.25898757576942444, "logps/chosen": -3.085711717605591, "logps/rejected": -4.0059380531311035, "loss": 1.1752, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -3.085711717605591, "rewards/margins": 0.9202262163162231, "rewards/rejected": -4.0059380531311035, "step": 435 }, { "epoch": 0.9212248102590945, "grad_norm": 67.55543823572617, "learning_rate": 9.12094829893642e-09, "logits/chosen": -0.26049160957336426, "logits/rejected": -0.0604906901717186, "logps/chosen": -3.136883497238159, "logps/rejected": -4.199965000152588, "loss": 1.1765, "rewards/accuracies": 0.75, "rewards/chosen": -3.136883497238159, "rewards/margins": 1.0630815029144287, "rewards/rejected": -4.199965000152588, "step": 440 }, { "epoch": 0.9316932740120387, "grad_norm": 68.54556826267816, "learning_rate": 6.832927412229017e-09, "logits/chosen": -0.3159419894218445, "logits/rejected": -0.0612739734351635, "logps/chosen": -3.3173446655273438, "logps/rejected": -4.260361194610596, "loss": 1.1964, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -3.3173446655273438, "rewards/margins": 0.9430168271064758, "rewards/rejected": -4.260361194610596, "step": 445 }, { "epoch": 0.942161737764983, "grad_norm": 67.1017202252172, "learning_rate": 4.8708793644441086e-09, "logits/chosen": -0.2727358341217041, "logits/rejected": -0.0900549367070198, "logps/chosen": -3.1065354347229004, "logps/rejected": -4.069243431091309, "loss": 1.1456, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -3.1065354347229004, "rewards/margins": 0.9627076983451843, "rewards/rejected": -4.069243431091309, "step": 450 }, { "epoch": 0.9526302015179272, "grad_norm": 74.36090301118229, "learning_rate": 3.2374343405217884e-09, "logits/chosen": -0.2751519978046417, "logits/rejected": 0.001943744719028473, "logps/chosen": -3.195307493209839, "logps/rejected": -4.041089057922363, "loss": 1.2193, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -3.195307493209839, "rewards/margins": 0.8457815051078796, "rewards/rejected": -4.041089057922363, "step": 455 }, { "epoch": 0.9630986652708715, "grad_norm": 63.718344779873874, "learning_rate": 1.9347820230782295e-09, "logits/chosen": -0.1701783835887909, "logits/rejected": 0.05854835361242294, "logps/chosen": -3.1413817405700684, "logps/rejected": -4.193324565887451, "loss": 1.1655, "rewards/accuracies": 0.78125, "rewards/chosen": -3.1413817405700684, "rewards/margins": 1.0519429445266724, "rewards/rejected": -4.193324565887451, "step": 460 }, { "epoch": 0.9735671290238157, "grad_norm": 66.29809600281665, "learning_rate": 9.64668657069706e-10, "logits/chosen": -0.3206351101398468, "logits/rejected": 0.049534112215042114, "logps/chosen": -3.1361141204833984, "logps/rejected": -4.281296730041504, "loss": 1.1251, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.1361141204833984, "rewards/margins": 1.1451822519302368, "rewards/rejected": -4.281296730041504, "step": 465 }, { "epoch": 0.98403559277676, "grad_norm": 74.93911331685558, "learning_rate": 3.2839470889836627e-10, "logits/chosen": -0.34328070282936096, "logits/rejected": -0.11956997960805893, "logps/chosen": -3.241725206375122, "logps/rejected": -4.065664291381836, "loss": 1.2111, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -3.241725206375122, "rewards/margins": 0.8239390254020691, "rewards/rejected": -4.065664291381836, "step": 470 }, { "epoch": 0.9945040565297043, "grad_norm": 69.55111369299213, "learning_rate": 2.6813123097352287e-11, "logits/chosen": -0.4041665494441986, "logits/rejected": 0.02163061499595642, "logps/chosen": -3.192171573638916, "logps/rejected": -4.112965106964111, "loss": 1.2172, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -3.192171573638916, "rewards/margins": 0.9207934141159058, "rewards/rejected": -4.112965106964111, "step": 475 }, { "epoch": 0.998691442030882, "step": 477, "total_flos": 0.0, "train_loss": 1.3340201452843048, "train_runtime": 7646.4301, "train_samples_per_second": 7.995, "train_steps_per_second": 0.062 } ], "logging_steps": 5, "max_steps": 477, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000000, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }