|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 1589, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0031466331025802393, |
|
"grad_norm": 8.804184913635254, |
|
"learning_rate": 1.257861635220126e-06, |
|
"logits/chosen": -0.11013289541006088, |
|
"logits/rejected": -0.5208367109298706, |
|
"logps/chosen": -0.8537980914115906, |
|
"logps/rejected": -1.0550096035003662, |
|
"loss": 24.9985, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -5.359128408599645e-06, |
|
"rewards/margins": 1.545622944831848e-05, |
|
"rewards/rejected": -2.081535967590753e-05, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0062932662051604785, |
|
"grad_norm": 18.678768157958984, |
|
"learning_rate": 2.2641509433962266e-06, |
|
"logits/chosen": -0.3030635714530945, |
|
"logits/rejected": -0.5435053706169128, |
|
"logps/chosen": -0.9865642786026001, |
|
"logps/rejected": -1.107262372970581, |
|
"loss": 24.9967, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.00010868474782910198, |
|
"rewards/margins": 3.348257814650424e-05, |
|
"rewards/rejected": -0.00014216733688954264, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.009439899307740718, |
|
"grad_norm": 11.281435012817383, |
|
"learning_rate": 3.5220125786163524e-06, |
|
"logits/chosen": -0.5111545324325562, |
|
"logits/rejected": -0.8536307215690613, |
|
"logps/chosen": -1.0305876731872559, |
|
"logps/rejected": -1.2494089603424072, |
|
"loss": 24.9847, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.00031705142464488745, |
|
"rewards/margins": 0.000152838954818435, |
|
"rewards/rejected": -0.00046989036491140723, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.012586532410320957, |
|
"grad_norm": 59.5455322265625, |
|
"learning_rate": 4.528301886792453e-06, |
|
"logits/chosen": -0.616014838218689, |
|
"logits/rejected": -0.6851056218147278, |
|
"logps/chosen": -1.130916953086853, |
|
"logps/rejected": -1.46986985206604, |
|
"loss": 24.9645, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.0012707245768979192, |
|
"rewards/margins": 0.0003901523014064878, |
|
"rewards/rejected": -0.0016608769074082375, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.015733165512901194, |
|
"grad_norm": 9.532500267028809, |
|
"learning_rate": 5.786163522012579e-06, |
|
"logits/chosen": -0.12423186004161835, |
|
"logits/rejected": -0.4599896967411041, |
|
"logps/chosen": -0.8485546112060547, |
|
"logps/rejected": -1.0018525123596191, |
|
"loss": 24.9267, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.001064571551978588, |
|
"rewards/margins": 0.0007417487213388085, |
|
"rewards/rejected": -0.0018063202733173966, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.018879798615481436, |
|
"grad_norm": 11.064372062683105, |
|
"learning_rate": 7.044025157232705e-06, |
|
"logits/chosen": -0.1580429971218109, |
|
"logits/rejected": -0.38266992568969727, |
|
"logps/chosen": -0.8662201166152954, |
|
"logps/rejected": -1.0262982845306396, |
|
"loss": 24.8872, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.0021042851731181145, |
|
"rewards/margins": 0.0011668736115098, |
|
"rewards/rejected": -0.0032711587846279144, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.022026431718061675, |
|
"grad_norm": 37.646690368652344, |
|
"learning_rate": 8.301886792452832e-06, |
|
"logits/chosen": 0.026471847668290138, |
|
"logits/rejected": -0.49966034293174744, |
|
"logps/chosen": -0.8883110880851746, |
|
"logps/rejected": -1.199055790901184, |
|
"loss": 24.6732, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.003758195089176297, |
|
"rewards/margins": 0.0034271504264324903, |
|
"rewards/rejected": -0.0071853450499475, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.025173064820641914, |
|
"grad_norm": 34.88091278076172, |
|
"learning_rate": 9.559748427672956e-06, |
|
"logits/chosen": -0.36181551218032837, |
|
"logits/rejected": -0.6659843325614929, |
|
"logps/chosen": -0.9565097689628601, |
|
"logps/rejected": -1.183980941772461, |
|
"loss": 24.6032, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.007488996721804142, |
|
"rewards/margins": 0.004166100639849901, |
|
"rewards/rejected": -0.011655096895992756, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.028319697923222153, |
|
"grad_norm": 20.635541915893555, |
|
"learning_rate": 1.0817610062893083e-05, |
|
"logits/chosen": -0.5469181537628174, |
|
"logits/rejected": -0.7580572366714478, |
|
"logps/chosen": -1.0930149555206299, |
|
"logps/rejected": -1.2114075422286987, |
|
"loss": 24.7536, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.0149534298107028, |
|
"rewards/margins": 0.002928597154095769, |
|
"rewards/rejected": -0.017882030457258224, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.03146633102580239, |
|
"grad_norm": 36.45863723754883, |
|
"learning_rate": 1.2075471698113209e-05, |
|
"logits/chosen": -0.5085287094116211, |
|
"logits/rejected": -0.7208930253982544, |
|
"logps/chosen": -1.083438515663147, |
|
"logps/rejected": -1.3884985446929932, |
|
"loss": 23.9964, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.018525470048189163, |
|
"rewards/margins": 0.011683688499033451, |
|
"rewards/rejected": -0.03020915761590004, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.034612964128382634, |
|
"grad_norm": 45.08958053588867, |
|
"learning_rate": 1.3081761006289308e-05, |
|
"logits/chosen": -0.6235328912734985, |
|
"logits/rejected": -0.8463523983955383, |
|
"logps/chosen": -1.1567853689193726, |
|
"logps/rejected": -2.149567127227783, |
|
"loss": 23.8291, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.028398022055625916, |
|
"rewards/margins": 0.03046615794301033, |
|
"rewards/rejected": -0.05886417627334595, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.03775959723096287, |
|
"grad_norm": 48.65830612182617, |
|
"learning_rate": 1.408805031446541e-05, |
|
"logits/chosen": -0.6318017244338989, |
|
"logits/rejected": -0.9939996600151062, |
|
"logps/chosen": -1.7119739055633545, |
|
"logps/rejected": -2.3402199745178223, |
|
"loss": 23.5592, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.060618169605731964, |
|
"rewards/margins": 0.028203105553984642, |
|
"rewards/rejected": -0.08882127702236176, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04090623033354311, |
|
"grad_norm": 138.4451904296875, |
|
"learning_rate": 1.5345911949685536e-05, |
|
"logits/chosen": -0.9717090725898743, |
|
"logits/rejected": -1.1959871053695679, |
|
"logps/chosen": -1.9082473516464233, |
|
"logps/rejected": -2.449486494064331, |
|
"loss": 22.7524, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.08517814427614212, |
|
"rewards/margins": 0.03464372828602791, |
|
"rewards/rejected": -0.11982186883687973, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.04405286343612335, |
|
"grad_norm": 49.257568359375, |
|
"learning_rate": 1.6603773584905664e-05, |
|
"logits/chosen": -0.7433441281318665, |
|
"logits/rejected": -1.0399134159088135, |
|
"logps/chosen": -2.255545139312744, |
|
"logps/rejected": -2.98321533203125, |
|
"loss": 23.4113, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.11126575618982315, |
|
"rewards/margins": 0.04789165034890175, |
|
"rewards/rejected": -0.159157395362854, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04719949653870359, |
|
"grad_norm": 56.168670654296875, |
|
"learning_rate": 1.7861635220125788e-05, |
|
"logits/chosen": -1.0234445333480835, |
|
"logits/rejected": -1.288999080657959, |
|
"logps/chosen": -1.653058648109436, |
|
"logps/rejected": -2.370941162109375, |
|
"loss": 22.181, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.08025868237018585, |
|
"rewards/margins": 0.044546954333782196, |
|
"rewards/rejected": -0.12480561435222626, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.05034612964128383, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.8867924528301888e-05, |
|
"logits/chosen": -1.1835613250732422, |
|
"logits/rejected": -1.4036767482757568, |
|
"logps/chosen": -1.90883469581604, |
|
"logps/rejected": -2.1450889110565186, |
|
"loss": 25.8869, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.09280741214752197, |
|
"rewards/margins": 0.021463319659233093, |
|
"rewards/rejected": -0.11427073180675507, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05349276274386407, |
|
"grad_norm": 103.32862091064453, |
|
"learning_rate": 2.0125786163522016e-05, |
|
"logits/chosen": -1.539898157119751, |
|
"logits/rejected": -1.6518356800079346, |
|
"logps/chosen": -2.0776684284210205, |
|
"logps/rejected": -2.5599067211151123, |
|
"loss": 24.1212, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.12567706406116486, |
|
"rewards/margins": 0.026400262489914894, |
|
"rewards/rejected": -0.152077317237854, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.056639395846444306, |
|
"grad_norm": 176.14784240722656, |
|
"learning_rate": 2.138364779874214e-05, |
|
"logits/chosen": -1.3818124532699585, |
|
"logits/rejected": -1.5843524932861328, |
|
"logps/chosen": -2.48514461517334, |
|
"logps/rejected": -2.8115108013153076, |
|
"loss": 26.3518, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.15766306221485138, |
|
"rewards/margins": 0.025023411959409714, |
|
"rewards/rejected": -0.1826864778995514, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.059786028949024544, |
|
"grad_norm": 106.15309143066406, |
|
"learning_rate": 2.2641509433962265e-05, |
|
"logits/chosen": -1.5876004695892334, |
|
"logits/rejected": -1.7525005340576172, |
|
"logps/chosen": -2.2529654502868652, |
|
"logps/rejected": -3.2197937965393066, |
|
"loss": 20.8074, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.14453086256980896, |
|
"rewards/margins": 0.07421146333217621, |
|
"rewards/rejected": -0.21874232590198517, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.06293266205160478, |
|
"grad_norm": 91.7872085571289, |
|
"learning_rate": 2.3899371069182393e-05, |
|
"logits/chosen": -1.6880552768707275, |
|
"logits/rejected": -1.667741060256958, |
|
"logps/chosen": -3.5453040599823, |
|
"logps/rejected": -3.8808798789978027, |
|
"loss": 24.6555, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.2320992648601532, |
|
"rewards/margins": 0.020265836268663406, |
|
"rewards/rejected": -0.2523651123046875, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06607929515418502, |
|
"grad_norm": 778.8959350585938, |
|
"learning_rate": 2.4905660377358492e-05, |
|
"logits/chosen": -1.8318984508514404, |
|
"logits/rejected": -1.8932411670684814, |
|
"logps/chosen": -3.125164031982422, |
|
"logps/rejected": -4.746774673461914, |
|
"loss": 27.3293, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.19288301467895508, |
|
"rewards/margins": 0.096275694668293, |
|
"rewards/rejected": -0.28915873169898987, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.06922592825676527, |
|
"grad_norm": 132.40354919433594, |
|
"learning_rate": 2.6163522012578617e-05, |
|
"logits/chosen": -1.7445348501205444, |
|
"logits/rejected": -1.902320146560669, |
|
"logps/chosen": -1.9325546026229858, |
|
"logps/rejected": -3.3019511699676514, |
|
"loss": 21.7317, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.10555150359869003, |
|
"rewards/margins": 0.07426220178604126, |
|
"rewards/rejected": -0.1798136979341507, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.0723725613593455, |
|
"grad_norm": 98.48819732666016, |
|
"learning_rate": 2.742138364779874e-05, |
|
"logits/chosen": -1.7994951009750366, |
|
"logits/rejected": -1.9057296514511108, |
|
"logps/chosen": -2.1663613319396973, |
|
"logps/rejected": -2.82452392578125, |
|
"loss": 22.7429, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.1297454833984375, |
|
"rewards/margins": 0.04077299311757088, |
|
"rewards/rejected": -0.17051845788955688, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.07551919446192575, |
|
"grad_norm": 93.13198852539062, |
|
"learning_rate": 2.867924528301887e-05, |
|
"logits/chosen": -1.6606374979019165, |
|
"logits/rejected": -1.7864787578582764, |
|
"logps/chosen": -2.2936453819274902, |
|
"logps/rejected": -2.5704400539398193, |
|
"loss": 24.0989, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.12692785263061523, |
|
"rewards/margins": 0.020071204751729965, |
|
"rewards/rejected": -0.1469990611076355, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.07866582756450598, |
|
"grad_norm": 101.10535430908203, |
|
"learning_rate": 2.968553459119497e-05, |
|
"logits/chosen": -1.648816704750061, |
|
"logits/rejected": -1.6658546924591064, |
|
"logps/chosen": -2.0479884147644043, |
|
"logps/rejected": -2.8278560638427734, |
|
"loss": 27.9983, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.11854568868875504, |
|
"rewards/margins": 0.0439017117023468, |
|
"rewards/rejected": -0.16244739294052124, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.08181246066708622, |
|
"grad_norm": 92.93740844726562, |
|
"learning_rate": 3.09433962264151e-05, |
|
"logits/chosen": -1.7306410074234009, |
|
"logits/rejected": -1.8349339962005615, |
|
"logps/chosen": -2.082920551300049, |
|
"logps/rejected": -3.115952253341675, |
|
"loss": 23.5299, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.11979808658361435, |
|
"rewards/margins": 0.06730449199676514, |
|
"rewards/rejected": -0.18710258603096008, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.08495909376966645, |
|
"grad_norm": 123.31324768066406, |
|
"learning_rate": 3.220125786163522e-05, |
|
"logits/chosen": -1.8235572576522827, |
|
"logits/rejected": -1.8541405200958252, |
|
"logps/chosen": -1.9667946100234985, |
|
"logps/rejected": -2.772089958190918, |
|
"loss": 22.6137, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.10746946185827255, |
|
"rewards/margins": 0.04866869002580643, |
|
"rewards/rejected": -0.15613815188407898, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.0881057268722467, |
|
"grad_norm": 126.42218017578125, |
|
"learning_rate": 3.345911949685535e-05, |
|
"logits/chosen": -1.674515962600708, |
|
"logits/rejected": -1.8894662857055664, |
|
"logps/chosen": -2.245245933532715, |
|
"logps/rejected": -3.0301966667175293, |
|
"loss": 22.6984, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.11901465803384781, |
|
"rewards/margins": 0.049767203629016876, |
|
"rewards/rejected": -0.16878187656402588, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.09125235997482693, |
|
"grad_norm": 114.61023712158203, |
|
"learning_rate": 3.471698113207548e-05, |
|
"logits/chosen": -1.7905619144439697, |
|
"logits/rejected": -1.8821656703948975, |
|
"logps/chosen": -3.373708724975586, |
|
"logps/rejected": -4.691153526306152, |
|
"loss": 22.2353, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.18414758145809174, |
|
"rewards/margins": 0.0776277631521225, |
|
"rewards/rejected": -0.26177531480789185, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.09439899307740718, |
|
"grad_norm": 296.22955322265625, |
|
"learning_rate": 3.59748427672956e-05, |
|
"logits/chosen": -1.654166579246521, |
|
"logits/rejected": -1.847495436668396, |
|
"logps/chosen": -3.2497410774230957, |
|
"logps/rejected": -4.303386688232422, |
|
"loss": 20.9992, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.22493870556354523, |
|
"rewards/margins": 0.07029641419649124, |
|
"rewards/rejected": -0.2952350974082947, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.09754562617998741, |
|
"grad_norm": 579.211669921875, |
|
"learning_rate": 3.7232704402515726e-05, |
|
"logits/chosen": -1.6689754724502563, |
|
"logits/rejected": -1.7173473834991455, |
|
"logps/chosen": -3.7695910930633545, |
|
"logps/rejected": -4.783900260925293, |
|
"loss": 25.2195, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.2695242762565613, |
|
"rewards/margins": 0.05760473012924194, |
|
"rewards/rejected": -0.3271290063858032, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.10069225928256766, |
|
"grad_norm": 200.13609313964844, |
|
"learning_rate": 3.8490566037735854e-05, |
|
"logits/chosen": -1.7428325414657593, |
|
"logits/rejected": -1.74752938747406, |
|
"logps/chosen": -3.6156649589538574, |
|
"logps/rejected": -4.805546760559082, |
|
"loss": 22.7118, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.2704419791698456, |
|
"rewards/margins": 0.06444540619850159, |
|
"rewards/rejected": -0.33488741517066956, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.10383889238514789, |
|
"grad_norm": 172.51622009277344, |
|
"learning_rate": 3.9748427672955975e-05, |
|
"logits/chosen": -1.7474384307861328, |
|
"logits/rejected": -1.7428706884384155, |
|
"logps/chosen": -3.276729106903076, |
|
"logps/rejected": -4.082120418548584, |
|
"loss": 22.3077, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.2311684638261795, |
|
"rewards/margins": 0.051828593015670776, |
|
"rewards/rejected": -0.2829970717430115, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.10698552548772813, |
|
"grad_norm": 146.08352661132812, |
|
"learning_rate": 3.9999227773220194e-05, |
|
"logits/chosen": -1.6052366495132446, |
|
"logits/rejected": -1.6235520839691162, |
|
"logps/chosen": -3.030139207839966, |
|
"logps/rejected": -4.707204818725586, |
|
"loss": 20.0014, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.21569938957691193, |
|
"rewards/margins": 0.12310032546520233, |
|
"rewards/rejected": -0.33879974484443665, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.11013215859030837, |
|
"grad_norm": 133.93203735351562, |
|
"learning_rate": 3.9996090704130684e-05, |
|
"logits/chosen": -1.7021839618682861, |
|
"logits/rejected": -1.7295335531234741, |
|
"logps/chosen": -3.9147982597351074, |
|
"logps/rejected": -5.332208633422852, |
|
"loss": 20.047, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.3039882779121399, |
|
"rewards/margins": 0.1180083155632019, |
|
"rewards/rejected": -0.4219965934753418, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.11327879169288861, |
|
"grad_norm": 558.7332763671875, |
|
"learning_rate": 3.999054090678532e-05, |
|
"logits/chosen": -1.5368597507476807, |
|
"logits/rejected": -1.592637300491333, |
|
"logps/chosen": -6.026860237121582, |
|
"logps/rejected": -6.550711631774902, |
|
"loss": 29.6933, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.4695097804069519, |
|
"rewards/margins": 0.02213056944310665, |
|
"rewards/rejected": -0.4916403889656067, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.11642542479546884, |
|
"grad_norm": 212.05760192871094, |
|
"learning_rate": 3.9982579050822615e-05, |
|
"logits/chosen": -1.5933212041854858, |
|
"logits/rejected": -1.5753694772720337, |
|
"logps/chosen": -4.716382026672363, |
|
"logps/rejected": -5.257371425628662, |
|
"loss": 27.5815, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3512773811817169, |
|
"rewards/margins": 0.033867720514535904, |
|
"rewards/rejected": -0.3851450979709625, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.11957205789804909, |
|
"grad_norm": 134.0122833251953, |
|
"learning_rate": 3.997220609692011e-05, |
|
"logits/chosen": -1.6495559215545654, |
|
"logits/rejected": -1.6725133657455444, |
|
"logps/chosen": -3.984989643096924, |
|
"logps/rejected": -5.001562595367432, |
|
"loss": 22.6766, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.2919956147670746, |
|
"rewards/margins": 0.05422482639551163, |
|
"rewards/rejected": -0.3462204337120056, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.12271869100062932, |
|
"grad_norm": 151.4617919921875, |
|
"learning_rate": 3.9959423296678384e-05, |
|
"logits/chosen": -1.7128961086273193, |
|
"logits/rejected": -1.6318174600601196, |
|
"logps/chosen": -3.3435721397399902, |
|
"logps/rejected": -4.078289985656738, |
|
"loss": 25.0994, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.23941469192504883, |
|
"rewards/margins": 0.04007013887166977, |
|
"rewards/rejected": -0.2794848084449768, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.12586532410320955, |
|
"grad_norm": 115.02042388916016, |
|
"learning_rate": 3.9944232192470094e-05, |
|
"logits/chosen": -1.7137172222137451, |
|
"logits/rejected": -1.7910420894622803, |
|
"logps/chosen": -3.106358051300049, |
|
"logps/rejected": -3.97111439704895, |
|
"loss": 21.6293, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.2061949521303177, |
|
"rewards/margins": 0.04795767739415169, |
|
"rewards/rejected": -0.2541525959968567, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1290119572057898, |
|
"grad_norm": 81.87369537353516, |
|
"learning_rate": 3.992663461725383e-05, |
|
"logits/chosen": -1.5431886911392212, |
|
"logits/rejected": -1.557018518447876, |
|
"logps/chosen": -2.805392026901245, |
|
"logps/rejected": -4.356006622314453, |
|
"loss": 21.8817, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.1901206076145172, |
|
"rewards/margins": 0.0818587988615036, |
|
"rewards/rejected": -0.2719793915748596, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.13215859030837004, |
|
"grad_norm": 188.29173278808594, |
|
"learning_rate": 3.990663269435298e-05, |
|
"logits/chosen": -1.6920125484466553, |
|
"logits/rejected": -1.6854931116104126, |
|
"logps/chosen": -3.156735897064209, |
|
"logps/rejected": -4.396471977233887, |
|
"loss": 27.5638, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.20159177482128143, |
|
"rewards/margins": 0.07304342836141586, |
|
"rewards/rejected": -0.2746351957321167, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.13530522341095028, |
|
"grad_norm": 141.69142150878906, |
|
"learning_rate": 3.98842288371995e-05, |
|
"logits/chosen": -1.6487762928009033, |
|
"logits/rejected": -1.7372210025787354, |
|
"logps/chosen": -2.6156325340270996, |
|
"logps/rejected": -3.677928924560547, |
|
"loss": 21.5613, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.1552310734987259, |
|
"rewards/margins": 0.0671745091676712, |
|
"rewards/rejected": -0.2224055826663971, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.13845185651353054, |
|
"grad_norm": 92.31954193115234, |
|
"learning_rate": 3.985942574904275e-05, |
|
"logits/chosen": -1.677199363708496, |
|
"logits/rejected": -1.6414434909820557, |
|
"logps/chosen": -2.499932050704956, |
|
"logps/rejected": -3.3010895252227783, |
|
"loss": 22.2151, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.16245323419570923, |
|
"rewards/margins": 0.05558500811457634, |
|
"rewards/rejected": -0.21803824603557587, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.14159848961611077, |
|
"grad_norm": 106.32303619384766, |
|
"learning_rate": 3.983222642262329e-05, |
|
"logits/chosen": -1.6422779560089111, |
|
"logits/rejected": -1.6500838994979858, |
|
"logps/chosen": -2.66230845451355, |
|
"logps/rejected": -3.7150185108184814, |
|
"loss": 20.2102, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.18372972309589386, |
|
"rewards/margins": 0.08325181156396866, |
|
"rewards/rejected": -0.2669815421104431, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.144745122718691, |
|
"grad_norm": 113.5155029296875, |
|
"learning_rate": 3.980263413981178e-05, |
|
"logits/chosen": -1.5669496059417725, |
|
"logits/rejected": -1.5747731924057007, |
|
"logps/chosen": -3.1706671714782715, |
|
"logps/rejected": -3.948005199432373, |
|
"loss": 21.8852, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.2336561679840088, |
|
"rewards/margins": 0.06708776950836182, |
|
"rewards/rejected": -0.300743967294693, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.14789175582127123, |
|
"grad_norm": 99.03396606445312, |
|
"learning_rate": 3.977065247121298e-05, |
|
"logits/chosen": -1.639129400253296, |
|
"logits/rejected": -1.6693006753921509, |
|
"logps/chosen": -3.2495856285095215, |
|
"logps/rejected": -4.634251594543457, |
|
"loss": 22.4292, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.23980684578418732, |
|
"rewards/margins": 0.10619230568408966, |
|
"rewards/rejected": -0.345999151468277, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.1510383889238515, |
|
"grad_norm": 254.25714111328125, |
|
"learning_rate": 3.973628527573495e-05, |
|
"logits/chosen": -1.4451357126235962, |
|
"logits/rejected": -1.415290355682373, |
|
"logps/chosen": -4.496035575866699, |
|
"logps/rejected": -5.4740800857543945, |
|
"loss": 24.0697, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.3446175158023834, |
|
"rewards/margins": 0.07305804640054703, |
|
"rewards/rejected": -0.41767558455467224, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.15418502202643172, |
|
"grad_norm": 98.8416976928711, |
|
"learning_rate": 3.969953670012342e-05, |
|
"logits/chosen": -1.6127903461456299, |
|
"logits/rejected": -1.529802918434143, |
|
"logps/chosen": -3.744677782058716, |
|
"logps/rejected": -5.76874303817749, |
|
"loss": 20.2498, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.2814852297306061, |
|
"rewards/margins": 0.12259259074926376, |
|
"rewards/rejected": -0.40407782793045044, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.15733165512901195, |
|
"grad_norm": 174.65185546875, |
|
"learning_rate": 3.9660411178461427e-05, |
|
"logits/chosen": -1.6170070171356201, |
|
"logits/rejected": -1.5994997024536133, |
|
"logps/chosen": -3.390500545501709, |
|
"logps/rejected": -4.377715587615967, |
|
"loss": 22.3596, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.2286950647830963, |
|
"rewards/margins": 0.07162559777498245, |
|
"rewards/rejected": -0.30032065510749817, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1604782882315922, |
|
"grad_norm": 98.30694580078125, |
|
"learning_rate": 3.9618913431634326e-05, |
|
"logits/chosen": -1.5248662233352661, |
|
"logits/rejected": -1.570233702659607, |
|
"logps/chosen": -2.914156436920166, |
|
"logps/rejected": -3.4877963066101074, |
|
"loss": 21.8392, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.17873355746269226, |
|
"rewards/margins": 0.04700728505849838, |
|
"rewards/rejected": -0.22574086487293243, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.16362492133417245, |
|
"grad_norm": 108.39894104003906, |
|
"learning_rate": 3.957504846676015e-05, |
|
"logits/chosen": -1.5246005058288574, |
|
"logits/rejected": -1.6037238836288452, |
|
"logps/chosen": -3.113523006439209, |
|
"logps/rejected": -4.024534702301025, |
|
"loss": 21.9178, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.21206626296043396, |
|
"rewards/margins": 0.06214412301778793, |
|
"rewards/rejected": -0.2742103934288025, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.16677155443675268, |
|
"grad_norm": 122.53215026855469, |
|
"learning_rate": 3.952882157658545e-05, |
|
"logits/chosen": -1.4534975290298462, |
|
"logits/rejected": -1.4294064044952393, |
|
"logps/chosen": -3.44130277633667, |
|
"logps/rejected": -3.7570698261260986, |
|
"loss": 25.6929, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.2504199147224426, |
|
"rewards/margins": 0.021822316572070122, |
|
"rewards/rejected": -0.2722422182559967, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.1699181875393329, |
|
"grad_norm": 117.72502899169922, |
|
"learning_rate": 3.948023833884667e-05, |
|
"logits/chosen": -1.596609354019165, |
|
"logits/rejected": -1.6202917098999023, |
|
"logps/chosen": -3.7515816688537598, |
|
"logps/rejected": -3.9420647621154785, |
|
"loss": 25.1709, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.23414090275764465, |
|
"rewards/margins": 0.027978042140603065, |
|
"rewards/rejected": -0.26211896538734436, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.17306482064191314, |
|
"grad_norm": 84.38936614990234, |
|
"learning_rate": 3.942930461559718e-05, |
|
"logits/chosen": -1.5714600086212158, |
|
"logits/rejected": -1.683579683303833, |
|
"logps/chosen": -3.3148865699768066, |
|
"logps/rejected": -3.7648849487304688, |
|
"loss": 24.1859, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.2219853699207306, |
|
"rewards/margins": 0.02847103402018547, |
|
"rewards/rejected": -0.25045639276504517, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.1762114537444934, |
|
"grad_norm": 122.990478515625, |
|
"learning_rate": 3.9376026552499894e-05, |
|
"logits/chosen": -1.5986852645874023, |
|
"logits/rejected": -1.6811764240264893, |
|
"logps/chosen": -3.261617660522461, |
|
"logps/rejected": -4.3173418045043945, |
|
"loss": 19.8872, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.22893181443214417, |
|
"rewards/margins": 0.0762997642159462, |
|
"rewards/rejected": -0.30523157119750977, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.17935808684707363, |
|
"grad_norm": 128.1126251220703, |
|
"learning_rate": 3.9320410578085774e-05, |
|
"logits/chosen": -1.5240422487258911, |
|
"logits/rejected": -1.5410079956054688, |
|
"logps/chosen": -3.7498767375946045, |
|
"logps/rejected": -4.466190338134766, |
|
"loss": 22.8035, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.2702712118625641, |
|
"rewards/margins": 0.0467303991317749, |
|
"rewards/rejected": -0.3170016407966614, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.18250471994965387, |
|
"grad_norm": 160.00189208984375, |
|
"learning_rate": 3.9262463402978165e-05, |
|
"logits/chosen": -1.413119912147522, |
|
"logits/rejected": -1.3633155822753906, |
|
"logps/chosen": -3.8721237182617188, |
|
"logps/rejected": -5.0125298500061035, |
|
"loss": 22.2056, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3019005358219147, |
|
"rewards/margins": 0.08287017047405243, |
|
"rewards/rejected": -0.3847707211971283, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.1856513530522341, |
|
"grad_norm": 168.05908203125, |
|
"learning_rate": 3.920219201908306e-05, |
|
"logits/chosen": -1.2270746231079102, |
|
"logits/rejected": -1.2809008359909058, |
|
"logps/chosen": -4.052460670471191, |
|
"logps/rejected": -5.228961944580078, |
|
"loss": 21.1427, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.3124389052391052, |
|
"rewards/margins": 0.0833948403596878, |
|
"rewards/rejected": -0.3958337903022766, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.18879798615481436, |
|
"grad_norm": 94.47506713867188, |
|
"learning_rate": 3.9139603698745514e-05, |
|
"logits/chosen": -1.1681110858917236, |
|
"logits/rejected": -1.2372829914093018, |
|
"logps/chosen": -3.511944532394409, |
|
"logps/rejected": -4.100220680236816, |
|
"loss": 22.7025, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.24873590469360352, |
|
"rewards/margins": 0.03639525547623634, |
|
"rewards/rejected": -0.28513115644454956, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.1919446192573946, |
|
"grad_norm": 560.2835083007812, |
|
"learning_rate": 3.907470599387209e-05, |
|
"logits/chosen": -1.101466178894043, |
|
"logits/rejected": -1.0982881784439087, |
|
"logps/chosen": -3.0287392139434814, |
|
"logps/rejected": -3.3412985801696777, |
|
"loss": 25.2732, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.210398867726326, |
|
"rewards/margins": 0.023965148255228996, |
|
"rewards/rejected": -0.23436403274536133, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.19509125235997482, |
|
"grad_norm": 190.03529357910156, |
|
"learning_rate": 3.900750673501971e-05, |
|
"logits/chosen": -0.8078586459159851, |
|
"logits/rejected": -1.0514795780181885, |
|
"logps/chosen": -2.391371250152588, |
|
"logps/rejected": -3.401437282562256, |
|
"loss": 21.6721, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.14814777672290802, |
|
"rewards/margins": 0.07396493852138519, |
|
"rewards/rejected": -0.22211270034313202, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.19823788546255505, |
|
"grad_norm": 127.30278778076172, |
|
"learning_rate": 3.893801403045078e-05, |
|
"logits/chosen": -0.9948938488960266, |
|
"logits/rejected": -1.1343729496002197, |
|
"logps/chosen": -2.520848274230957, |
|
"logps/rejected": -3.695737838745117, |
|
"loss": 21.1395, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.16981211304664612, |
|
"rewards/margins": 0.08695949614048004, |
|
"rewards/rejected": -0.25677159428596497, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.2013845185651353, |
|
"grad_norm": 164.39279174804688, |
|
"learning_rate": 3.8866236265154864e-05, |
|
"logits/chosen": -1.059020757675171, |
|
"logits/rejected": -1.1909369230270386, |
|
"logps/chosen": -3.2958297729492188, |
|
"logps/rejected": -4.60178279876709, |
|
"loss": 23.1263, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.24349625408649445, |
|
"rewards/margins": 0.08941353857517242, |
|
"rewards/rejected": -0.33290979266166687, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.20453115166771554, |
|
"grad_norm": 317.5319519042969, |
|
"learning_rate": 3.8792182099836956e-05, |
|
"logits/chosen": -1.1690977811813354, |
|
"logits/rejected": -1.221868872642517, |
|
"logps/chosen": -3.4916579723358154, |
|
"logps/rejected": -4.967286109924316, |
|
"loss": 19.5685, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.2618243992328644, |
|
"rewards/margins": 0.09256020933389664, |
|
"rewards/rejected": -0.35438457131385803, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.20767778477029578, |
|
"grad_norm": 113.65757751464844, |
|
"learning_rate": 3.8715860469872456e-05, |
|
"logits/chosen": -1.230567216873169, |
|
"logits/rejected": -1.2354533672332764, |
|
"logps/chosen": -4.1219682693481445, |
|
"logps/rejected": -5.140664577484131, |
|
"loss": 24.1262, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.3101332485675812, |
|
"rewards/margins": 0.07826542854309082, |
|
"rewards/rejected": -0.3883987069129944, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.21082441787287604, |
|
"grad_norm": 103.66908264160156, |
|
"learning_rate": 3.863728058422905e-05, |
|
"logits/chosen": -1.1679656505584717, |
|
"logits/rejected": -1.2492824792861938, |
|
"logps/chosen": -4.176590442657471, |
|
"logps/rejected": -5.121442794799805, |
|
"loss": 21.9799, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.3107621371746063, |
|
"rewards/margins": 0.07555123418569565, |
|
"rewards/rejected": -0.38631340861320496, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.21397105097545627, |
|
"grad_norm": 187.34596252441406, |
|
"learning_rate": 3.855645192435555e-05, |
|
"logits/chosen": -1.4208840131759644, |
|
"logits/rejected": -1.357755422592163, |
|
"logps/chosen": -3.746802568435669, |
|
"logps/rejected": -4.651678562164307, |
|
"loss": 21.8739, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.2814106345176697, |
|
"rewards/margins": 0.06742358207702637, |
|
"rewards/rejected": -0.34883421659469604, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.2171176840780365, |
|
"grad_norm": 128.47970581054688, |
|
"learning_rate": 3.847338424303787e-05, |
|
"logits/chosen": -1.403939962387085, |
|
"logits/rejected": -1.3926942348480225, |
|
"logps/chosen": -3.540362596511841, |
|
"logps/rejected": -4.463648796081543, |
|
"loss": 22.9837, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.2591942250728607, |
|
"rewards/margins": 0.06667342782020569, |
|
"rewards/rejected": -0.3258676528930664, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.22026431718061673, |
|
"grad_norm": 91.00343322753906, |
|
"learning_rate": 3.838808756322222e-05, |
|
"logits/chosen": -1.4555909633636475, |
|
"logits/rejected": -1.4179480075836182, |
|
"logps/chosen": -3.3319029808044434, |
|
"logps/rejected": -4.7188615798950195, |
|
"loss": 22.182, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.24019880592823029, |
|
"rewards/margins": 0.09150617569684982, |
|
"rewards/rejected": -0.3317049741744995, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.223410950283197, |
|
"grad_norm": 89.21013641357422, |
|
"learning_rate": 3.8300572176805796e-05, |
|
"logits/chosen": -1.505953073501587, |
|
"logits/rejected": -1.4713289737701416, |
|
"logps/chosen": -3.2633144855499268, |
|
"logps/rejected": -4.148341655731201, |
|
"loss": 22.4622, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.23655016720294952, |
|
"rewards/margins": 0.04711543396115303, |
|
"rewards/rejected": -0.28366559743881226, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.22655758338577722, |
|
"grad_norm": 136.71780395507812, |
|
"learning_rate": 3.82108486433949e-05, |
|
"logits/chosen": -1.4959208965301514, |
|
"logits/rejected": -1.4308115243911743, |
|
"logps/chosen": -3.161681652069092, |
|
"logps/rejected": -3.9897868633270264, |
|
"loss": 23.3097, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.2291373759508133, |
|
"rewards/margins": 0.045841820538043976, |
|
"rewards/rejected": -0.2749791741371155, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.22970421648835745, |
|
"grad_norm": 233.93896484375, |
|
"learning_rate": 3.8118927789030854e-05, |
|
"logits/chosen": -1.5138304233551025, |
|
"logits/rejected": -1.5346544981002808, |
|
"logps/chosen": -4.37386417388916, |
|
"logps/rejected": -5.469226837158203, |
|
"loss": 20.9319, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.3008665442466736, |
|
"rewards/margins": 0.07115120440721512, |
|
"rewards/rejected": -0.3720177412033081, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.2328508495909377, |
|
"grad_norm": 100.57418060302734, |
|
"learning_rate": 3.802482070488373e-05, |
|
"logits/chosen": -1.3890790939331055, |
|
"logits/rejected": -1.4434179067611694, |
|
"logps/chosen": -3.4095160961151123, |
|
"logps/rejected": -4.254734039306641, |
|
"loss": 21.2175, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.24669814109802246, |
|
"rewards/margins": 0.06480761617422104, |
|
"rewards/rejected": -0.3115057349205017, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.23599748269351795, |
|
"grad_norm": 194.1370391845703, |
|
"learning_rate": 3.792853874591408e-05, |
|
"logits/chosen": -1.5562362670898438, |
|
"logits/rejected": -1.4487522840499878, |
|
"logps/chosen": -3.45831561088562, |
|
"logps/rejected": -4.16960334777832, |
|
"loss": 24.8363, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.23216786980628967, |
|
"rewards/margins": 0.047336287796497345, |
|
"rewards/rejected": -0.2795041799545288, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.23914411579609818, |
|
"grad_norm": 88.31356811523438, |
|
"learning_rate": 3.783009352950282e-05, |
|
"logits/chosen": -1.371385097503662, |
|
"logits/rejected": -1.373175859451294, |
|
"logps/chosen": -2.55993390083313, |
|
"logps/rejected": -3.111349105834961, |
|
"loss": 22.3814, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.17149809002876282, |
|
"rewards/margins": 0.04337615519762039, |
|
"rewards/rejected": -0.214874267578125, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.2422907488986784, |
|
"grad_norm": 126.74950408935547, |
|
"learning_rate": 3.772949693404954e-05, |
|
"logits/chosen": -1.33748459815979, |
|
"logits/rejected": -1.3754979372024536, |
|
"logps/chosen": -2.633439064025879, |
|
"logps/rejected": -3.534024715423584, |
|
"loss": 20.4661, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.17298361659049988, |
|
"rewards/margins": 0.07067564874887466, |
|
"rewards/rejected": -0.24365928769111633, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.24543738200125864, |
|
"grad_norm": 90.40318298339844, |
|
"learning_rate": 3.762676109753919e-05, |
|
"logits/chosen": -1.2709859609603882, |
|
"logits/rejected": -1.294306755065918, |
|
"logps/chosen": -3.954099655151367, |
|
"logps/rejected": -5.9721527099609375, |
|
"loss": 21.932, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.29533275961875916, |
|
"rewards/margins": 0.12940457463264465, |
|
"rewards/rejected": -0.4247373640537262, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.2485840151038389, |
|
"grad_norm": 84.59414672851562, |
|
"learning_rate": 3.7521898416077565e-05, |
|
"logits/chosen": -1.4984506368637085, |
|
"logits/rejected": -1.5229644775390625, |
|
"logps/chosen": -4.4091901779174805, |
|
"logps/rejected": -5.3940815925598145, |
|
"loss": 21.5859, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.3109613358974457, |
|
"rewards/margins": 0.08055521547794342, |
|
"rewards/rejected": -0.3915165364742279, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.2517306482064191, |
|
"grad_norm": 120.28202056884766, |
|
"learning_rate": 3.7414921542395546e-05, |
|
"logits/chosen": -1.5182693004608154, |
|
"logits/rejected": -1.5193490982055664, |
|
"logps/chosen": -4.545083045959473, |
|
"logps/rejected": -5.492725372314453, |
|
"loss": 21.539, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.36579376459121704, |
|
"rewards/margins": 0.06641928851604462, |
|
"rewards/rejected": -0.4322130084037781, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2548772813089994, |
|
"grad_norm": 143.28396606445312, |
|
"learning_rate": 3.7305843384322466e-05, |
|
"logits/chosen": -1.5114035606384277, |
|
"logits/rejected": -1.5092270374298096, |
|
"logps/chosen": -5.28603982925415, |
|
"logps/rejected": -6.232533931732178, |
|
"loss": 21.4891, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.43935927748680115, |
|
"rewards/margins": 0.08039890229701996, |
|
"rewards/rejected": -0.5197581648826599, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.2580239144115796, |
|
"grad_norm": 129.09864807128906, |
|
"learning_rate": 3.71946771032286e-05, |
|
"logits/chosen": -1.6940416097640991, |
|
"logits/rejected": -1.6389005184173584, |
|
"logps/chosen": -5.122313022613525, |
|
"logps/rejected": -6.010600566864014, |
|
"loss": 21.8681, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.42212480306625366, |
|
"rewards/margins": 0.076592817902565, |
|
"rewards/rejected": -0.49871763586997986, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.26117054751415986, |
|
"grad_norm": 1118.02392578125, |
|
"learning_rate": 3.708143611243716e-05, |
|
"logits/chosen": -1.65127432346344, |
|
"logits/rejected": -1.6758639812469482, |
|
"logps/chosen": -5.203777313232422, |
|
"logps/rejected": -6.3162078857421875, |
|
"loss": 21.2512, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.37822961807250977, |
|
"rewards/margins": 0.09629149734973907, |
|
"rewards/rejected": -0.4745211601257324, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.2643171806167401, |
|
"grad_norm": 109.98821258544922, |
|
"learning_rate": 3.696613407560582e-05, |
|
"logits/chosen": -1.6237115859985352, |
|
"logits/rejected": -1.5712984800338745, |
|
"logps/chosen": -4.632975101470947, |
|
"logps/rejected": -6.082078456878662, |
|
"loss": 20.9477, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.3740273118019104, |
|
"rewards/margins": 0.103847935795784, |
|
"rewards/rejected": -0.4778752326965332, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.2674638137193203, |
|
"grad_norm": 95.2988052368164, |
|
"learning_rate": 3.684878490507808e-05, |
|
"logits/chosen": -1.5806386470794678, |
|
"logits/rejected": -1.6192169189453125, |
|
"logps/chosen": -4.849827766418457, |
|
"logps/rejected": -5.800168037414551, |
|
"loss": 23.5806, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.3749791085720062, |
|
"rewards/margins": 0.07270670682191849, |
|
"rewards/rejected": -0.4476858079433441, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.27061044682190055, |
|
"grad_norm": 111.99176788330078, |
|
"learning_rate": 3.6729402760204535e-05, |
|
"logits/chosen": -1.6522388458251953, |
|
"logits/rejected": -1.6433773040771484, |
|
"logps/chosen": -3.4129672050476074, |
|
"logps/rejected": -4.362156867980957, |
|
"loss": 21.9253, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.25616371631622314, |
|
"rewards/margins": 0.07649270445108414, |
|
"rewards/rejected": -0.3326564431190491, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.2737570799244808, |
|
"grad_norm": 219.88124084472656, |
|
"learning_rate": 3.6608002045634535e-05, |
|
"logits/chosen": -1.7825971841812134, |
|
"logits/rejected": -1.6959110498428345, |
|
"logps/chosen": -3.785250425338745, |
|
"logps/rejected": -4.989777565002441, |
|
"loss": 22.1928, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.26640018820762634, |
|
"rewards/margins": 0.07046084105968475, |
|
"rewards/rejected": -0.3368610143661499, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.27690371302706107, |
|
"grad_norm": 110.93528747558594, |
|
"learning_rate": 3.6484597409577975e-05, |
|
"logits/chosen": -1.8389028310775757, |
|
"logits/rejected": -1.7533693313598633, |
|
"logps/chosen": -3.4091110229492188, |
|
"logps/rejected": -4.324118614196777, |
|
"loss": 21.2394, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.25699272751808167, |
|
"rewards/margins": 0.06507135927677155, |
|
"rewards/rejected": -0.322064071893692, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.2800503461296413, |
|
"grad_norm": 128.312255859375, |
|
"learning_rate": 3.6359203742037966e-05, |
|
"logits/chosen": -1.8402115106582642, |
|
"logits/rejected": -1.7344493865966797, |
|
"logps/chosen": -4.041749000549316, |
|
"logps/rejected": -4.417330741882324, |
|
"loss": 22.7853, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.2921445965766907, |
|
"rewards/margins": 0.04909106716513634, |
|
"rewards/rejected": -0.3412356376647949, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.28319697923222154, |
|
"grad_norm": 121.12706756591797, |
|
"learning_rate": 3.623183617301411e-05, |
|
"logits/chosen": -1.7311460971832275, |
|
"logits/rejected": -1.7096502780914307, |
|
"logps/chosen": -3.8819706439971924, |
|
"logps/rejected": -4.670237064361572, |
|
"loss": 22.6275, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.30139902234077454, |
|
"rewards/margins": 0.05851779133081436, |
|
"rewards/rejected": -0.3599168360233307, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.28634361233480177, |
|
"grad_norm": 93.03048706054688, |
|
"learning_rate": 3.610251007067699e-05, |
|
"logits/chosen": -1.836363434791565, |
|
"logits/rejected": -1.736104965209961, |
|
"logps/chosen": -4.1447577476501465, |
|
"logps/rejected": -4.325010299682617, |
|
"loss": 26.2728, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.32724231481552124, |
|
"rewards/margins": 0.010385597124695778, |
|
"rewards/rejected": -0.33762794733047485, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.289490245437382, |
|
"grad_norm": 76.58390808105469, |
|
"learning_rate": 3.597124103951379e-05, |
|
"logits/chosen": -1.7278220653533936, |
|
"logits/rejected": -1.7181174755096436, |
|
"logps/chosen": -4.0262017250061035, |
|
"logps/rejected": -4.855641841888428, |
|
"loss": 22.3804, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.2886626124382019, |
|
"rewards/margins": 0.06016182899475098, |
|
"rewards/rejected": -0.3488244414329529, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.29263687853996223, |
|
"grad_norm": 80.33660888671875, |
|
"learning_rate": 3.583804491844551e-05, |
|
"logits/chosen": -1.8658571243286133, |
|
"logits/rejected": -1.7413606643676758, |
|
"logps/chosen": -3.758129835128784, |
|
"logps/rejected": -4.306906223297119, |
|
"loss": 26.088, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2766272723674774, |
|
"rewards/margins": 0.03810672461986542, |
|
"rewards/rejected": -0.31473398208618164, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.29578351164254246, |
|
"grad_norm": 66.17215728759766, |
|
"learning_rate": 3.5702937778915765e-05, |
|
"logits/chosen": -1.8694692850112915, |
|
"logits/rejected": -1.82939875125885, |
|
"logps/chosen": -2.9322712421417236, |
|
"logps/rejected": -3.7157013416290283, |
|
"loss": 21.7852, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.2061152458190918, |
|
"rewards/margins": 0.056372471153736115, |
|
"rewards/rejected": -0.2624877095222473, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.2989301447451227, |
|
"grad_norm": 95.2267837524414, |
|
"learning_rate": 3.556593592295171e-05, |
|
"logits/chosen": -1.8632274866104126, |
|
"logits/rejected": -1.8683363199234009, |
|
"logps/chosen": -2.8304595947265625, |
|
"logps/rejected": -3.464296817779541, |
|
"loss": 22.1458, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.19707690179347992, |
|
"rewards/margins": 0.04870922490954399, |
|
"rewards/rejected": -0.24578611552715302, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.302076777847703, |
|
"grad_norm": 128.1005096435547, |
|
"learning_rate": 3.5427055881196946e-05, |
|
"logits/chosen": -1.7504918575286865, |
|
"logits/rejected": -1.8846075534820557, |
|
"logps/chosen": -2.7551674842834473, |
|
"logps/rejected": -3.501314163208008, |
|
"loss": 21.4037, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.192325159907341, |
|
"rewards/margins": 0.05459358170628548, |
|
"rewards/rejected": -0.2469187080860138, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.3052234109502832, |
|
"grad_norm": 64.81920623779297, |
|
"learning_rate": 3.5286314410916967e-05, |
|
"logits/chosen": -1.8015562295913696, |
|
"logits/rejected": -1.9157085418701172, |
|
"logps/chosen": -3.297150135040283, |
|
"logps/rejected": -4.347265243530273, |
|
"loss": 20.2599, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.24196556210517883, |
|
"rewards/margins": 0.06687469035387039, |
|
"rewards/rejected": -0.30884024500846863, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.30837004405286345, |
|
"grad_norm": 121.4966812133789, |
|
"learning_rate": 3.5143728493977245e-05, |
|
"logits/chosen": -1.7404873371124268, |
|
"logits/rejected": -1.8498218059539795, |
|
"logps/chosen": -3.553678035736084, |
|
"logps/rejected": -4.084536075592041, |
|
"loss": 24.4702, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.2725631594657898, |
|
"rewards/margins": 0.037132084369659424, |
|
"rewards/rejected": -0.3096952736377716, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.3115166771554437, |
|
"grad_norm": 102.46180725097656, |
|
"learning_rate": 3.499931533479417e-05, |
|
"logits/chosen": -1.7682313919067383, |
|
"logits/rejected": -1.7660820484161377, |
|
"logps/chosen": -3.595475435256958, |
|
"logps/rejected": -4.801576137542725, |
|
"loss": 20.9722, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.2746976315975189, |
|
"rewards/margins": 0.10004003345966339, |
|
"rewards/rejected": -0.3747376501560211, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.3146633102580239, |
|
"grad_norm": 100.82923889160156, |
|
"learning_rate": 3.485309235825916e-05, |
|
"logits/chosen": -1.7638380527496338, |
|
"logits/rejected": -1.857962965965271, |
|
"logps/chosen": -4.1785569190979, |
|
"logps/rejected": -5.445678234100342, |
|
"loss": 20.121, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.30823373794555664, |
|
"rewards/margins": 0.09736496210098267, |
|
"rewards/rejected": -0.4055987298488617, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.31780994336060414, |
|
"grad_norm": 299.635009765625, |
|
"learning_rate": 3.470507720763625e-05, |
|
"logits/chosen": -1.7603092193603516, |
|
"logits/rejected": -1.8294856548309326, |
|
"logps/chosen": -3.818953037261963, |
|
"logps/rejected": -4.965951442718506, |
|
"loss": 24.0421, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.2867090702056885, |
|
"rewards/margins": 0.09908684343099594, |
|
"rewards/rejected": -0.385795921087265, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.3209565764631844, |
|
"grad_norm": 121.77188110351562, |
|
"learning_rate": 3.4555287742433115e-05, |
|
"logits/chosen": -1.8968608379364014, |
|
"logits/rejected": -1.863628625869751, |
|
"logps/chosen": -3.3851046562194824, |
|
"logps/rejected": -4.313992500305176, |
|
"loss": 21.5651, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.2504531145095825, |
|
"rewards/margins": 0.07505444437265396, |
|
"rewards/rejected": -0.3255075514316559, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.3241032095657646, |
|
"grad_norm": 84.7723617553711, |
|
"learning_rate": 3.440374203624628e-05, |
|
"logits/chosen": -1.8949018716812134, |
|
"logits/rejected": -2.03389573097229, |
|
"logps/chosen": -3.739046573638916, |
|
"logps/rejected": -4.937285423278809, |
|
"loss": 22.0895, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2827950417995453, |
|
"rewards/margins": 0.07987246662378311, |
|
"rewards/rejected": -0.3626675605773926, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.3272498426683449, |
|
"grad_norm": 96.02967071533203, |
|
"learning_rate": 3.425045837458028e-05, |
|
"logits/chosen": -1.9336235523223877, |
|
"logits/rejected": -1.9811556339263916, |
|
"logps/chosen": -3.5748794078826904, |
|
"logps/rejected": -4.64247465133667, |
|
"loss": 20.7454, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2698992192745209, |
|
"rewards/margins": 0.07278282940387726, |
|
"rewards/rejected": -0.3426820635795593, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.3303964757709251, |
|
"grad_norm": 138.71051025390625, |
|
"learning_rate": 3.4095455252641376e-05, |
|
"logits/chosen": -1.938104271888733, |
|
"logits/rejected": -2.024137020111084, |
|
"logps/chosen": -4.332060813903809, |
|
"logps/rejected": -5.391437530517578, |
|
"loss": 23.3511, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3168641924858093, |
|
"rewards/margins": 0.049729883670806885, |
|
"rewards/rejected": -0.3665940761566162, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.33354310887350536, |
|
"grad_norm": 93.8726577758789, |
|
"learning_rate": 3.393875137310588e-05, |
|
"logits/chosen": -1.8752260208129883, |
|
"logits/rejected": -1.8945411443710327, |
|
"logps/chosen": -4.053868770599365, |
|
"logps/rejected": -5.044325828552246, |
|
"loss": 21.8528, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.3227534890174866, |
|
"rewards/margins": 0.0821223258972168, |
|
"rewards/rejected": -0.4048757553100586, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.3366897419760856, |
|
"grad_norm": 261.39129638671875, |
|
"learning_rate": 3.378036564386349e-05, |
|
"logits/chosen": -1.770957589149475, |
|
"logits/rejected": -1.8808790445327759, |
|
"logps/chosen": -3.8808326721191406, |
|
"logps/rejected": -4.960693836212158, |
|
"loss": 23.7267, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.3041539788246155, |
|
"rewards/margins": 0.08733677119016647, |
|
"rewards/rejected": -0.39149072766304016, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.3398363750786658, |
|
"grad_norm": 141.79991149902344, |
|
"learning_rate": 3.3620317175735945e-05, |
|
"logits/chosen": -1.929517149925232, |
|
"logits/rejected": -1.8599262237548828, |
|
"logps/chosen": -4.427219867706299, |
|
"logps/rejected": -5.757664680480957, |
|
"loss": 20.8591, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.3481447994709015, |
|
"rewards/margins": 0.0858476310968399, |
|
"rewards/rejected": -0.4339924454689026, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.34298300818124605, |
|
"grad_norm": 76.495361328125, |
|
"learning_rate": 3.345862528017101e-05, |
|
"logits/chosen": -1.8648240566253662, |
|
"logits/rejected": -1.899430513381958, |
|
"logps/chosen": -4.430551528930664, |
|
"logps/rejected": -5.134209156036377, |
|
"loss": 21.6823, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3582889139652252, |
|
"rewards/margins": 0.05610053986310959, |
|
"rewards/rejected": -0.4143894612789154, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.3461296412838263, |
|
"grad_norm": 65.95896911621094, |
|
"learning_rate": 3.32953094669124e-05, |
|
"logits/chosen": -1.6951459646224976, |
|
"logits/rejected": -1.7398831844329834, |
|
"logps/chosen": -5.35291051864624, |
|
"logps/rejected": -6.347973823547363, |
|
"loss": 24.8551, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.4343182146549225, |
|
"rewards/margins": 0.085027314722538, |
|
"rewards/rejected": -0.5193454623222351, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.34927627438640657, |
|
"grad_norm": 64.50738525390625, |
|
"learning_rate": 3.313038944164577e-05, |
|
"logits/chosen": -1.7779582738876343, |
|
"logits/rejected": -1.8077032566070557, |
|
"logps/chosen": -4.008457183837891, |
|
"logps/rejected": -5.838412761688232, |
|
"loss": 19.2472, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.3185553550720215, |
|
"rewards/margins": 0.10776933282613754, |
|
"rewards/rejected": -0.4263246953487396, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.3524229074889868, |
|
"grad_norm": 62.579227447509766, |
|
"learning_rate": 3.296388510362095e-05, |
|
"logits/chosen": -1.5932537317276, |
|
"logits/rejected": -1.7019790410995483, |
|
"logps/chosen": -4.049741268157959, |
|
"logps/rejected": -4.859818935394287, |
|
"loss": 21.4107, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.29325228929519653, |
|
"rewards/margins": 0.06688085943460464, |
|
"rewards/rejected": -0.36013317108154297, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.35556954059156703, |
|
"grad_norm": 105.9216079711914, |
|
"learning_rate": 3.2795816543250977e-05, |
|
"logits/chosen": -1.5411794185638428, |
|
"logits/rejected": -1.5789968967437744, |
|
"logps/chosen": -3.8824076652526855, |
|
"logps/rejected": -4.560225486755371, |
|
"loss": 23.1195, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.2929798662662506, |
|
"rewards/margins": 0.05188722163438797, |
|
"rewards/rejected": -0.34486711025238037, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.35871617369414727, |
|
"grad_norm": 55.46923065185547, |
|
"learning_rate": 3.262620403968792e-05, |
|
"logits/chosen": -1.5855820178985596, |
|
"logits/rejected": -1.7370961904525757, |
|
"logps/chosen": -3.6918272972106934, |
|
"logps/rejected": -5.205948352813721, |
|
"loss": 19.1367, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.27848196029663086, |
|
"rewards/margins": 0.11322972923517227, |
|
"rewards/rejected": -0.3917117416858673, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.3618628067967275, |
|
"grad_norm": 114.82603454589844, |
|
"learning_rate": 3.245506805837605e-05, |
|
"logits/chosen": -1.6395822763442993, |
|
"logits/rejected": -1.8543764352798462, |
|
"logps/chosen": -4.298351287841797, |
|
"logps/rejected": -5.546226501464844, |
|
"loss": 19.9406, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.30993199348449707, |
|
"rewards/margins": 0.08511951565742493, |
|
"rewards/rejected": -0.3950514793395996, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.36500943989930773, |
|
"grad_norm": 174.55496215820312, |
|
"learning_rate": 3.228242924858248e-05, |
|
"logits/chosen": -1.5872471332550049, |
|
"logits/rejected": -1.688132882118225, |
|
"logps/chosen": -4.568819999694824, |
|
"logps/rejected": -5.411607265472412, |
|
"loss": 22.4314, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.34597450494766235, |
|
"rewards/margins": 0.07728902995586395, |
|
"rewards/rejected": -0.4232635498046875, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.36815607300188796, |
|
"grad_norm": 70.5542221069336, |
|
"learning_rate": 3.210830844090555e-05, |
|
"logits/chosen": -1.6192104816436768, |
|
"logits/rejected": -1.6785539388656616, |
|
"logps/chosen": -5.1252007484436035, |
|
"logps/rejected": -5.851187705993652, |
|
"loss": 25.8619, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.40460100769996643, |
|
"rewards/margins": 0.06072293594479561, |
|
"rewards/rejected": -0.46532392501831055, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.3713027061044682, |
|
"grad_norm": 100.62268829345703, |
|
"learning_rate": 3.193272664476152e-05, |
|
"logits/chosen": -1.7602649927139282, |
|
"logits/rejected": -1.9346716403961182, |
|
"logps/chosen": -4.961272239685059, |
|
"logps/rejected": -5.8130645751953125, |
|
"loss": 22.8852, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.3982604444026947, |
|
"rewards/margins": 0.059664536267519, |
|
"rewards/rejected": -0.457925021648407, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.3744493392070485, |
|
"grad_norm": 411.0801696777344, |
|
"learning_rate": 3.1755705045849465e-05, |
|
"logits/chosen": -1.7633399963378906, |
|
"logits/rejected": -1.818737268447876, |
|
"logps/chosen": -5.510100364685059, |
|
"logps/rejected": -6.382575035095215, |
|
"loss": 23.8471, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.4236491620540619, |
|
"rewards/margins": 0.06903719902038574, |
|
"rewards/rejected": -0.49268636107444763, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.3775959723096287, |
|
"grad_norm": 98.035888671875, |
|
"learning_rate": 3.157726500359509e-05, |
|
"logits/chosen": -1.825554609298706, |
|
"logits/rejected": -1.907472014427185, |
|
"logps/chosen": -5.569567680358887, |
|
"logps/rejected": -6.1025004386901855, |
|
"loss": 24.087, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.4460601210594177, |
|
"rewards/margins": 0.03472483158111572, |
|
"rewards/rejected": -0.48078498244285583, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.38074260541220895, |
|
"grad_norm": 80.47187805175781, |
|
"learning_rate": 3.1397428048573465e-05, |
|
"logits/chosen": -1.798015832901001, |
|
"logits/rejected": -1.9216489791870117, |
|
"logps/chosen": -4.644695281982422, |
|
"logps/rejected": -5.7896294593811035, |
|
"loss": 19.835, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.380901575088501, |
|
"rewards/margins": 0.08407244086265564, |
|
"rewards/rejected": -0.4649740159511566, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.3838892385147892, |
|
"grad_norm": 65.88395690917969, |
|
"learning_rate": 3.121621587991113e-05, |
|
"logits/chosen": -1.9489303827285767, |
|
"logits/rejected": -1.9782030582427979, |
|
"logps/chosen": -4.736275672912598, |
|
"logps/rejected": -5.893181800842285, |
|
"loss": 21.2523, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.37491172552108765, |
|
"rewards/margins": 0.09068160504102707, |
|
"rewards/rejected": -0.46559327840805054, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.3870358716173694, |
|
"grad_norm": 126.57975769042969, |
|
"learning_rate": 3.1033650362667935e-05, |
|
"logits/chosen": -1.945927619934082, |
|
"logits/rejected": -2.0246009826660156, |
|
"logps/chosen": -4.42104434967041, |
|
"logps/rejected": -5.623631000518799, |
|
"loss": 20.477, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.3376965820789337, |
|
"rewards/margins": 0.07996558398008347, |
|
"rewards/rejected": -0.41766220331192017, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.39018250471994964, |
|
"grad_norm": 88.92438507080078, |
|
"learning_rate": 3.084975352519874e-05, |
|
"logits/chosen": -2.063378095626831, |
|
"logits/rejected": -2.161208391189575, |
|
"logps/chosen": -4.2682085037231445, |
|
"logps/rejected": -5.291066646575928, |
|
"loss": 22.2295, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.3386593759059906, |
|
"rewards/margins": 0.07158732414245605, |
|
"rewards/rejected": -0.41024675965309143, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.3933291378225299, |
|
"grad_norm": 53.47737503051758, |
|
"learning_rate": 3.06645475564955e-05, |
|
"logits/chosen": -1.9409205913543701, |
|
"logits/rejected": -2.0371243953704834, |
|
"logps/chosen": -3.6241352558135986, |
|
"logps/rejected": -5.033164978027344, |
|
"loss": 20.5698, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.27396219968795776, |
|
"rewards/margins": 0.09085332602262497, |
|
"rewards/rejected": -0.36481553316116333, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.3964757709251101, |
|
"grad_norm": 87.2447738647461, |
|
"learning_rate": 3.0478054803509975e-05, |
|
"logits/chosen": -1.9413238763809204, |
|
"logits/rejected": -1.989638328552246, |
|
"logps/chosen": -3.974926710128784, |
|
"logps/rejected": -5.115756034851074, |
|
"loss": 20.8679, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.3056657314300537, |
|
"rewards/margins": 0.09486590325832367, |
|
"rewards/rejected": -0.4005316197872162, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.3996224040276904, |
|
"grad_norm": 105.37754821777344, |
|
"learning_rate": 3.029029776845726e-05, |
|
"logits/chosen": -1.9769777059555054, |
|
"logits/rejected": -2.0631349086761475, |
|
"logps/chosen": -4.811491012573242, |
|
"logps/rejected": -6.024916648864746, |
|
"loss": 22.3949, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.36858421564102173, |
|
"rewards/margins": 0.09452919661998749, |
|
"rewards/rejected": -0.463113397359848, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.4027690371302706, |
|
"grad_norm": 107.63380432128906, |
|
"learning_rate": 3.0101299106100766e-05, |
|
"logits/chosen": -1.9259755611419678, |
|
"logits/rejected": -2.0011420249938965, |
|
"logps/chosen": -4.672276496887207, |
|
"logps/rejected": -5.433979034423828, |
|
"loss": 23.4548, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.34038934111595154, |
|
"rewards/margins": 0.05264373868703842, |
|
"rewards/rejected": -0.39303308725357056, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.40591567023285086, |
|
"grad_norm": 72.93191528320312, |
|
"learning_rate": 2.991108162101862e-05, |
|
"logits/chosen": -1.8639154434204102, |
|
"logits/rejected": -2.00860333442688, |
|
"logps/chosen": -4.0379438400268555, |
|
"logps/rejected": -4.966481685638428, |
|
"loss": 24.2063, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.3016512095928192, |
|
"rewards/margins": 0.05989114195108414, |
|
"rewards/rejected": -0.36154234409332275, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.4090623033354311, |
|
"grad_norm": 241.30491638183594, |
|
"learning_rate": 2.971966826485212e-05, |
|
"logits/chosen": -2.0276923179626465, |
|
"logits/rejected": -2.075092077255249, |
|
"logps/chosen": -3.9584078788757324, |
|
"logps/rejected": -4.5398454666137695, |
|
"loss": 22.3358, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.2686071991920471, |
|
"rewards/margins": 0.05414595082402229, |
|
"rewards/rejected": -0.3227531313896179, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.4122089364380113, |
|
"grad_norm": 72.65229797363281, |
|
"learning_rate": 2.952708213353636e-05, |
|
"logits/chosen": -2.087306499481201, |
|
"logits/rejected": -2.120595932006836, |
|
"logps/chosen": -2.7464280128479004, |
|
"logps/rejected": -3.2665913105010986, |
|
"loss": 23.396, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.19495923817157745, |
|
"rewards/margins": 0.03470323234796524, |
|
"rewards/rejected": -0.2296624630689621, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.41535556954059155, |
|
"grad_norm": 36.565982818603516, |
|
"learning_rate": 2.9333346464513476e-05, |
|
"logits/chosen": -2.0568580627441406, |
|
"logits/rejected": -2.171510934829712, |
|
"logps/chosen": -3.1527762413024902, |
|
"logps/rejected": -3.5696024894714355, |
|
"loss": 23.204, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.2180822342634201, |
|
"rewards/margins": 0.029619824141263962, |
|
"rewards/rejected": -0.24770204722881317, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.4185022026431718, |
|
"grad_norm": 57.84255599975586, |
|
"learning_rate": 2.9138484633928818e-05, |
|
"logits/chosen": -1.940320372581482, |
|
"logits/rejected": -1.9845908880233765, |
|
"logps/chosen": -3.0434772968292236, |
|
"logps/rejected": -3.5398964881896973, |
|
"loss": 24.3501, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.2063741683959961, |
|
"rewards/margins": 0.023456847295165062, |
|
"rewards/rejected": -0.2298310250043869, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.42164883574575207, |
|
"grad_norm": 56.995887756347656, |
|
"learning_rate": 2.8942520153810396e-05, |
|
"logits/chosen": -2.0002236366271973, |
|
"logits/rejected": -2.08671498298645, |
|
"logps/chosen": -2.834512710571289, |
|
"logps/rejected": -3.5050129890441895, |
|
"loss": 22.4039, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.18090704083442688, |
|
"rewards/margins": 0.04532923549413681, |
|
"rewards/rejected": -0.2262362688779831, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.4247954688483323, |
|
"grad_norm": 75.65125274658203, |
|
"learning_rate": 2.8745476669231894e-05, |
|
"logits/chosen": -2.020886182785034, |
|
"logits/rejected": -2.111823558807373, |
|
"logps/chosen": -3.5571112632751465, |
|
"logps/rejected": -4.481097221374512, |
|
"loss": 22.9676, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.22384686768054962, |
|
"rewards/margins": 0.04108366742730141, |
|
"rewards/rejected": -0.2649305462837219, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.42794210195091253, |
|
"grad_norm": 77.30415344238281, |
|
"learning_rate": 2.8547377955459704e-05, |
|
"logits/chosen": -1.9961265325546265, |
|
"logits/rejected": -2.0482177734375, |
|
"logps/chosen": -2.892690658569336, |
|
"logps/rejected": -3.2253260612487793, |
|
"loss": 25.6658, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.19483526051044464, |
|
"rewards/margins": 0.01912742853164673, |
|
"rewards/rejected": -0.21396267414093018, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.43108873505349277, |
|
"grad_norm": 49.21062088012695, |
|
"learning_rate": 2.834824791508413e-05, |
|
"logits/chosen": -1.930086374282837, |
|
"logits/rejected": -2.131298542022705, |
|
"logps/chosen": -2.739534854888916, |
|
"logps/rejected": -3.5602822303771973, |
|
"loss": 21.1908, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.17746233940124512, |
|
"rewards/margins": 0.06554970890283585, |
|
"rewards/rejected": -0.24301204085350037, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.434235368156073, |
|
"grad_norm": 64.88590240478516, |
|
"learning_rate": 2.814811057513537e-05, |
|
"logits/chosen": -2.0517029762268066, |
|
"logits/rejected": -2.067883253097534, |
|
"logps/chosen": -2.82458758354187, |
|
"logps/rejected": -3.6670260429382324, |
|
"loss": 21.8595, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.1833667755126953, |
|
"rewards/margins": 0.0560932457447052, |
|
"rewards/rejected": -0.2394600361585617, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.43738200125865323, |
|
"grad_norm": 48.841331481933594, |
|
"learning_rate": 2.7946990084184383e-05, |
|
"logits/chosen": -1.798683524131775, |
|
"logits/rejected": -1.9806129932403564, |
|
"logps/chosen": -3.2995662689208984, |
|
"logps/rejected": -4.0815110206604, |
|
"loss": 22.0918, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.2146444320678711, |
|
"rewards/margins": 0.05965212732553482, |
|
"rewards/rejected": -0.27429652214050293, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.44052863436123346, |
|
"grad_norm": 266.59381103515625, |
|
"learning_rate": 2.7744910709429104e-05, |
|
"logits/chosen": -1.800355315208435, |
|
"logits/rejected": -1.9262745380401611, |
|
"logps/chosen": -3.308371067047119, |
|
"logps/rejected": -4.3786821365356445, |
|
"loss": 22.6616, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.20500688254833221, |
|
"rewards/margins": 0.07705695927143097, |
|
"rewards/rejected": -0.2820638120174408, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.4436752674638137, |
|
"grad_norm": 45.74457550048828, |
|
"learning_rate": 2.754189683376641e-05, |
|
"logits/chosen": -1.8245214223861694, |
|
"logits/rejected": -1.9188095331192017, |
|
"logps/chosen": -2.6574292182922363, |
|
"logps/rejected": -3.3347110748291016, |
|
"loss": 21.6472, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.1792256087064743, |
|
"rewards/margins": 0.054762959480285645, |
|
"rewards/rejected": -0.23398856818675995, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.446821900566394, |
|
"grad_norm": 82.67216491699219, |
|
"learning_rate": 2.7337972952850047e-05, |
|
"logits/chosen": -1.764173150062561, |
|
"logits/rejected": -1.9260650873184204, |
|
"logps/chosen": -2.8055293560028076, |
|
"logps/rejected": -3.9603447914123535, |
|
"loss": 21.7022, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.19627173244953156, |
|
"rewards/margins": 0.07794789969921112, |
|
"rewards/rejected": -0.2742196321487427, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.4499685336689742, |
|
"grad_norm": 63.396240234375, |
|
"learning_rate": 2.713316367213499e-05, |
|
"logits/chosen": -1.6747219562530518, |
|
"logits/rejected": -1.8347587585449219, |
|
"logps/chosen": -2.9625911712646484, |
|
"logps/rejected": -3.7656357288360596, |
|
"loss": 22.6149, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.21037223935127258, |
|
"rewards/margins": 0.05833571031689644, |
|
"rewards/rejected": -0.26870793104171753, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.45311516677155445, |
|
"grad_norm": 118.00112915039062, |
|
"learning_rate": 2.692749370390855e-05, |
|
"logits/chosen": -1.7990179061889648, |
|
"logits/rejected": -1.8915067911148071, |
|
"logps/chosen": -3.0249316692352295, |
|
"logps/rejected": -4.06134033203125, |
|
"loss": 23.4425, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.21054935455322266, |
|
"rewards/margins": 0.05246324464678764, |
|
"rewards/rejected": -0.2630125880241394, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.4562617998741347, |
|
"grad_norm": 64.52631378173828, |
|
"learning_rate": 2.6720987864308603e-05, |
|
"logits/chosen": -1.695908546447754, |
|
"logits/rejected": -1.7583353519439697, |
|
"logps/chosen": -2.815432548522949, |
|
"logps/rejected": -4.123710632324219, |
|
"loss": 21.0095, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.1960502415895462, |
|
"rewards/margins": 0.08241166174411774, |
|
"rewards/rejected": -0.27846187353134155, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.4594084329767149, |
|
"grad_norm": 59.4410285949707, |
|
"learning_rate": 2.6513671070329244e-05, |
|
"logits/chosen": -1.7788522243499756, |
|
"logits/rejected": -1.8245208263397217, |
|
"logps/chosen": -3.012934446334839, |
|
"logps/rejected": -4.003429412841797, |
|
"loss": 21.1484, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.2150099277496338, |
|
"rewards/margins": 0.07829871028661728, |
|
"rewards/rejected": -0.2933086156845093, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.46255506607929514, |
|
"grad_norm": 84.89627075195312, |
|
"learning_rate": 2.630556833681434e-05, |
|
"logits/chosen": -1.738438606262207, |
|
"logits/rejected": -1.8424345254898071, |
|
"logps/chosen": -2.7983458042144775, |
|
"logps/rejected": -4.087245941162109, |
|
"loss": 19.2453, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.19751907885074615, |
|
"rewards/margins": 0.09776587784290314, |
|
"rewards/rejected": -0.2952849566936493, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.4657016991818754, |
|
"grad_norm": 101.38806915283203, |
|
"learning_rate": 2.609670477343921e-05, |
|
"logits/chosen": -1.6957628726959229, |
|
"logits/rejected": -1.825757384300232, |
|
"logps/chosen": -4.030215263366699, |
|
"logps/rejected": -5.008100509643555, |
|
"loss": 22.1478, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.30844345688819885, |
|
"rewards/margins": 0.0614703968167305, |
|
"rewards/rejected": -0.36991381645202637, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.46884833228445566, |
|
"grad_norm": 101.18181610107422, |
|
"learning_rate": 2.5887105581680905e-05, |
|
"logits/chosen": -1.7838348150253296, |
|
"logits/rejected": -1.7674500942230225, |
|
"logps/chosen": -4.438131809234619, |
|
"logps/rejected": -5.542893886566162, |
|
"loss": 23.806, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.36128634214401245, |
|
"rewards/margins": 0.07241909205913544, |
|
"rewards/rejected": -0.43370547890663147, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.4719949653870359, |
|
"grad_norm": 89.2279052734375, |
|
"learning_rate": 2.567679605177739e-05, |
|
"logits/chosen": -1.7873433828353882, |
|
"logits/rejected": -1.831865906715393, |
|
"logps/chosen": -4.315898895263672, |
|
"logps/rejected": -5.43391227722168, |
|
"loss": 20.4258, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.32439109683036804, |
|
"rewards/margins": 0.09124849736690521, |
|
"rewards/rejected": -0.41563957929611206, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.4751415984896161, |
|
"grad_norm": 68.27491760253906, |
|
"learning_rate": 2.5465801559676033e-05, |
|
"logits/chosen": -1.716103196144104, |
|
"logits/rejected": -1.744837999343872, |
|
"logps/chosen": -3.913160800933838, |
|
"logps/rejected": -5.709442615509033, |
|
"loss": 19.3215, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.30374833941459656, |
|
"rewards/margins": 0.12692494690418243, |
|
"rewards/rejected": -0.4306732714176178, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.47828823159219636, |
|
"grad_norm": 149.6294708251953, |
|
"learning_rate": 2.525414756397174e-05, |
|
"logits/chosen": -1.7440742254257202, |
|
"logits/rejected": -1.8239097595214844, |
|
"logps/chosen": -3.586292266845703, |
|
"logps/rejected": -4.596356391906738, |
|
"loss": 19.9662, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.2702713906764984, |
|
"rewards/margins": 0.08218260109424591, |
|
"rewards/rejected": -0.3524540364742279, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.4814348646947766, |
|
"grad_norm": 102.944580078125, |
|
"learning_rate": 2.504185960283512e-05, |
|
"logits/chosen": -1.7996543645858765, |
|
"logits/rejected": -1.8109557628631592, |
|
"logps/chosen": -4.447735786437988, |
|
"logps/rejected": -5.870986461639404, |
|
"loss": 20.4207, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.35062670707702637, |
|
"rewards/margins": 0.09269314259290695, |
|
"rewards/rejected": -0.4433198869228363, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.4845814977973568, |
|
"grad_norm": 128.53907775878906, |
|
"learning_rate": 2.482896329093106e-05, |
|
"logits/chosen": -1.9051790237426758, |
|
"logits/rejected": -1.9270706176757812, |
|
"logps/chosen": -5.1721906661987305, |
|
"logps/rejected": -6.744166374206543, |
|
"loss": 19.0615, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.4306749701499939, |
|
"rewards/margins": 0.1142655462026596, |
|
"rewards/rejected": -0.5449405312538147, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.48772813089993705, |
|
"grad_norm": 123.44400024414062, |
|
"learning_rate": 2.4615484316328023e-05, |
|
"logits/chosen": -1.8487358093261719, |
|
"logits/rejected": -1.8219711780548096, |
|
"logps/chosen": -5.741638660430908, |
|
"logps/rejected": -7.048303127288818, |
|
"loss": 22.6075, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.4748842120170593, |
|
"rewards/margins": 0.09859482944011688, |
|
"rewards/rejected": -0.5734790563583374, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.4908747640025173, |
|
"grad_norm": 97.28683471679688, |
|
"learning_rate": 2.440144843739857e-05, |
|
"logits/chosen": -1.8166711330413818, |
|
"logits/rejected": -1.856359839439392, |
|
"logps/chosen": -6.369978904724121, |
|
"logps/rejected": -7.745943546295166, |
|
"loss": 21.1624, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5159797072410583, |
|
"rewards/margins": 0.09467221796512604, |
|
"rewards/rejected": -0.610651969909668, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.49402139710509757, |
|
"grad_norm": 94.76971435546875, |
|
"learning_rate": 2.4186881479711338e-05, |
|
"logits/chosen": -1.8901869058609009, |
|
"logits/rejected": -1.996917724609375, |
|
"logps/chosen": -5.151943206787109, |
|
"logps/rejected": -6.655333518981934, |
|
"loss": 17.5696, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3987768888473511, |
|
"rewards/margins": 0.11971308290958405, |
|
"rewards/rejected": -0.5184900164604187, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.4971680302076778, |
|
"grad_norm": 362.07489013671875, |
|
"learning_rate": 2.397180933291491e-05, |
|
"logits/chosen": -1.6789305210113525, |
|
"logits/rejected": -1.75827157497406, |
|
"logps/chosen": -4.5332841873168945, |
|
"logps/rejected": -5.266444206237793, |
|
"loss": 22.7215, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.3525087535381317, |
|
"rewards/margins": 0.07219593226909637, |
|
"rewards/rejected": -0.42470473051071167, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.500314663310258, |
|
"grad_norm": 181.0984344482422, |
|
"learning_rate": 2.375625794761401e-05, |
|
"logits/chosen": -1.769201636314392, |
|
"logits/rejected": -1.7219161987304688, |
|
"logps/chosen": -4.633937358856201, |
|
"logps/rejected": -5.043046474456787, |
|
"loss": 26.0541, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.3703366816043854, |
|
"rewards/margins": 0.028562629595398903, |
|
"rewards/rejected": -0.3988993167877197, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.5034612964128382, |
|
"grad_norm": 120.9494857788086, |
|
"learning_rate": 2.3540253332238266e-05, |
|
"logits/chosen": -1.6151552200317383, |
|
"logits/rejected": -1.646795630455017, |
|
"logps/chosen": -4.029574394226074, |
|
"logps/rejected": -5.215254783630371, |
|
"loss": 20.2479, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.314275324344635, |
|
"rewards/margins": 0.08437344431877136, |
|
"rewards/rejected": -0.39864879846572876, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5066079295154186, |
|
"grad_norm": 119.4858169555664, |
|
"learning_rate": 2.3323821549904038e-05, |
|
"logits/chosen": -1.670577049255371, |
|
"logits/rejected": -1.5533939599990845, |
|
"logps/chosen": -3.9187912940979004, |
|
"logps/rejected": -4.743254661560059, |
|
"loss": 23.6037, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.3001677095890045, |
|
"rewards/margins": 0.06169123575091362, |
|
"rewards/rejected": -0.36185896396636963, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.5097545626179988, |
|
"grad_norm": 316.2997741699219, |
|
"learning_rate": 2.310698871526966e-05, |
|
"logits/chosen": -1.5207440853118896, |
|
"logits/rejected": -1.6267799139022827, |
|
"logps/chosen": -3.097418785095215, |
|
"logps/rejected": -4.804646015167236, |
|
"loss": 21.8575, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.22499537467956543, |
|
"rewards/margins": 0.11616162210702896, |
|
"rewards/rejected": -0.3411570191383362, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.512901195720579, |
|
"grad_norm": 78.00189971923828, |
|
"learning_rate": 2.288978099138443e-05, |
|
"logits/chosen": -1.5745933055877686, |
|
"logits/rejected": -1.5564606189727783, |
|
"logps/chosen": -2.8804163932800293, |
|
"logps/rejected": -3.5308539867401123, |
|
"loss": 22.241, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.20254115760326385, |
|
"rewards/margins": 0.05405501648783684, |
|
"rewards/rejected": -0.2565961480140686, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.5160478288231592, |
|
"grad_norm": 118.51643371582031, |
|
"learning_rate": 2.267222458653179e-05, |
|
"logits/chosen": -1.5091989040374756, |
|
"logits/rejected": -1.6645923852920532, |
|
"logps/chosen": -3.255237579345703, |
|
"logps/rejected": -4.126650333404541, |
|
"loss": 22.0187, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.23314771056175232, |
|
"rewards/margins": 0.06177164986729622, |
|
"rewards/rejected": -0.29491934180259705, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.5191944619257395, |
|
"grad_norm": 68.80047607421875, |
|
"learning_rate": 2.245434575106702e-05, |
|
"logits/chosen": -1.525356411933899, |
|
"logits/rejected": -1.701436996459961, |
|
"logps/chosen": -3.166797161102295, |
|
"logps/rejected": -4.742985248565674, |
|
"loss": 20.3686, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.2261020839214325, |
|
"rewards/margins": 0.08829782903194427, |
|
"rewards/rejected": -0.3143998980522156, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.5223410950283197, |
|
"grad_norm": 73.1375503540039, |
|
"learning_rate": 2.223617077424988e-05, |
|
"logits/chosen": -1.6771663427352905, |
|
"logits/rejected": -1.7121098041534424, |
|
"logps/chosen": -3.020296573638916, |
|
"logps/rejected": -4.426422119140625, |
|
"loss": 20.0836, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.21738722920417786, |
|
"rewards/margins": 0.09777109324932098, |
|
"rewards/rejected": -0.31515830755233765, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.5254877281309, |
|
"grad_norm": 76.68984985351562, |
|
"learning_rate": 2.2017725981072536e-05, |
|
"logits/chosen": -1.4603363275527954, |
|
"logits/rejected": -1.5595886707305908, |
|
"logps/chosen": -3.6973624229431152, |
|
"logps/rejected": -5.027807712554932, |
|
"loss": 20.512, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.2720819115638733, |
|
"rewards/margins": 0.08642515540122986, |
|
"rewards/rejected": -0.35850709676742554, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.5286343612334802, |
|
"grad_norm": 122.99668884277344, |
|
"learning_rate": 2.1799037729083213e-05, |
|
"logits/chosen": -1.5949891805648804, |
|
"logits/rejected": -1.7137962579727173, |
|
"logps/chosen": -3.5109829902648926, |
|
"logps/rejected": -4.95348596572876, |
|
"loss": 21.517, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.27030450105667114, |
|
"rewards/margins": 0.09910550713539124, |
|
"rewards/rejected": -0.36940997838974, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.5317809943360604, |
|
"grad_norm": 65.23582458496094, |
|
"learning_rate": 2.1580132405205862e-05, |
|
"logits/chosen": -1.4871020317077637, |
|
"logits/rejected": -1.5624678134918213, |
|
"logps/chosen": -4.474881172180176, |
|
"logps/rejected": -5.375269412994385, |
|
"loss": 23.3138, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3389451503753662, |
|
"rewards/margins": 0.06582923233509064, |
|
"rewards/rejected": -0.40477436780929565, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.5349276274386406, |
|
"grad_norm": 175.08432006835938, |
|
"learning_rate": 2.1361036422556337e-05, |
|
"logits/chosen": -1.5353832244873047, |
|
"logits/rejected": -1.596407175064087, |
|
"logps/chosen": -3.814873218536377, |
|
"logps/rejected": -4.92036771774292, |
|
"loss": 21.5442, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.2893931567668915, |
|
"rewards/margins": 0.07075894623994827, |
|
"rewards/rejected": -0.36015206575393677, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.5380742605412209, |
|
"grad_norm": 64.21197509765625, |
|
"learning_rate": 2.1141776217255365e-05, |
|
"logits/chosen": -1.567317247390747, |
|
"logits/rejected": -1.5555747747421265, |
|
"logps/chosen": -3.8906242847442627, |
|
"logps/rejected": -4.897479057312012, |
|
"loss": 21.8379, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.29526472091674805, |
|
"rewards/margins": 0.06354343891143799, |
|
"rewards/rejected": -0.35880815982818604, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.5412208936438011, |
|
"grad_norm": 104.57052612304688, |
|
"learning_rate": 2.0922378245238787e-05, |
|
"logits/chosen": -1.5869696140289307, |
|
"logits/rejected": -1.6049997806549072, |
|
"logps/chosen": -3.8140482902526855, |
|
"logps/rejected": -4.755133628845215, |
|
"loss": 23.1968, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.29255491495132446, |
|
"rewards/margins": 0.052004069089889526, |
|
"rewards/rejected": -0.3445590138435364, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.5443675267463813, |
|
"grad_norm": 92.2053451538086, |
|
"learning_rate": 2.070286897906537e-05, |
|
"logits/chosen": -1.602929711341858, |
|
"logits/rejected": -1.6071062088012695, |
|
"logps/chosen": -3.990319013595581, |
|
"logps/rejected": -5.2248215675354, |
|
"loss": 20.3706, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.3073904812335968, |
|
"rewards/margins": 0.09087739139795303, |
|
"rewards/rejected": -0.39826786518096924, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.5475141598489616, |
|
"grad_norm": 83.128662109375, |
|
"learning_rate": 2.0483274904722647e-05, |
|
"logits/chosen": -1.7051680088043213, |
|
"logits/rejected": -1.6087182760238647, |
|
"logps/chosen": -3.986027956008911, |
|
"logps/rejected": -4.851881980895996, |
|
"loss": 21.4848, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.30843600630760193, |
|
"rewards/margins": 0.06898938864469528, |
|
"rewards/rejected": -0.3774254322052002, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.5506607929515418, |
|
"grad_norm": 62.2298583984375, |
|
"learning_rate": 2.026362251843109e-05, |
|
"logits/chosen": -1.6034513711929321, |
|
"logits/rejected": -1.699464201927185, |
|
"logps/chosen": -3.4193336963653564, |
|
"logps/rejected": -4.403960227966309, |
|
"loss": 21.3108, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.2610613703727722, |
|
"rewards/margins": 0.08181565254926682, |
|
"rewards/rejected": -0.34287700057029724, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.5538074260541221, |
|
"grad_norm": 88.62437438964844, |
|
"learning_rate": 2.004393832344711e-05, |
|
"logits/chosen": -1.6719697713851929, |
|
"logits/rejected": -1.5851457118988037, |
|
"logps/chosen": -3.8325066566467285, |
|
"logps/rejected": -5.3017473220825195, |
|
"loss": 19.635, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.3032756447792053, |
|
"rewards/margins": 0.09231220185756683, |
|
"rewards/rejected": -0.39558783173561096, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.5569540591567024, |
|
"grad_norm": 64.06165313720703, |
|
"learning_rate": 1.9824248826865124e-05, |
|
"logits/chosen": -1.5828460454940796, |
|
"logits/rejected": -1.6327168941497803, |
|
"logps/chosen": -4.681789398193359, |
|
"logps/rejected": -6.566616058349609, |
|
"loss": 18.3853, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.3667379915714264, |
|
"rewards/margins": 0.12741395831108093, |
|
"rewards/rejected": -0.49415192008018494, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.5601006922592826, |
|
"grad_norm": 204.93890380859375, |
|
"learning_rate": 1.9604580536419254e-05, |
|
"logits/chosen": -1.572584867477417, |
|
"logits/rejected": -1.6088756322860718, |
|
"logps/chosen": -5.441628456115723, |
|
"logps/rejected": -7.085760593414307, |
|
"loss": 24.9097, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.45653265714645386, |
|
"rewards/margins": 0.0925588458776474, |
|
"rewards/rejected": -0.5490914583206177, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.5632473253618628, |
|
"grad_norm": 162.79714965820312, |
|
"learning_rate": 1.93849599572849e-05, |
|
"logits/chosen": -1.6288610696792603, |
|
"logits/rejected": -1.6398794651031494, |
|
"logps/chosen": -5.213116645812988, |
|
"logps/rejected": -6.9830803871154785, |
|
"loss": 20.22, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.42777156829833984, |
|
"rewards/margins": 0.12980665266513824, |
|
"rewards/rejected": -0.5575782060623169, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.5663939584644431, |
|
"grad_norm": 75.16659545898438, |
|
"learning_rate": 1.916541358888062e-05, |
|
"logits/chosen": -1.6041675806045532, |
|
"logits/rejected": -1.6970984935760498, |
|
"logps/chosen": -4.644831657409668, |
|
"logps/rejected": -5.80092716217041, |
|
"loss": 20.4964, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.37974128127098083, |
|
"rewards/margins": 0.09219308942556381, |
|
"rewards/rejected": -0.47193440794944763, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.5695405915670233, |
|
"grad_norm": 110.90229797363281, |
|
"learning_rate": 1.8945967921670676e-05, |
|
"logits/chosen": -1.619327187538147, |
|
"logits/rejected": -1.6541610956192017, |
|
"logps/chosen": -5.146854400634766, |
|
"logps/rejected": -6.011466026306152, |
|
"loss": 22.4066, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.41492849588394165, |
|
"rewards/margins": 0.07109946012496948, |
|
"rewards/rejected": -0.48602795600891113, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.5726872246696035, |
|
"grad_norm": 139.65293884277344, |
|
"learning_rate": 1.872664943396875e-05, |
|
"logits/chosen": -1.6764265298843384, |
|
"logits/rejected": -1.6785293817520142, |
|
"logps/chosen": -4.107344150543213, |
|
"logps/rejected": -5.6308698654174805, |
|
"loss": 20.0103, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3305855095386505, |
|
"rewards/margins": 0.11647170782089233, |
|
"rewards/rejected": -0.44705715775489807, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.5758338577721838, |
|
"grad_norm": 147.52713012695312, |
|
"learning_rate": 1.8507484588743025e-05, |
|
"logits/chosen": -1.7002742290496826, |
|
"logits/rejected": -1.7680556774139404, |
|
"logps/chosen": -4.6784772872924805, |
|
"logps/rejected": -5.973324775695801, |
|
"loss": 21.0769, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3756680190563202, |
|
"rewards/margins": 0.09194694459438324, |
|
"rewards/rejected": -0.4676149785518646, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.578980490874764, |
|
"grad_norm": 71.16407012939453, |
|
"learning_rate": 1.828849983042321e-05, |
|
"logits/chosen": -1.7075554132461548, |
|
"logits/rejected": -1.6953094005584717, |
|
"logps/chosen": -4.460357666015625, |
|
"logps/rejected": -5.521221160888672, |
|
"loss": 21.7677, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.35953736305236816, |
|
"rewards/margins": 0.08199813961982727, |
|
"rewards/rejected": -0.44153547286987305, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.5821271239773442, |
|
"grad_norm": 114.27317810058594, |
|
"learning_rate": 1.8069721581709697e-05, |
|
"logits/chosen": -1.6304935216903687, |
|
"logits/rejected": -1.6967551708221436, |
|
"logps/chosen": -4.526963233947754, |
|
"logps/rejected": -5.7123494148254395, |
|
"loss": 21.5069, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.35851508378982544, |
|
"rewards/margins": 0.07467497885227203, |
|
"rewards/rejected": -0.4331900477409363, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.5852737570799245, |
|
"grad_norm": 71.74990844726562, |
|
"learning_rate": 1.785117624038546e-05, |
|
"logits/chosen": -1.704414963722229, |
|
"logits/rejected": -1.7506616115570068, |
|
"logps/chosen": -5.388034820556641, |
|
"logps/rejected": -6.3465657234191895, |
|
"loss": 21.8977, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4279704689979553, |
|
"rewards/margins": 0.05819786712527275, |
|
"rewards/rejected": -0.48616838455200195, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.5884203901825047, |
|
"grad_norm": 78.14295196533203, |
|
"learning_rate": 1.763289017613085e-05, |
|
"logits/chosen": -1.6152721643447876, |
|
"logits/rejected": -1.640634536743164, |
|
"logps/chosen": -4.3263750076293945, |
|
"logps/rejected": -5.279467582702637, |
|
"loss": 21.887, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.34328222274780273, |
|
"rewards/margins": 0.07140573114156723, |
|
"rewards/rejected": -0.41468796133995056, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.5915670232850849, |
|
"grad_norm": 219.88279724121094, |
|
"learning_rate": 1.741488972734184e-05, |
|
"logits/chosen": -1.5857679843902588, |
|
"logits/rejected": -1.65940260887146, |
|
"logps/chosen": -4.669988632202148, |
|
"logps/rejected": -6.202586650848389, |
|
"loss": 20.5667, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3540535271167755, |
|
"rewards/margins": 0.10686901956796646, |
|
"rewards/rejected": -0.46092256903648376, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.5947136563876652, |
|
"grad_norm": 90.00337219238281, |
|
"learning_rate": 1.7197201197952065e-05, |
|
"logits/chosen": -1.5206947326660156, |
|
"logits/rejected": -1.53545343875885, |
|
"logps/chosen": -4.086690902709961, |
|
"logps/rejected": -4.490893363952637, |
|
"loss": 25.9453, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.30406898260116577, |
|
"rewards/margins": 0.034761372953653336, |
|
"rewards/rejected": -0.3388303220272064, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.5978602894902454, |
|
"grad_norm": 79.93099212646484, |
|
"learning_rate": 1.6979850854258938e-05, |
|
"logits/chosen": -1.3608052730560303, |
|
"logits/rejected": -1.4760938882827759, |
|
"logps/chosen": -3.6326985359191895, |
|
"logps/rejected": -5.186118125915527, |
|
"loss": 20.6064, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.2708989083766937, |
|
"rewards/margins": 0.10907317698001862, |
|
"rewards/rejected": -0.37997210025787354, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.6010069225928257, |
|
"grad_norm": 54.11685562133789, |
|
"learning_rate": 1.6762864921754426e-05, |
|
"logits/chosen": -1.3788961172103882, |
|
"logits/rejected": -1.4954605102539062, |
|
"logps/chosen": -3.189054250717163, |
|
"logps/rejected": -4.365990161895752, |
|
"loss": 20.0193, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.23121857643127441, |
|
"rewards/margins": 0.09906688332557678, |
|
"rewards/rejected": -0.3302854597568512, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.604153555695406, |
|
"grad_norm": 78.23949432373047, |
|
"learning_rate": 1.654626958196059e-05, |
|
"logits/chosen": -1.509225606918335, |
|
"logits/rejected": -1.4755313396453857, |
|
"logps/chosen": -4.190049648284912, |
|
"logps/rejected": -5.553238391876221, |
|
"loss": 18.6024, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.3084833025932312, |
|
"rewards/margins": 0.10999338328838348, |
|
"rewards/rejected": -0.4184766709804535, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.6073001887979862, |
|
"grad_norm": 46.66254806518555, |
|
"learning_rate": 1.633009096927062e-05, |
|
"logits/chosen": -1.5157467126846313, |
|
"logits/rejected": -1.6129589080810547, |
|
"logps/chosen": -3.3808016777038574, |
|
"logps/rejected": -4.686802864074707, |
|
"loss": 18.8156, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.26101940870285034, |
|
"rewards/margins": 0.10853584110736847, |
|
"rewards/rejected": -0.3695552349090576, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.6104468219005664, |
|
"grad_norm": 76.67229461669922, |
|
"learning_rate": 1.6114355167795407e-05, |
|
"logits/chosen": -1.507666826248169, |
|
"logits/rejected": -1.642401099205017, |
|
"logps/chosen": -4.4493513107299805, |
|
"logps/rejected": -5.8435235023498535, |
|
"loss": 20.6314, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.32613635063171387, |
|
"rewards/margins": 0.10264672338962555, |
|
"rewards/rejected": -0.42878302931785583, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.6135934550031467, |
|
"grad_norm": 97.02481842041016, |
|
"learning_rate": 1.5899088208216215e-05, |
|
"logits/chosen": -1.501697301864624, |
|
"logits/rejected": -1.594618558883667, |
|
"logps/chosen": -4.284520149230957, |
|
"logps/rejected": -4.852963447570801, |
|
"loss": 26.4688, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.33568352460861206, |
|
"rewards/margins": 0.03864779695868492, |
|
"rewards/rejected": -0.37433135509490967, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.6167400881057269, |
|
"grad_norm": 176.32850646972656, |
|
"learning_rate": 1.568431606464388e-05, |
|
"logits/chosen": -1.595866084098816, |
|
"logits/rejected": -1.6668930053710938, |
|
"logps/chosen": -4.345438480377197, |
|
"logps/rejected": -5.242307662963867, |
|
"loss": 21.0145, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.3100406527519226, |
|
"rewards/margins": 0.0767713412642479, |
|
"rewards/rejected": -0.3868120312690735, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.6198867212083071, |
|
"grad_norm": 76.86431884765625, |
|
"learning_rate": 1.547006465148471e-05, |
|
"logits/chosen": -1.5940501689910889, |
|
"logits/rejected": -1.7789547443389893, |
|
"logps/chosen": -4.4857177734375, |
|
"logps/rejected": -5.875302314758301, |
|
"loss": 21.8847, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3244941830635071, |
|
"rewards/margins": 0.08251677453517914, |
|
"rewards/rejected": -0.4070109724998474, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.6230333543108874, |
|
"grad_norm": 49.81745147705078, |
|
"learning_rate": 1.5256359820313718e-05, |
|
"logits/chosen": -1.550085425376892, |
|
"logits/rejected": -1.5959933996200562, |
|
"logps/chosen": -3.699030637741089, |
|
"logps/rejected": -4.6470842361450195, |
|
"loss": 20.7306, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.2788650095462799, |
|
"rewards/margins": 0.0799705758690834, |
|
"rewards/rejected": -0.3588356077671051, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.6261799874134676, |
|
"grad_norm": 81.01653289794922, |
|
"learning_rate": 1.5043227356755292e-05, |
|
"logits/chosen": -1.58163321018219, |
|
"logits/rejected": -1.663260817527771, |
|
"logps/chosen": -4.869448661804199, |
|
"logps/rejected": -5.365525245666504, |
|
"loss": 24.1646, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.34130221605300903, |
|
"rewards/margins": 0.04551910609006882, |
|
"rewards/rejected": -0.38682132959365845, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.6293266205160478, |
|
"grad_norm": 101.5945053100586, |
|
"learning_rate": 1.4830692977371985e-05, |
|
"logits/chosen": -1.747009038925171, |
|
"logits/rejected": -1.7761609554290771, |
|
"logps/chosen": -4.585317134857178, |
|
"logps/rejected": -5.033480644226074, |
|
"loss": 23.2309, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.3538682162761688, |
|
"rewards/margins": 0.037090349942445755, |
|
"rewards/rejected": -0.39095860719680786, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.632473253618628, |
|
"grad_norm": 55.57672882080078, |
|
"learning_rate": 1.4618782326561483e-05, |
|
"logits/chosen": -1.7331736087799072, |
|
"logits/rejected": -1.771627426147461, |
|
"logps/chosen": -3.9518864154815674, |
|
"logps/rejected": -4.847538948059082, |
|
"loss": 20.4833, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.2925838530063629, |
|
"rewards/margins": 0.0719093531370163, |
|
"rewards/rejected": -0.3644932210445404, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.6356198867212083, |
|
"grad_norm": 75.53394317626953, |
|
"learning_rate": 1.4407520973462408e-05, |
|
"logits/chosen": -1.7358888387680054, |
|
"logits/rejected": -1.7642987966537476, |
|
"logps/chosen": -4.450674057006836, |
|
"logps/rejected": -5.2704572677612305, |
|
"loss": 22.8124, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3556494116783142, |
|
"rewards/margins": 0.04838743433356285, |
|
"rewards/rejected": -0.40403684973716736, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.6387665198237885, |
|
"grad_norm": 67.8470230102539, |
|
"learning_rate": 1.4196934408869118e-05, |
|
"logits/chosen": -1.8153152465820312, |
|
"logits/rejected": -1.8065166473388672, |
|
"logps/chosen": -5.316075325012207, |
|
"logps/rejected": -6.770912170410156, |
|
"loss": 21.5925, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.3945319950580597, |
|
"rewards/margins": 0.06610045582056046, |
|
"rewards/rejected": -0.46063241362571716, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.6419131529263687, |
|
"grad_norm": 104.53321075439453, |
|
"learning_rate": 1.3987048042155977e-05, |
|
"logits/chosen": -1.6470744609832764, |
|
"logits/rejected": -1.6989984512329102, |
|
"logps/chosen": -4.787189960479736, |
|
"logps/rejected": -5.5443525314331055, |
|
"loss": 22.5867, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.3872155249118805, |
|
"rewards/margins": 0.05858270451426506, |
|
"rewards/rejected": -0.44579824805259705, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.645059786028949, |
|
"grad_norm": 122.49982452392578, |
|
"learning_rate": 1.377788719821149e-05, |
|
"logits/chosen": -1.6421356201171875, |
|
"logits/rejected": -1.702820062637329, |
|
"logps/chosen": -4.435242652893066, |
|
"logps/rejected": -4.579672336578369, |
|
"loss": 25.1424, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.3478389382362366, |
|
"rewards/margins": 0.0215731430798769, |
|
"rewards/rejected": -0.3694121241569519, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.6482064191315292, |
|
"grad_norm": 145.1405487060547, |
|
"learning_rate": 1.3569477114382568e-05, |
|
"logits/chosen": -1.6365470886230469, |
|
"logits/rejected": -1.6954962015151978, |
|
"logps/chosen": -4.985340595245361, |
|
"logps/rejected": -5.898791313171387, |
|
"loss": 21.7627, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.385195255279541, |
|
"rewards/margins": 0.05228766053915024, |
|
"rewards/rejected": -0.43748289346694946, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.6513530522341096, |
|
"grad_norm": 82.04701232910156, |
|
"learning_rate": 1.3361842937429436e-05, |
|
"logits/chosen": -1.6654088497161865, |
|
"logits/rejected": -1.732187032699585, |
|
"logps/chosen": -4.262317180633545, |
|
"logps/rejected": -5.410677909851074, |
|
"loss": 20.2359, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.3416784703731537, |
|
"rewards/margins": 0.08638517558574677, |
|
"rewards/rejected": -0.42806363105773926, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.6544996853366898, |
|
"grad_norm": 95.95136260986328, |
|
"learning_rate": 1.3155009720491368e-05, |
|
"logits/chosen": -1.5801721811294556, |
|
"logits/rejected": -1.5603923797607422, |
|
"logps/chosen": -5.278650760650635, |
|
"logps/rejected": -6.190367698669434, |
|
"loss": 22.4881, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.3998781740665436, |
|
"rewards/margins": 0.06602592766284943, |
|
"rewards/rejected": -0.4659040868282318, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.65764631843927, |
|
"grad_norm": 60.0530891418457, |
|
"learning_rate": 1.2949002420063828e-05, |
|
"logits/chosen": -1.6326820850372314, |
|
"logits/rejected": -1.720810890197754, |
|
"logps/chosen": -4.082489967346191, |
|
"logps/rejected": -5.006215572357178, |
|
"loss": 21.0105, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.32317107915878296, |
|
"rewards/margins": 0.07444654405117035, |
|
"rewards/rejected": -0.3976176679134369, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.6607929515418502, |
|
"grad_norm": 221.81906127929688, |
|
"learning_rate": 1.2743845892987183e-05, |
|
"logits/chosen": -1.6526765823364258, |
|
"logits/rejected": -1.697488784790039, |
|
"logps/chosen": -4.53380823135376, |
|
"logps/rejected": -5.771850109100342, |
|
"loss": 23.2634, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.34263211488723755, |
|
"rewards/margins": 0.07287438213825226, |
|
"rewards/rejected": -0.4155064523220062, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.6639395846444305, |
|
"grad_norm": 137.2283172607422, |
|
"learning_rate": 1.2539564893447489e-05, |
|
"logits/chosen": -1.631956696510315, |
|
"logits/rejected": -1.654306173324585, |
|
"logps/chosen": -4.1559600830078125, |
|
"logps/rejected": -5.033182621002197, |
|
"loss": 22.6183, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.32424792647361755, |
|
"rewards/margins": 0.06618380546569824, |
|
"rewards/rejected": -0.3904317319393158, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.6670862177470107, |
|
"grad_norm": 72.95520782470703, |
|
"learning_rate": 1.2336184069989663e-05, |
|
"logits/chosen": -1.670440435409546, |
|
"logits/rejected": -1.6872297525405884, |
|
"logps/chosen": -3.9552032947540283, |
|
"logps/rejected": -5.303035259246826, |
|
"loss": 19.5681, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.31223589181900024, |
|
"rewards/margins": 0.09164074063301086, |
|
"rewards/rejected": -0.4038766026496887, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.6702328508495909, |
|
"grad_norm": 90.91898345947266, |
|
"learning_rate": 1.2133727962543356e-05, |
|
"logits/chosen": -1.6696465015411377, |
|
"logits/rejected": -1.6963016986846924, |
|
"logps/chosen": -4.434679985046387, |
|
"logps/rejected": -5.158357620239258, |
|
"loss": 21.8675, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.3489730954170227, |
|
"rewards/margins": 0.05557180196046829, |
|
"rewards/rejected": -0.4045449197292328, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.6733794839521712, |
|
"grad_norm": 185.79261779785156, |
|
"learning_rate": 1.193222099946202e-05, |
|
"logits/chosen": -1.6571991443634033, |
|
"logits/rejected": -1.7073132991790771, |
|
"logps/chosen": -4.607517242431641, |
|
"logps/rejected": -5.376668930053711, |
|
"loss": 22.3462, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.35802438855171204, |
|
"rewards/margins": 0.0643647164106369, |
|
"rewards/rejected": -0.42238911986351013, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.6765261170547514, |
|
"grad_norm": 71.50703430175781, |
|
"learning_rate": 1.1731687494575319e-05, |
|
"logits/chosen": -1.585889458656311, |
|
"logits/rejected": -1.6507800817489624, |
|
"logps/chosen": -4.845611572265625, |
|
"logps/rejected": -6.422255516052246, |
|
"loss": 18.5681, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.37992939352989197, |
|
"rewards/margins": 0.10727685689926147, |
|
"rewards/rejected": -0.48720628023147583, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.6796727501573316, |
|
"grad_norm": 210.3772430419922, |
|
"learning_rate": 1.153215164425547e-05, |
|
"logits/chosen": -1.5637327432632446, |
|
"logits/rejected": -1.628791093826294, |
|
"logps/chosen": -4.643498420715332, |
|
"logps/rejected": -5.90508508682251, |
|
"loss": 22.429, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.3530879020690918, |
|
"rewards/margins": 0.07370196282863617, |
|
"rewards/rejected": -0.42678990960121155, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.6828193832599119, |
|
"grad_norm": 324.6168212890625, |
|
"learning_rate": 1.133363752449768e-05, |
|
"logits/chosen": -1.6127498149871826, |
|
"logits/rejected": -1.5895841121673584, |
|
"logps/chosen": -3.8858344554901123, |
|
"logps/rejected": -5.141265392303467, |
|
"loss": 18.9867, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.31075209379196167, |
|
"rewards/margins": 0.10046511888504028, |
|
"rewards/rejected": -0.41121721267700195, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.6859660163624921, |
|
"grad_norm": 269.12744140625, |
|
"learning_rate": 1.1136169088015177e-05, |
|
"logits/chosen": -1.5152666568756104, |
|
"logits/rejected": -1.5772387981414795, |
|
"logps/chosen": -4.37540864944458, |
|
"logps/rejected": -5.073463439941406, |
|
"loss": 22.4614, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.3488185405731201, |
|
"rewards/margins": 0.05801800638437271, |
|
"rewards/rejected": -0.40683650970458984, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.6891126494650723, |
|
"grad_norm": 407.53985595703125, |
|
"learning_rate": 1.0939770161349015e-05, |
|
"logits/chosen": -1.604278802871704, |
|
"logits/rejected": -1.6394538879394531, |
|
"logps/chosen": -4.725668907165527, |
|
"logps/rejected": -6.037966728210449, |
|
"loss": 23.0495, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3864028751850128, |
|
"rewards/margins": 0.09246636927127838, |
|
"rewards/rejected": -0.4788691997528076, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.6922592825676526, |
|
"grad_norm": 65.52562713623047, |
|
"learning_rate": 1.0744464441993205e-05, |
|
"logits/chosen": -1.4906436204910278, |
|
"logits/rejected": -1.570569634437561, |
|
"logps/chosen": -4.404895782470703, |
|
"logps/rejected": -5.454612731933594, |
|
"loss": 21.9146, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.3443445563316345, |
|
"rewards/margins": 0.07481996715068817, |
|
"rewards/rejected": -0.4191645085811615, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.6954059156702328, |
|
"grad_norm": 60.899654388427734, |
|
"learning_rate": 1.0550275495535382e-05, |
|
"logits/chosen": -1.5062484741210938, |
|
"logits/rejected": -1.5998207330703735, |
|
"logps/chosen": -5.046140193939209, |
|
"logps/rejected": -6.212726593017578, |
|
"loss": 22.0906, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.3926524817943573, |
|
"rewards/margins": 0.08822645246982574, |
|
"rewards/rejected": -0.48087891936302185, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.6985525487728131, |
|
"grad_norm": 85.36582946777344, |
|
"learning_rate": 1.0357226752813343e-05, |
|
"logits/chosen": -1.48141348361969, |
|
"logits/rejected": -1.532138705253601, |
|
"logps/chosen": -4.955922603607178, |
|
"logps/rejected": -6.1522979736328125, |
|
"loss": 19.2663, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.3955458402633667, |
|
"rewards/margins": 0.09683366119861603, |
|
"rewards/rejected": -0.4923795163631439, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.7016991818753934, |
|
"grad_norm": 92.5035171508789, |
|
"learning_rate": 1.0165341507087922e-05, |
|
"logits/chosen": -1.4898306131362915, |
|
"logits/rejected": -1.589817762374878, |
|
"logps/chosen": -4.877270221710205, |
|
"logps/rejected": -6.326567649841309, |
|
"loss": 21.0751, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3771550953388214, |
|
"rewards/margins": 0.10272278636693954, |
|
"rewards/rejected": -0.47987785935401917, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.7048458149779736, |
|
"grad_norm": 100.18026733398438, |
|
"learning_rate": 9.974642911232413e-06, |
|
"logits/chosen": -1.5176981687545776, |
|
"logits/rejected": -1.5406978130340576, |
|
"logps/chosen": -5.319207191467285, |
|
"logps/rejected": -6.242737770080566, |
|
"loss": 20.9524, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.4210137724876404, |
|
"rewards/margins": 0.07255946844816208, |
|
"rewards/rejected": -0.49357327818870544, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.7079924480805538, |
|
"grad_norm": 176.3753662109375, |
|
"learning_rate": 9.785153974938912e-06, |
|
"logits/chosen": -1.5830824375152588, |
|
"logits/rejected": -1.6101982593536377, |
|
"logps/chosen": -5.879128456115723, |
|
"logps/rejected": -6.807085990905762, |
|
"loss": 22.111, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.44119253754615784, |
|
"rewards/margins": 0.07570262253284454, |
|
"rewards/rejected": -0.5168951749801636, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.7111390811831341, |
|
"grad_norm": 67.40308380126953, |
|
"learning_rate": 9.596897561942026e-06, |
|
"logits/chosen": -1.463176965713501, |
|
"logits/rejected": -1.4804832935333252, |
|
"logps/chosen": -4.481048107147217, |
|
"logps/rejected": -5.287797451019287, |
|
"loss": 22.1994, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.3575854003429413, |
|
"rewards/margins": 0.06364957243204117, |
|
"rewards/rejected": -0.42123493552207947, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.7142857142857143, |
|
"grad_norm": 93.39257049560547, |
|
"learning_rate": 9.409896387260082e-06, |
|
"logits/chosen": -1.4179964065551758, |
|
"logits/rejected": -1.4655730724334717, |
|
"logps/chosen": -4.708760738372803, |
|
"logps/rejected": -6.217686653137207, |
|
"loss": 21.4161, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.37350720167160034, |
|
"rewards/margins": 0.10105752944946289, |
|
"rewards/rejected": -0.47456473112106323, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.7174323473882945, |
|
"grad_norm": 97.41583251953125, |
|
"learning_rate": 9.224173014454372e-06, |
|
"logits/chosen": -1.4397246837615967, |
|
"logits/rejected": -1.4766523838043213, |
|
"logps/chosen": -4.817109107971191, |
|
"logps/rejected": -6.214907169342041, |
|
"loss": 22.7104, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.384985089302063, |
|
"rewards/margins": 0.0952010303735733, |
|
"rewards/rejected": -0.4801861345767975, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.7205789804908748, |
|
"grad_norm": 103.4198989868164, |
|
"learning_rate": 9.039749852906606e-06, |
|
"logits/chosen": -1.368666648864746, |
|
"logits/rejected": -1.4239342212677002, |
|
"logps/chosen": -4.382673740386963, |
|
"logps/rejected": -5.262811183929443, |
|
"loss": 20.8727, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.35242724418640137, |
|
"rewards/margins": 0.075123131275177, |
|
"rewards/rejected": -0.42755040526390076, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.723725613593455, |
|
"grad_norm": 131.38589477539062, |
|
"learning_rate": 8.856649155115002e-06, |
|
"logits/chosen": -1.409711241722107, |
|
"logits/rejected": -1.455235481262207, |
|
"logps/chosen": -4.550191402435303, |
|
"logps/rejected": -5.52540922164917, |
|
"loss": 23.0103, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.3620757460594177, |
|
"rewards/margins": 0.06903600692749023, |
|
"rewards/rejected": -0.43111175298690796, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.7268722466960352, |
|
"grad_norm": 60.0385627746582, |
|
"learning_rate": 8.674893014009311e-06, |
|
"logits/chosen": -1.3705095052719116, |
|
"logits/rejected": -1.4764083623886108, |
|
"logps/chosen": -4.423483848571777, |
|
"logps/rejected": -5.486600875854492, |
|
"loss": 21.3505, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.3566300570964813, |
|
"rewards/margins": 0.07945708185434341, |
|
"rewards/rejected": -0.4360871911048889, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.7300188797986155, |
|
"grad_norm": 80.497802734375, |
|
"learning_rate": 8.494503360285084e-06, |
|
"logits/chosen": -1.406087875366211, |
|
"logits/rejected": -1.5597848892211914, |
|
"logps/chosen": -4.28043270111084, |
|
"logps/rejected": -5.639766216278076, |
|
"loss": 21.9094, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.3276395797729492, |
|
"rewards/margins": 0.07139433920383453, |
|
"rewards/rejected": -0.39903393387794495, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.7331655129011957, |
|
"grad_norm": 106.78560638427734, |
|
"learning_rate": 8.315501959757506e-06, |
|
"logits/chosen": -1.4479920864105225, |
|
"logits/rejected": -1.530386209487915, |
|
"logps/chosen": -5.356269836425781, |
|
"logps/rejected": -6.295357704162598, |
|
"loss": 20.2622, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.39192715287208557, |
|
"rewards/margins": 0.07843243330717087, |
|
"rewards/rejected": -0.47035956382751465, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.7363121460037759, |
|
"grad_norm": 70.2252426147461, |
|
"learning_rate": 8.137910410735119e-06, |
|
"logits/chosen": -1.3913201093673706, |
|
"logits/rejected": -1.5211797952651978, |
|
"logps/chosen": -4.186515808105469, |
|
"logps/rejected": -5.630705833435059, |
|
"loss": 19.5955, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3086017966270447, |
|
"rewards/margins": 0.1026659831404686, |
|
"rewards/rejected": -0.4112677574157715, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.7394587791063562, |
|
"grad_norm": 192.9811553955078, |
|
"learning_rate": 7.961750141413811e-06, |
|
"logits/chosen": -1.4113714694976807, |
|
"logits/rejected": -1.4863709211349487, |
|
"logps/chosen": -4.043957710266113, |
|
"logps/rejected": -4.903926849365234, |
|
"loss": 21.1766, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.30225270986557007, |
|
"rewards/margins": 0.0713002160191536, |
|
"rewards/rejected": -0.3735528886318207, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.7426054122089364, |
|
"grad_norm": 120.66477966308594, |
|
"learning_rate": 7.787042407291236e-06, |
|
"logits/chosen": -1.4459470510482788, |
|
"logits/rejected": -1.4732497930526733, |
|
"logps/chosen": -4.194180488586426, |
|
"logps/rejected": -5.103634834289551, |
|
"loss": 21.7414, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.33040323853492737, |
|
"rewards/margins": 0.07123108208179474, |
|
"rewards/rejected": -0.4016343653202057, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.7457520453115167, |
|
"grad_norm": 76.2223892211914, |
|
"learning_rate": 7.613808288602185e-06, |
|
"logits/chosen": -1.3101516962051392, |
|
"logits/rejected": -1.410070776939392, |
|
"logps/chosen": -3.897928237915039, |
|
"logps/rejected": -4.853459358215332, |
|
"loss": 20.4936, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.30166110396385193, |
|
"rewards/margins": 0.07222743332386017, |
|
"rewards/rejected": -0.3738885223865509, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.748898678414097, |
|
"grad_norm": 71.2408676147461, |
|
"learning_rate": 7.442068687774983e-06, |
|
"logits/chosen": -1.3900350332260132, |
|
"logits/rejected": -1.4306429624557495, |
|
"logps/chosen": -4.03500509262085, |
|
"logps/rejected": -4.971550941467285, |
|
"loss": 20.8514, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.31287750601768494, |
|
"rewards/margins": 0.07024455070495605, |
|
"rewards/rejected": -0.383122056722641, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.7520453115166772, |
|
"grad_norm": 174.92535400390625, |
|
"learning_rate": 7.271844326909465e-06, |
|
"logits/chosen": -1.3968006372451782, |
|
"logits/rejected": -1.3997862339019775, |
|
"logps/chosen": -4.94242000579834, |
|
"logps/rejected": -5.543642520904541, |
|
"loss": 23.6965, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.36712104082107544, |
|
"rewards/margins": 0.041298940777778625, |
|
"rewards/rejected": -0.40841999650001526, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.7551919446192574, |
|
"grad_norm": 83.12405395507812, |
|
"learning_rate": 7.1031557452765934e-06, |
|
"logits/chosen": -1.4155142307281494, |
|
"logits/rejected": -1.4555690288543701, |
|
"logps/chosen": -3.987143039703369, |
|
"logps/rejected": -5.240988731384277, |
|
"loss": 20.4557, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.312110960483551, |
|
"rewards/margins": 0.08850479125976562, |
|
"rewards/rejected": -0.40061575174331665, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.7583385777218377, |
|
"grad_norm": 82.25894165039062, |
|
"learning_rate": 6.936023296840211e-06, |
|
"logits/chosen": -1.3227570056915283, |
|
"logits/rejected": -1.4542601108551025, |
|
"logps/chosen": -4.520358562469482, |
|
"logps/rejected": -5.628200531005859, |
|
"loss": 21.0717, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3516823947429657, |
|
"rewards/margins": 0.07068557292222977, |
|
"rewards/rejected": -0.42236796021461487, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.7614852108244179, |
|
"grad_norm": 63.93009567260742, |
|
"learning_rate": 6.770467147801152e-06, |
|
"logits/chosen": -1.3352692127227783, |
|
"logits/rejected": -1.4765124320983887, |
|
"logps/chosen": -3.903353452682495, |
|
"logps/rejected": -5.777923107147217, |
|
"loss": 18.1176, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.30418699979782104, |
|
"rewards/margins": 0.1330038160085678, |
|
"rewards/rejected": -0.43719083070755005, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.7646318439269981, |
|
"grad_norm": 123.6090087890625, |
|
"learning_rate": 6.606507274163949e-06, |
|
"logits/chosen": -1.4196144342422485, |
|
"logits/rejected": -1.5160802602767944, |
|
"logps/chosen": -4.3763604164123535, |
|
"logps/rejected": -5.507956504821777, |
|
"loss": 21.3593, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.3455001711845398, |
|
"rewards/margins": 0.08908528089523315, |
|
"rewards/rejected": -0.43458548188209534, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.7677784770295784, |
|
"grad_norm": 79.51527404785156, |
|
"learning_rate": 6.444163459326569e-06, |
|
"logits/chosen": -1.3841816186904907, |
|
"logits/rejected": -1.44673752784729, |
|
"logps/chosen": -4.642246723175049, |
|
"logps/rejected": -5.952216625213623, |
|
"loss": 20.2826, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.37046322226524353, |
|
"rewards/margins": 0.10095451772212982, |
|
"rewards/rejected": -0.47141775488853455, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.7709251101321586, |
|
"grad_norm": 115.33991241455078, |
|
"learning_rate": 6.283455291693303e-06, |
|
"logits/chosen": -1.2804498672485352, |
|
"logits/rejected": -1.336126446723938, |
|
"logps/chosen": -4.530810356140137, |
|
"logps/rejected": -5.714901924133301, |
|
"loss": 23.5811, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.3543975353240967, |
|
"rewards/margins": 0.07916755974292755, |
|
"rewards/rejected": -0.43356508016586304, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.7740717432347388, |
|
"grad_norm": 102.68350219726562, |
|
"learning_rate": 6.124402162311274e-06, |
|
"logits/chosen": -1.3455007076263428, |
|
"logits/rejected": -1.3819594383239746, |
|
"logps/chosen": -4.560150146484375, |
|
"logps/rejected": -5.909863471984863, |
|
"loss": 21.4806, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.36466413736343384, |
|
"rewards/margins": 0.07453545182943344, |
|
"rewards/rejected": -0.4391995966434479, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.777218376337319, |
|
"grad_norm": 78.0007553100586, |
|
"learning_rate": 5.9670232625306955e-06, |
|
"logits/chosen": -1.3267484903335571, |
|
"logits/rejected": -1.3938989639282227, |
|
"logps/chosen": -4.1908979415893555, |
|
"logps/rejected": -4.819875240325928, |
|
"loss": 24.9323, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.3211560845375061, |
|
"rewards/margins": 0.054157011210918427, |
|
"rewards/rejected": -0.3753131031990051, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.7803650094398993, |
|
"grad_norm": 910.2023315429688, |
|
"learning_rate": 5.81133758168922e-06, |
|
"logits/chosen": -1.4007585048675537, |
|
"logits/rejected": -1.4542076587677002, |
|
"logps/chosen": -5.091724872589111, |
|
"logps/rejected": -6.444447994232178, |
|
"loss": 20.9318, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.36028310656547546, |
|
"rewards/margins": 0.09358057379722595, |
|
"rewards/rejected": -0.4538637101650238, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.7835116425424795, |
|
"grad_norm": 68.25672912597656, |
|
"learning_rate": 5.6573639048207315e-06, |
|
"logits/chosen": -1.3604391813278198, |
|
"logits/rejected": -1.3182973861694336, |
|
"logps/chosen": -4.621526718139648, |
|
"logps/rejected": -5.245944023132324, |
|
"loss": 21.9955, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.32450687885284424, |
|
"rewards/margins": 0.07323630154132843, |
|
"rewards/rejected": -0.3977431654930115, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.7866582756450597, |
|
"grad_norm": 106.51322937011719, |
|
"learning_rate": 5.5051208103887025e-06, |
|
"logits/chosen": -1.3608815670013428, |
|
"logits/rejected": -1.4448637962341309, |
|
"logps/chosen": -4.045924663543701, |
|
"logps/rejected": -5.57630729675293, |
|
"loss": 20.889, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3083491623401642, |
|
"rewards/margins": 0.09919731318950653, |
|
"rewards/rejected": -0.40754643082618713, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.78980490874764, |
|
"grad_norm": 70.59749603271484, |
|
"learning_rate": 5.354626668044535e-06, |
|
"logits/chosen": -1.3460859060287476, |
|
"logits/rejected": -1.412706732749939, |
|
"logps/chosen": -3.734891891479492, |
|
"logps/rejected": -4.818475246429443, |
|
"loss": 21.0468, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2917167842388153, |
|
"rewards/margins": 0.07943135499954224, |
|
"rewards/rejected": -0.37114813923835754, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.7929515418502202, |
|
"grad_norm": 83.2120361328125, |
|
"learning_rate": 5.205899636411078e-06, |
|
"logits/chosen": -1.3329652547836304, |
|
"logits/rejected": -1.3952248096466064, |
|
"logps/chosen": -4.460053443908691, |
|
"logps/rejected": -4.993377685546875, |
|
"loss": 25.4182, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.34508955478668213, |
|
"rewards/margins": 0.03861779719591141, |
|
"rewards/rejected": -0.38370734453201294, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.7960981749528006, |
|
"grad_norm": 74.94086456298828, |
|
"learning_rate": 5.058957660891613e-06, |
|
"logits/chosen": -1.353829264640808, |
|
"logits/rejected": -1.36537766456604, |
|
"logps/chosen": -3.8537967205047607, |
|
"logps/rejected": -4.86336612701416, |
|
"loss": 21.0046, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.29704272747039795, |
|
"rewards/margins": 0.07927833497524261, |
|
"rewards/rejected": -0.376321017742157, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.7992448080553808, |
|
"grad_norm": 68.53548431396484, |
|
"learning_rate": 4.913818471504552e-06, |
|
"logits/chosen": -1.3891483545303345, |
|
"logits/rejected": -1.4956327676773071, |
|
"logps/chosen": -3.83349609375, |
|
"logps/rejected": -5.111277103424072, |
|
"loss": 20.258, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.299319326877594, |
|
"rewards/margins": 0.09995778650045395, |
|
"rewards/rejected": -0.3992771506309509, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.802391441157961, |
|
"grad_norm": 161.29922485351562, |
|
"learning_rate": 4.770499580744125e-06, |
|
"logits/chosen": -1.3398183584213257, |
|
"logits/rejected": -1.3453642129898071, |
|
"logps/chosen": -3.9315247535705566, |
|
"logps/rejected": -4.841611862182617, |
|
"loss": 22.4824, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.30572158098220825, |
|
"rewards/margins": 0.06097061559557915, |
|
"rewards/rejected": -0.3666921854019165, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.8055380742605412, |
|
"grad_norm": 68.45879364013672, |
|
"learning_rate": 4.629018281467357e-06, |
|
"logits/chosen": -1.297154188156128, |
|
"logits/rejected": -1.338921070098877, |
|
"logps/chosen": -3.7794177532196045, |
|
"logps/rejected": -4.509110927581787, |
|
"loss": 21.658, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2888321876525879, |
|
"rewards/margins": 0.05916588753461838, |
|
"rewards/rejected": -0.3479980528354645, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.8086847073631215, |
|
"grad_norm": 74.77375793457031, |
|
"learning_rate": 4.489391644807462e-06, |
|
"logits/chosen": -1.4385647773742676, |
|
"logits/rejected": -1.5144340991973877, |
|
"logps/chosen": -3.69215726852417, |
|
"logps/rejected": -4.667183876037598, |
|
"loss": 21.0338, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.2842097282409668, |
|
"rewards/margins": 0.07265909761190414, |
|
"rewards/rejected": -0.35686883330345154, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.8118313404657017, |
|
"grad_norm": 78.63387298583984, |
|
"learning_rate": 4.351636518114091e-06, |
|
"logits/chosen": -1.3093000650405884, |
|
"logits/rejected": -1.3893928527832031, |
|
"logps/chosen": -3.599902629852295, |
|
"logps/rejected": -4.570587635040283, |
|
"loss": 22.1635, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.2749050259590149, |
|
"rewards/margins": 0.08025064319372177, |
|
"rewards/rejected": -0.3551556468009949, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.8149779735682819, |
|
"grad_norm": 78.53893280029297, |
|
"learning_rate": 4.215769522920487e-06, |
|
"logits/chosen": -1.2443653345108032, |
|
"logits/rejected": -1.3605782985687256, |
|
"logps/chosen": -3.2713770866394043, |
|
"logps/rejected": -4.569630146026611, |
|
"loss": 20.9369, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.24853453040122986, |
|
"rewards/margins": 0.10017738491296768, |
|
"rewards/rejected": -0.34871190786361694, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.8181246066708622, |
|
"grad_norm": 82.4554672241211, |
|
"learning_rate": 4.0818070529379715e-06, |
|
"logits/chosen": -1.383690357208252, |
|
"logits/rejected": -1.4704560041427612, |
|
"logps/chosen": -4.524319171905518, |
|
"logps/rejected": -5.7077460289001465, |
|
"loss": 21.9118, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.316571444272995, |
|
"rewards/margins": 0.0641409307718277, |
|
"rewards/rejected": -0.3807123601436615, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.8212712397734424, |
|
"grad_norm": 71.1880111694336, |
|
"learning_rate": 3.949765272077843e-06, |
|
"logits/chosen": -1.3107343912124634, |
|
"logits/rejected": -1.3561115264892578, |
|
"logps/chosen": -3.846195936203003, |
|
"logps/rejected": -4.79428768157959, |
|
"loss": 21.0994, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.27155357599258423, |
|
"rewards/margins": 0.07163957506418228, |
|
"rewards/rejected": -0.3431931734085083, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.8244178728760226, |
|
"grad_norm": 50.073204040527344, |
|
"learning_rate": 3.819660112501053e-06, |
|
"logits/chosen": -1.2764497995376587, |
|
"logits/rejected": -1.3517284393310547, |
|
"logps/chosen": -3.5745315551757812, |
|
"logps/rejected": -4.921723365783691, |
|
"loss": 19.6469, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.27279648184776306, |
|
"rewards/margins": 0.1019618958234787, |
|
"rewards/rejected": -0.37475839257240295, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.8275645059786029, |
|
"grad_norm": 83.62207794189453, |
|
"learning_rate": 3.6915072726958514e-06, |
|
"logits/chosen": -1.2466180324554443, |
|
"logits/rejected": -1.2861813306808472, |
|
"logps/chosen": -3.430490016937256, |
|
"logps/rejected": -4.824821949005127, |
|
"loss": 20.5161, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.2643739581108093, |
|
"rewards/margins": 0.1028999462723732, |
|
"rewards/rejected": -0.3672739565372467, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.8307111390811831, |
|
"grad_norm": 76.6629638671875, |
|
"learning_rate": 3.5653222155835686e-06, |
|
"logits/chosen": -1.2766977548599243, |
|
"logits/rejected": -1.3114259243011475, |
|
"logps/chosen": -4.222517967224121, |
|
"logps/rejected": -5.029845714569092, |
|
"loss": 22.1218, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.3145274221897125, |
|
"rewards/margins": 0.06165830045938492, |
|
"rewards/rejected": -0.37618574500083923, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.8338577721837633, |
|
"grad_norm": 159.4115447998047, |
|
"learning_rate": 3.4411201666529003e-06, |
|
"logits/chosen": -1.3758924007415771, |
|
"logits/rejected": -1.4244683980941772, |
|
"logps/chosen": -4.457423210144043, |
|
"logps/rejected": -5.342848300933838, |
|
"loss": 23.3834, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.31679028272628784, |
|
"rewards/margins": 0.06267707049846649, |
|
"rewards/rejected": -0.3794673979282379, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.8370044052863436, |
|
"grad_norm": 56.71870803833008, |
|
"learning_rate": 3.3189161121227564e-06, |
|
"logits/chosen": -1.3166803121566772, |
|
"logits/rejected": -1.385522723197937, |
|
"logps/chosen": -3.8323776721954346, |
|
"logps/rejected": -4.732277870178223, |
|
"loss": 23.3384, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.2978932559490204, |
|
"rewards/margins": 0.0644349679350853, |
|
"rewards/rejected": -0.3623282313346863, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.8401510383889238, |
|
"grad_norm": 66.62996673583984, |
|
"learning_rate": 3.198724797134074e-06, |
|
"logits/chosen": -1.2822662591934204, |
|
"logits/rejected": -1.4124181270599365, |
|
"logps/chosen": -3.9724369049072266, |
|
"logps/rejected": -5.0466437339782715, |
|
"loss": 22.4903, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.2994682192802429, |
|
"rewards/margins": 0.0788046196103096, |
|
"rewards/rejected": -0.3782728910446167, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.8432976714915041, |
|
"grad_norm": 70.8177261352539, |
|
"learning_rate": 3.080560723970616e-06, |
|
"logits/chosen": -1.2813329696655273, |
|
"logits/rejected": -1.3586981296539307, |
|
"logps/chosen": -3.6214439868927, |
|
"logps/rejected": -4.637081623077393, |
|
"loss": 20.5515, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.28146275877952576, |
|
"rewards/margins": 0.07804764062166214, |
|
"rewards/rejected": -0.3595103919506073, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.8464443045940844, |
|
"grad_norm": 64.40753173828125, |
|
"learning_rate": 2.96443815030917e-06, |
|
"logits/chosen": -1.3396605253219604, |
|
"logits/rejected": -1.4255945682525635, |
|
"logps/chosen": -3.604154586791992, |
|
"logps/rejected": -4.95128059387207, |
|
"loss": 20.7037, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.2776135206222534, |
|
"rewards/margins": 0.09353432059288025, |
|
"rewards/rejected": -0.37114784121513367, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.8495909376966646, |
|
"grad_norm": 93.99842071533203, |
|
"learning_rate": 2.850371087499195e-06, |
|
"logits/chosen": -1.381260633468628, |
|
"logits/rejected": -1.4631612300872803, |
|
"logps/chosen": -4.883763790130615, |
|
"logps/rejected": -6.07845401763916, |
|
"loss": 21.0858, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.3591059148311615, |
|
"rewards/margins": 0.09570769965648651, |
|
"rewards/rejected": -0.4548136591911316, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.8527375707992448, |
|
"grad_norm": 62.075279235839844, |
|
"learning_rate": 2.7383732988722057e-06, |
|
"logits/chosen": -1.3089946508407593, |
|
"logits/rejected": -1.3634613752365112, |
|
"logps/chosen": -3.7724010944366455, |
|
"logps/rejected": -4.929832458496094, |
|
"loss": 19.0202, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.273120641708374, |
|
"rewards/margins": 0.09602681547403336, |
|
"rewards/rejected": -0.36914747953414917, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.8558842039018251, |
|
"grad_norm": 80.0210189819336, |
|
"learning_rate": 2.6284582980811136e-06, |
|
"logits/chosen": -1.4461333751678467, |
|
"logits/rejected": -1.370339035987854, |
|
"logps/chosen": -4.136780738830566, |
|
"logps/rejected": -5.008397579193115, |
|
"loss": 23.5672, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.3027392327785492, |
|
"rewards/margins": 0.062295325100421906, |
|
"rewards/rejected": -0.3650345206260681, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.8590308370044053, |
|
"grad_norm": 169.91099548339844, |
|
"learning_rate": 2.5206393474696422e-06, |
|
"logits/chosen": -1.2922241687774658, |
|
"logits/rejected": -1.3685882091522217, |
|
"logps/chosen": -3.8860459327697754, |
|
"logps/rejected": -4.820228099822998, |
|
"loss": 20.1345, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2878992557525635, |
|
"rewards/margins": 0.07816118001937866, |
|
"rewards/rejected": -0.36606043577194214, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.8621774701069855, |
|
"grad_norm": 291.87542724609375, |
|
"learning_rate": 2.4149294564721146e-06, |
|
"logits/chosen": -1.390933632850647, |
|
"logits/rejected": -1.477757215499878, |
|
"logps/chosen": -4.5947346687316895, |
|
"logps/rejected": -5.662859916687012, |
|
"loss": 22.1173, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.32581329345703125, |
|
"rewards/margins": 0.0882103443145752, |
|
"rewards/rejected": -0.4140236973762512, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.8653241032095658, |
|
"grad_norm": 50.774810791015625, |
|
"learning_rate": 2.3113413800437145e-06, |
|
"logits/chosen": -1.3678381443023682, |
|
"logits/rejected": -1.4147788286209106, |
|
"logps/chosen": -4.411424160003662, |
|
"logps/rejected": -5.547976970672607, |
|
"loss": 20.419, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3136950135231018, |
|
"rewards/margins": 0.08119923621416092, |
|
"rewards/rejected": -0.3948942720890045, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.868470736312146, |
|
"grad_norm": 75.1661605834961, |
|
"learning_rate": 2.2098876171215e-06, |
|
"logits/chosen": -1.2949163913726807, |
|
"logits/rejected": -1.4591166973114014, |
|
"logps/chosen": -3.913958787918091, |
|
"logps/rejected": -4.945563316345215, |
|
"loss": 20.5075, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.30175596475601196, |
|
"rewards/margins": 0.09277000278234482, |
|
"rewards/rejected": -0.394525945186615, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.8716173694147262, |
|
"grad_norm": 116.18523406982422, |
|
"learning_rate": 2.110580409116261e-06, |
|
"logits/chosen": -1.3234283924102783, |
|
"logits/rejected": -1.3651349544525146, |
|
"logps/chosen": -4.782530307769775, |
|
"logps/rejected": -5.800885200500488, |
|
"loss": 22.8406, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.3511677384376526, |
|
"rewards/margins": 0.07397367060184479, |
|
"rewards/rejected": -0.4251413345336914, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 0.8747640025173065, |
|
"grad_norm": 145.46861267089844, |
|
"learning_rate": 2.013431738435465e-06, |
|
"logits/chosen": -1.3332188129425049, |
|
"logits/rejected": -1.4134724140167236, |
|
"logps/chosen": -4.268718242645264, |
|
"logps/rejected": -5.433601379394531, |
|
"loss": 22.5056, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.3332800269126892, |
|
"rewards/margins": 0.07072637230157852, |
|
"rewards/rejected": -0.4040064215660095, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.8779106356198867, |
|
"grad_norm": 117.83720397949219, |
|
"learning_rate": 1.9184533270374928e-06, |
|
"logits/chosen": -1.3927792310714722, |
|
"logits/rejected": -1.4590123891830444, |
|
"logps/chosen": -4.519114017486572, |
|
"logps/rejected": -5.810807228088379, |
|
"loss": 21.2018, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.34003710746765137, |
|
"rewards/margins": 0.08822458237409592, |
|
"rewards/rejected": -0.4282616972923279, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 0.8810572687224669, |
|
"grad_norm": 128.75563049316406, |
|
"learning_rate": 1.8256566350172211e-06, |
|
"logits/chosen": -1.4642970561981201, |
|
"logits/rejected": -1.56011962890625, |
|
"logps/chosen": -5.124087810516357, |
|
"logps/rejected": -6.271437168121338, |
|
"loss": 20.9824, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.34806352853775024, |
|
"rewards/margins": 0.0969148576259613, |
|
"rewards/rejected": -0.44497838616371155, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.8842039018250472, |
|
"grad_norm": 88.87577056884766, |
|
"learning_rate": 1.7350528592232962e-06, |
|
"logits/chosen": -1.3359493017196655, |
|
"logits/rejected": -1.4811887741088867, |
|
"logps/chosen": -4.525036811828613, |
|
"logps/rejected": -5.623012542724609, |
|
"loss": 22.1104, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.3581879138946533, |
|
"rewards/margins": 0.07608196139335632, |
|
"rewards/rejected": -0.43426984548568726, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 0.8873505349276274, |
|
"grad_norm": 69.19255065917969, |
|
"learning_rate": 1.6466529319070735e-06, |
|
"logits/chosen": -1.2726246118545532, |
|
"logits/rejected": -1.39580237865448, |
|
"logps/chosen": -3.7457852363586426, |
|
"logps/rejected": -5.324977397918701, |
|
"loss": 18.2434, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.2871127724647522, |
|
"rewards/margins": 0.11219409853219986, |
|
"rewards/rejected": -0.39930686354637146, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.8904971680302077, |
|
"grad_norm": 73.79737854003906, |
|
"learning_rate": 1.560467519403579e-06, |
|
"logits/chosen": -1.3266379833221436, |
|
"logits/rejected": -1.3948261737823486, |
|
"logps/chosen": -4.1067681312561035, |
|
"logps/rejected": -4.673392295837402, |
|
"loss": 22.1702, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.3159501254558563, |
|
"rewards/margins": 0.04971395805478096, |
|
"rewards/rejected": -0.3656640946865082, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 0.893643801132788, |
|
"grad_norm": 106.870361328125, |
|
"learning_rate": 1.4765070208444732e-06, |
|
"logits/chosen": -1.3216549158096313, |
|
"logits/rejected": -1.35343337059021, |
|
"logps/chosen": -4.343778133392334, |
|
"logps/rejected": -5.122066497802734, |
|
"loss": 22.7187, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.33430585265159607, |
|
"rewards/margins": 0.06294408440589905, |
|
"rewards/rejected": -0.3972499370574951, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.8967904342353682, |
|
"grad_norm": 62.6711311340332, |
|
"learning_rate": 1.3947815669033026e-06, |
|
"logits/chosen": -1.3594673871994019, |
|
"logits/rejected": -1.4739999771118164, |
|
"logps/chosen": -4.087611198425293, |
|
"logps/rejected": -5.339770317077637, |
|
"loss": 20.526, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.31806594133377075, |
|
"rewards/margins": 0.08883042633533478, |
|
"rewards/rejected": -0.40689635276794434, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.8999370673379484, |
|
"grad_norm": 98.1043930053711, |
|
"learning_rate": 1.3153010185731495e-06, |
|
"logits/chosen": -1.2508734464645386, |
|
"logits/rejected": -1.32900869846344, |
|
"logps/chosen": -4.235801696777344, |
|
"logps/rejected": -5.670529842376709, |
|
"loss": 20.3076, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3296756148338318, |
|
"rewards/margins": 0.09636791795492172, |
|
"rewards/rejected": -0.4260435700416565, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.9030837004405287, |
|
"grad_norm": 87.73750305175781, |
|
"learning_rate": 1.2380749659767766e-06, |
|
"logits/chosen": -1.3343340158462524, |
|
"logits/rejected": -1.3880221843719482, |
|
"logps/chosen": -4.322578430175781, |
|
"logps/rejected": -5.371191501617432, |
|
"loss": 20.9961, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.33630794286727905, |
|
"rewards/margins": 0.0794602781534195, |
|
"rewards/rejected": -0.41576823592185974, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.9062303335431089, |
|
"grad_norm": 72.0036392211914, |
|
"learning_rate": 1.1631127272095077e-06, |
|
"logits/chosen": -1.3422092199325562, |
|
"logits/rejected": -1.4017739295959473, |
|
"logps/chosen": -3.97587251663208, |
|
"logps/rejected": -5.63102388381958, |
|
"loss": 18.4484, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.302670419216156, |
|
"rewards/margins": 0.1103433147072792, |
|
"rewards/rejected": -0.413013756275177, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.9093769666456891, |
|
"grad_norm": 55.72761917114258, |
|
"learning_rate": 1.0904233472148862e-06, |
|
"logits/chosen": -1.4325498342514038, |
|
"logits/rejected": -1.5191594362258911, |
|
"logps/chosen": -4.523946285247803, |
|
"logps/rejected": -5.913887023925781, |
|
"loss": 20.9945, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.34643903374671936, |
|
"rewards/margins": 0.07747067511081696, |
|
"rewards/rejected": -0.4239097237586975, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 0.9125235997482694, |
|
"grad_norm": 74.03398132324219, |
|
"learning_rate": 1.0200155966933333e-06, |
|
"logits/chosen": -1.3860814571380615, |
|
"logits/rejected": -1.4824600219726562, |
|
"logps/chosen": -4.180668830871582, |
|
"logps/rejected": -5.086295127868652, |
|
"loss": 22.6256, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.31416332721710205, |
|
"rewards/margins": 0.06807545572519302, |
|
"rewards/rejected": -0.3822387754917145, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.9156702328508496, |
|
"grad_norm": 55.17578887939453, |
|
"learning_rate": 9.51897971043847e-07, |
|
"logits/chosen": -1.277956485748291, |
|
"logits/rejected": -1.4699045419692993, |
|
"logps/chosen": -3.923815965652466, |
|
"logps/rejected": -5.776226997375488, |
|
"loss": 18.1837, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.30271369218826294, |
|
"rewards/margins": 0.13357527554035187, |
|
"rewards/rejected": -0.4362889230251312, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 0.9188168659534298, |
|
"grad_norm": 67.42135620117188, |
|
"learning_rate": 8.860786893389761e-07, |
|
"logits/chosen": -1.3501498699188232, |
|
"logits/rejected": -1.4162402153015137, |
|
"logps/chosen": -4.456291198730469, |
|
"logps/rejected": -4.891867637634277, |
|
"loss": 23.4746, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.35184237360954285, |
|
"rewards/margins": 0.03937570005655289, |
|
"rewards/rejected": -0.3912180960178375, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.92196349905601, |
|
"grad_norm": 86.8721923828125, |
|
"learning_rate": 8.225656933330972e-07, |
|
"logits/chosen": -1.396032691001892, |
|
"logits/rejected": -1.3607252836227417, |
|
"logps/chosen": -4.139504909515381, |
|
"logps/rejected": -5.256811618804932, |
|
"loss": 20.6197, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.31887346506118774, |
|
"rewards/margins": 0.08756524324417114, |
|
"rewards/rejected": -0.4064387381076813, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 0.9251101321585903, |
|
"grad_norm": 63.26131057739258, |
|
"learning_rate": 7.613666465041492e-07, |
|
"logits/chosen": -1.296687364578247, |
|
"logits/rejected": -1.338370442390442, |
|
"logps/chosen": -4.0869526863098145, |
|
"logps/rejected": -4.680004596710205, |
|
"loss": 22.3496, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.30148619413375854, |
|
"rewards/margins": 0.06435124576091766, |
|
"rewards/rejected": -0.365837424993515, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.9282567652611705, |
|
"grad_norm": 64.71456909179688, |
|
"learning_rate": 7.024889331289731e-07, |
|
"logits/chosen": -1.3576750755310059, |
|
"logits/rejected": -1.4629138708114624, |
|
"logps/chosen": -4.305732250213623, |
|
"logps/rejected": -6.287524700164795, |
|
"loss": 19.0147, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.3270648717880249, |
|
"rewards/margins": 0.12565208971500397, |
|
"rewards/rejected": -0.45271697640419006, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.9314033983637507, |
|
"grad_norm": 79.55664825439453, |
|
"learning_rate": 6.459396573923227e-07, |
|
"logits/chosen": -1.2750294208526611, |
|
"logits/rejected": -1.3182651996612549, |
|
"logps/chosen": -3.8780131340026855, |
|
"logps/rejected": -5.497721195220947, |
|
"loss": 19.3141, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.29957860708236694, |
|
"rewards/margins": 0.11124887317419052, |
|
"rewards/rejected": -0.41082748770713806, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.934550031466331, |
|
"grad_norm": 97.28962707519531, |
|
"learning_rate": 5.917256425296725e-07, |
|
"logits/chosen": -1.3326900005340576, |
|
"logits/rejected": -1.3848145008087158, |
|
"logps/chosen": -4.326709270477295, |
|
"logps/rejected": -5.8570427894592285, |
|
"loss": 17.956, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.32169783115386963, |
|
"rewards/margins": 0.11987517029047012, |
|
"rewards/rejected": -0.44157299399375916, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 0.9376966645689113, |
|
"grad_norm": 104.4383773803711, |
|
"learning_rate": 5.398534300039227e-07, |
|
"logits/chosen": -1.3669896125793457, |
|
"logits/rejected": -1.4102351665496826, |
|
"logps/chosen": -4.2153167724609375, |
|
"logps/rejected": -5.1999030113220215, |
|
"loss": 20.9588, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3310829997062683, |
|
"rewards/margins": 0.07336001843214035, |
|
"rewards/rejected": -0.40444302558898926, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.9408432976714916, |
|
"grad_norm": 59.6121826171875, |
|
"learning_rate": 4.903292787161129e-07, |
|
"logits/chosen": -1.4228112697601318, |
|
"logits/rejected": -1.528313159942627, |
|
"logps/chosen": -4.338911533355713, |
|
"logps/rejected": -5.048561096191406, |
|
"loss": 22.4697, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.3458613455295563, |
|
"rewards/margins": 0.05565253645181656, |
|
"rewards/rejected": -0.40151387453079224, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 0.9439899307740718, |
|
"grad_norm": 134.8368377685547, |
|
"learning_rate": 4.4315916425021755e-07, |
|
"logits/chosen": -1.4706683158874512, |
|
"logits/rejected": -1.5189244747161865, |
|
"logps/chosen": -4.430064678192139, |
|
"logps/rejected": -4.881100177764893, |
|
"loss": 24.7599, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.34278133511543274, |
|
"rewards/margins": 0.03427756577730179, |
|
"rewards/rejected": -0.37705889344215393, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.947136563876652, |
|
"grad_norm": 75.44186401367188, |
|
"learning_rate": 3.983487781521311e-07, |
|
"logits/chosen": -1.3628993034362793, |
|
"logits/rejected": -1.5227676630020142, |
|
"logps/chosen": -4.508485317230225, |
|
"logps/rejected": -5.836249351501465, |
|
"loss": 21.4824, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.35081833600997925, |
|
"rewards/margins": 0.0795225128531456, |
|
"rewards/rejected": -0.43034085631370544, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.9502831969792322, |
|
"grad_norm": 53.86139678955078, |
|
"learning_rate": 3.5590352724293565e-07, |
|
"logits/chosen": -1.2814509868621826, |
|
"logits/rejected": -1.383336067199707, |
|
"logps/chosen": -3.697767972946167, |
|
"logps/rejected": -5.5374345779418945, |
|
"loss": 18.3089, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.28700894117355347, |
|
"rewards/margins": 0.12951508164405823, |
|
"rewards/rejected": -0.4165240228176117, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.9534298300818125, |
|
"grad_norm": 55.83627700805664, |
|
"learning_rate": 3.1582853296649785e-07, |
|
"logits/chosen": -1.3301982879638672, |
|
"logits/rejected": -1.4231036901474, |
|
"logps/chosen": -3.7521042823791504, |
|
"logps/rejected": -4.861963748931885, |
|
"loss": 19.3616, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2891950309276581, |
|
"rewards/margins": 0.09701049327850342, |
|
"rewards/rejected": -0.3862055242061615, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 0.9565764631843927, |
|
"grad_norm": 88.61446380615234, |
|
"learning_rate": 2.7812863077153253e-07, |
|
"logits/chosen": -1.2899259328842163, |
|
"logits/rejected": -1.398050308227539, |
|
"logps/chosen": -4.068936824798584, |
|
"logps/rejected": -5.717960357666016, |
|
"loss": 17.8938, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.31747734546661377, |
|
"rewards/margins": 0.11797485500574112, |
|
"rewards/rejected": -0.4354521632194519, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.9597230962869729, |
|
"grad_norm": 58.96453857421875, |
|
"learning_rate": 2.4280836952814913e-07, |
|
"logits/chosen": -1.3611301183700562, |
|
"logits/rejected": -1.4117127656936646, |
|
"logps/chosen": -4.0526018142700195, |
|
"logps/rejected": -5.437824249267578, |
|
"loss": 21.3406, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.31340762972831726, |
|
"rewards/margins": 0.07557393610477448, |
|
"rewards/rejected": -0.38898158073425293, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.9628697293895532, |
|
"grad_norm": 82.22030639648438, |
|
"learning_rate": 2.0987201097897757e-07, |
|
"logits/chosen": -1.290305256843567, |
|
"logits/rejected": -1.3669493198394775, |
|
"logps/chosen": -4.012240409851074, |
|
"logps/rejected": -6.001503944396973, |
|
"loss": 18.4697, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.3110642433166504, |
|
"rewards/margins": 0.14227357506752014, |
|
"rewards/rejected": -0.45333781838417053, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.9660163624921334, |
|
"grad_norm": 69.16776275634766, |
|
"learning_rate": 1.7932352922496844e-07, |
|
"logits/chosen": -1.3238952159881592, |
|
"logits/rejected": -1.4009875059127808, |
|
"logps/chosen": -4.168734550476074, |
|
"logps/rejected": -5.520012855529785, |
|
"loss": 18.6757, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.3233780264854431, |
|
"rewards/margins": 0.10560549795627594, |
|
"rewards/rejected": -0.42898350954055786, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 0.9691629955947136, |
|
"grad_norm": 87.41554260253906, |
|
"learning_rate": 1.5116661024584756e-07, |
|
"logits/chosen": -1.3047425746917725, |
|
"logits/rejected": -1.2935268878936768, |
|
"logps/chosen": -3.8972859382629395, |
|
"logps/rejected": -5.7956743240356445, |
|
"loss": 19.4437, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2976101040840149, |
|
"rewards/margins": 0.13567054271697998, |
|
"rewards/rejected": -0.4332806169986725, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.9723096286972939, |
|
"grad_norm": 129.71432495117188, |
|
"learning_rate": 1.254046514553986e-07, |
|
"logits/chosen": -1.3411355018615723, |
|
"logits/rejected": -1.3150873184204102, |
|
"logps/chosen": -4.793996334075928, |
|
"logps/rejected": -6.1579155921936035, |
|
"loss": 22.5465, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.36123085021972656, |
|
"rewards/margins": 0.08643898367881775, |
|
"rewards/rejected": -0.4476698338985443, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 0.9754562617998741, |
|
"grad_norm": 156.82296752929688, |
|
"learning_rate": 1.0204076129150198e-07, |
|
"logits/chosen": -1.3176259994506836, |
|
"logits/rejected": -1.371140956878662, |
|
"logps/chosen": -4.381787300109863, |
|
"logps/rejected": -5.822647571563721, |
|
"loss": 20.2445, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.322052001953125, |
|
"rewards/margins": 0.08496570587158203, |
|
"rewards/rejected": -0.40701770782470703, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.9786028949024543, |
|
"grad_norm": 101.22770690917969, |
|
"learning_rate": 8.107775884109048e-08, |
|
"logits/chosen": -1.377939224243164, |
|
"logits/rejected": -1.460756540298462, |
|
"logps/chosen": -4.821037292480469, |
|
"logps/rejected": -5.5621137619018555, |
|
"loss": 23.1685, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.37864041328430176, |
|
"rewards/margins": 0.05817138031125069, |
|
"rewards/rejected": -0.43681177496910095, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 0.9817495280050346, |
|
"grad_norm": 93.55181884765625, |
|
"learning_rate": 6.251817349998578e-08, |
|
"logits/chosen": -1.2559947967529297, |
|
"logits/rejected": -1.3171112537384033, |
|
"logps/chosen": -3.9931647777557373, |
|
"logps/rejected": -5.348459243774414, |
|
"loss": 22.9477, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.30862289667129517, |
|
"rewards/margins": 0.0842631608247757, |
|
"rewards/rejected": -0.39288607239723206, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.9848961611076148, |
|
"grad_norm": 80.63821411132812, |
|
"learning_rate": 4.636424466771372e-08, |
|
"logits/chosen": -1.24492347240448, |
|
"logits/rejected": -1.3349525928497314, |
|
"logps/chosen": -4.380553245544434, |
|
"logps/rejected": -5.421158313751221, |
|
"loss": 22.0329, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.34523719549179077, |
|
"rewards/margins": 0.07052381336688995, |
|
"rewards/rejected": -0.4157610535621643, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 0.9880427942101951, |
|
"grad_norm": 55.254642486572266, |
|
"learning_rate": 3.261792147728704e-08, |
|
"logits/chosen": -1.3501121997833252, |
|
"logits/rejected": -1.3522610664367676, |
|
"logps/chosen": -4.829428195953369, |
|
"logps/rejected": -5.480432033538818, |
|
"loss": 22.6751, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3382914662361145, |
|
"rewards/margins": 0.05635923147201538, |
|
"rewards/rejected": -0.3946506381034851, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.9911894273127754, |
|
"grad_norm": 102.65123748779297, |
|
"learning_rate": 2.1280862560026927e-08, |
|
"logits/chosen": -1.350527048110962, |
|
"logits/rejected": -1.3495935201644897, |
|
"logps/chosen": -3.8183772563934326, |
|
"logps/rejected": -4.949650764465332, |
|
"loss": 22.3353, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.3000851273536682, |
|
"rewards/margins": 0.07860491424798965, |
|
"rewards/rejected": -0.37869006395339966, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.9943360604153556, |
|
"grad_norm": 67.94386291503906, |
|
"learning_rate": 1.2354435845436385e-08, |
|
"logits/chosen": -1.2602336406707764, |
|
"logits/rejected": -1.2594802379608154, |
|
"logps/chosen": -3.5885491371154785, |
|
"logps/rejected": -4.909377098083496, |
|
"loss": 18.7801, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.27459749579429626, |
|
"rewards/margins": 0.10287781804800034, |
|
"rewards/rejected": -0.3774753212928772, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.9974826935179358, |
|
"grad_norm": 78.847412109375, |
|
"learning_rate": 5.8397183961411694e-09, |
|
"logits/chosen": -1.4188308715820312, |
|
"logits/rejected": -1.3911654949188232, |
|
"logps/chosen": -4.257325649261475, |
|
"logps/rejected": -5.559029579162598, |
|
"loss": 20.67, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.30969610810279846, |
|
"rewards/margins": 0.08111827820539474, |
|
"rewards/rejected": -0.3908143639564514, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1589, |
|
"total_flos": 0.0, |
|
"train_loss": 22.009478435192264, |
|
"train_runtime": 23016.83, |
|
"train_samples_per_second": 1.105, |
|
"train_steps_per_second": 0.069 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 1589, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|