|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9994666666666666, |
|
"eval_steps": 500, |
|
"global_step": 937, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.319148936170213e-08, |
|
"logits/chosen": 0.4053989052772522, |
|
"logits/rejected": 0.1312936246395111, |
|
"logps/chosen": -434.00537109375, |
|
"logps/rejected": -516.5983276367188, |
|
"loss": 0.1853, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.319148936170213e-07, |
|
"logits/chosen": 0.15297521650791168, |
|
"logits/rejected": 0.29175662994384766, |
|
"logps/chosen": -365.80181884765625, |
|
"logps/rejected": -353.0853271484375, |
|
"loss": 0.2099, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.0007080123177729547, |
|
"rewards/margins": -5.8396861277287826e-05, |
|
"rewards/rejected": -0.0006496154237538576, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0638297872340427e-06, |
|
"logits/chosen": 0.11968117952346802, |
|
"logits/rejected": 0.2041483372449875, |
|
"logps/chosen": -340.0993347167969, |
|
"logps/rejected": -348.33087158203125, |
|
"loss": 0.2094, |
|
"rewards/accuracies": 0.26249998807907104, |
|
"rewards/chosen": -0.000655159296002239, |
|
"rewards/margins": -8.313418220495805e-05, |
|
"rewards/rejected": -0.0005720251356251538, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.595744680851064e-06, |
|
"logits/chosen": 0.2551038861274719, |
|
"logits/rejected": 0.25183868408203125, |
|
"logps/chosen": -383.1521301269531, |
|
"logps/rejected": -364.0672302246094, |
|
"loss": 0.2012, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": -0.0005650260718539357, |
|
"rewards/margins": 0.00010353984544053674, |
|
"rewards/rejected": -0.0006685658590868115, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.1276595744680853e-06, |
|
"logits/chosen": 0.1547292321920395, |
|
"logits/rejected": 0.27106207609176636, |
|
"logps/chosen": -401.61614990234375, |
|
"logps/rejected": -385.8863220214844, |
|
"loss": 0.2099, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": -0.0006045111804269254, |
|
"rewards/margins": -5.9384223277447745e-05, |
|
"rewards/rejected": -0.0005451269680634141, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.6595744680851065e-06, |
|
"logits/chosen": 0.23326897621154785, |
|
"logits/rejected": 0.27433687448501587, |
|
"logps/chosen": -441.8401794433594, |
|
"logps/rejected": -432.41485595703125, |
|
"loss": 0.2047, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.000930719543248415, |
|
"rewards/margins": 0.000368706532754004, |
|
"rewards/rejected": -0.0012994259595870972, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.191489361702128e-06, |
|
"logits/chosen": 0.17064206302165985, |
|
"logits/rejected": 0.3185887336730957, |
|
"logps/chosen": -410.41473388671875, |
|
"logps/rejected": -414.3666076660156, |
|
"loss": 0.2182, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -0.001497046323493123, |
|
"rewards/margins": 0.00016530933498870581, |
|
"rewards/rejected": -0.0016623556148260832, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.723404255319149e-06, |
|
"logits/chosen": 0.12393184751272202, |
|
"logits/rejected": 0.2235107123851776, |
|
"logps/chosen": -354.70562744140625, |
|
"logps/rejected": -356.94586181640625, |
|
"loss": 0.2086, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -5.924403740209527e-05, |
|
"rewards/margins": 0.0009310436435043812, |
|
"rewards/rejected": -0.000990287633612752, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.255319148936171e-06, |
|
"logits/chosen": 0.14353762567043304, |
|
"logits/rejected": 0.2516772449016571, |
|
"logps/chosen": -392.6264343261719, |
|
"logps/rejected": -380.66351318359375, |
|
"loss": 0.208, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": 0.0002285484952153638, |
|
"rewards/margins": 0.001034508110024035, |
|
"rewards/rejected": -0.0008059596875682473, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.787234042553192e-06, |
|
"logits/chosen": 0.24103212356567383, |
|
"logits/rejected": 0.1776101142168045, |
|
"logps/chosen": -393.3184509277344, |
|
"logps/rejected": -416.2762145996094, |
|
"loss": 0.1992, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.0003849788336083293, |
|
"rewards/margins": 0.0017982361605390906, |
|
"rewards/rejected": -0.0021832147613167763, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.999375059004058e-06, |
|
"logits/chosen": 0.16443544626235962, |
|
"logits/rejected": 0.17112873494625092, |
|
"logps/chosen": -416.6537170410156, |
|
"logps/rejected": -411.6963806152344, |
|
"loss": 0.2064, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": 0.0005408526631072164, |
|
"rewards/margins": 0.0026028361171483994, |
|
"rewards/rejected": -0.002061983570456505, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.9955571065548795e-06, |
|
"logits/chosen": 0.2384149730205536, |
|
"logits/rejected": 0.1614537537097931, |
|
"logps/chosen": -406.7789306640625, |
|
"logps/rejected": -391.0703430175781, |
|
"loss": 0.2008, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.012324010953307152, |
|
"rewards/margins": 0.0033186424989253283, |
|
"rewards/rejected": 0.009005369618535042, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9882736864879e-06, |
|
"logits/chosen": 0.08936997503042221, |
|
"logits/rejected": 0.25732293725013733, |
|
"logps/chosen": -397.0160827636719, |
|
"logps/rejected": -431.9867248535156, |
|
"loss": 0.2064, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.01773521490395069, |
|
"rewards/margins": 0.007638473063707352, |
|
"rewards/rejected": 0.01009674184024334, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.977534912960124e-06, |
|
"logits/chosen": 0.14923642575740814, |
|
"logits/rejected": 0.27579236030578613, |
|
"logps/chosen": -407.21258544921875, |
|
"logps/rejected": -401.8697204589844, |
|
"loss": 0.2048, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.005354008637368679, |
|
"rewards/margins": 0.00832393579185009, |
|
"rewards/rejected": -0.0029699269216507673, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.963355698422092e-06, |
|
"logits/chosen": 0.13965365290641785, |
|
"logits/rejected": 0.20428553223609924, |
|
"logps/chosen": -396.0818786621094, |
|
"logps/rejected": -384.4440612792969, |
|
"loss": 0.2016, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -0.011929613538086414, |
|
"rewards/margins": 0.00782632827758789, |
|
"rewards/rejected": -0.01975594088435173, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.945755732909625e-06, |
|
"logits/chosen": 0.0017524458235129714, |
|
"logits/rejected": 0.048104483634233475, |
|
"logps/chosen": -403.7103576660156, |
|
"logps/rejected": -421.3060607910156, |
|
"loss": 0.1918, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.020950669422745705, |
|
"rewards/margins": 0.01481578964740038, |
|
"rewards/rejected": -0.03576646000146866, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.924759456701167e-06, |
|
"logits/chosen": 0.050279758870601654, |
|
"logits/rejected": 0.12556883692741394, |
|
"logps/chosen": -467.9580993652344, |
|
"logps/rejected": -487.71844482421875, |
|
"loss": 0.1868, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.044767118990421295, |
|
"rewards/margins": 0.03162100166082382, |
|
"rewards/rejected": -0.07638812065124512, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.900396026378671e-06, |
|
"logits/chosen": -0.020991306751966476, |
|
"logits/rejected": 0.15817420184612274, |
|
"logps/chosen": -522.8843383789062, |
|
"logps/rejected": -518.1360473632812, |
|
"loss": 0.2105, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.08218502998352051, |
|
"rewards/margins": 0.02223752811551094, |
|
"rewards/rejected": -0.10442256927490234, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.872699274339169e-06, |
|
"logits/chosen": 0.08929436653852463, |
|
"logits/rejected": 0.09290768206119537, |
|
"logps/chosen": -470.04296875, |
|
"logps/rejected": -501.46661376953125, |
|
"loss": 0.1886, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.08902844041585922, |
|
"rewards/margins": 0.03190689533948898, |
|
"rewards/rejected": -0.12093535810709, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.8417076618132434e-06, |
|
"logits/chosen": 0.017135417088866234, |
|
"logits/rejected": 0.09486501663923264, |
|
"logps/chosen": -600.1754760742188, |
|
"logps/rejected": -609.9652709960938, |
|
"loss": 0.1887, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.11600615829229355, |
|
"rewards/margins": 0.041202057152986526, |
|
"rewards/rejected": -0.15720821917057037, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.807464225455655e-06, |
|
"logits/chosen": -0.032647065818309784, |
|
"logits/rejected": 0.09240031987428665, |
|
"logps/chosen": -527.2655029296875, |
|
"logps/rejected": -595.9906005859375, |
|
"loss": 0.1949, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.1231132298707962, |
|
"rewards/margins": 0.051538724452257156, |
|
"rewards/rejected": -0.17465195059776306, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.770016517582283e-06, |
|
"logits/chosen": 0.03914088383316994, |
|
"logits/rejected": 0.028707262128591537, |
|
"logps/chosen": -524.7379760742188, |
|
"logps/rejected": -570.7955322265625, |
|
"loss": 0.1902, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.12313251197338104, |
|
"rewards/margins": 0.04237721115350723, |
|
"rewards/rejected": -0.16550973057746887, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7294165401363616e-06, |
|
"logits/chosen": 0.010909264907240868, |
|
"logits/rejected": -0.024190250784158707, |
|
"logps/chosen": -549.97607421875, |
|
"logps/rejected": -590.9778442382812, |
|
"loss": 0.1843, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.14806947112083435, |
|
"rewards/margins": 0.04638643562793732, |
|
"rewards/rejected": -0.19445592164993286, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.68572067247573e-06, |
|
"logits/chosen": -0.018162641674280167, |
|
"logits/rejected": 0.000972352921962738, |
|
"logps/chosen": -549.0392456054688, |
|
"logps/rejected": -598.3811645507812, |
|
"loss": 0.2025, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.1491001546382904, |
|
"rewards/margins": 0.04847537726163864, |
|
"rewards/rejected": -0.19757553935050964, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.638989593081364e-06, |
|
"logits/chosen": -0.12062356621026993, |
|
"logits/rejected": 0.04868536815047264, |
|
"logps/chosen": -484.58685302734375, |
|
"logps/rejected": -516.2865600585938, |
|
"loss": 0.1908, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.1070113405585289, |
|
"rewards/margins": 0.028431424871087074, |
|
"rewards/rejected": -0.13544276356697083, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5892881952959015e-06, |
|
"logits/chosen": -0.041638366878032684, |
|
"logits/rejected": 0.0221172496676445, |
|
"logps/chosen": -507.4730529785156, |
|
"logps/rejected": -527.9345703125, |
|
"loss": 0.2052, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.09331385791301727, |
|
"rewards/margins": 0.03461749479174614, |
|
"rewards/rejected": -0.1279313564300537, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.536685497209182e-06, |
|
"logits/chosen": -0.05273251608014107, |
|
"logits/rejected": -0.022044766694307327, |
|
"logps/chosen": -538.548583984375, |
|
"logps/rejected": -590.6500854492188, |
|
"loss": 0.182, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.10730306804180145, |
|
"rewards/margins": 0.039316385984420776, |
|
"rewards/rejected": -0.14661946892738342, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.481254545815943e-06, |
|
"logits/chosen": -0.12279339134693146, |
|
"logits/rejected": -0.079288050532341, |
|
"logps/chosen": -560.711181640625, |
|
"logps/rejected": -635.7985229492188, |
|
"loss": 0.1845, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.12682856619358063, |
|
"rewards/margins": 0.0557611808180809, |
|
"rewards/rejected": -0.18258973956108093, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.42307231557875e-06, |
|
"logits/chosen": -0.12978403270244598, |
|
"logits/rejected": -0.05718718096613884, |
|
"logps/chosen": -534.7046508789062, |
|
"logps/rejected": -573.9546508789062, |
|
"loss": 0.1872, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.12655052542686462, |
|
"rewards/margins": 0.05336705967783928, |
|
"rewards/rejected": -0.1799176186323166, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.3622196015370305e-06, |
|
"logits/chosen": -0.13656684756278992, |
|
"logits/rejected": -0.07923261821269989, |
|
"logps/chosen": -537.6559448242188, |
|
"logps/rejected": -627.1619873046875, |
|
"loss": 0.1952, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.13470812141895294, |
|
"rewards/margins": 0.05948293209075928, |
|
"rewards/rejected": -0.1941910684108734, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.298780907110648e-06, |
|
"logits/chosen": -0.11429516226053238, |
|
"logits/rejected": -0.12869636714458466, |
|
"logps/chosen": -543.2788696289062, |
|
"logps/rejected": -558.6578369140625, |
|
"loss": 0.1847, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.12002478539943695, |
|
"rewards/margins": 0.050586897879838943, |
|
"rewards/rejected": -0.1706116795539856, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.23284432675381e-06, |
|
"logits/chosen": -0.18651030957698822, |
|
"logits/rejected": -0.052459727972745895, |
|
"logps/chosen": -461.1847229003906, |
|
"logps/rejected": -506.2823181152344, |
|
"loss": 0.1928, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.08158674091100693, |
|
"rewards/margins": 0.045484792441129684, |
|
"rewards/rejected": -0.1270715296268463, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.164501423622277e-06, |
|
"logits/chosen": -0.1330818384885788, |
|
"logits/rejected": -0.09265539795160294, |
|
"logps/chosen": -483.45599365234375, |
|
"logps/rejected": -515.7194213867188, |
|
"loss": 0.1797, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.10115663707256317, |
|
"rewards/margins": 0.04430658370256424, |
|
"rewards/rejected": -0.14546321332454681, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.0938471024237355e-06, |
|
"logits/chosen": -0.11196194589138031, |
|
"logits/rejected": -0.09686783701181412, |
|
"logps/chosen": -557.8707885742188, |
|
"logps/rejected": -571.39794921875, |
|
"loss": 0.1958, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.12075225263834, |
|
"rewards/margins": 0.04514995589852333, |
|
"rewards/rejected": -0.16590221226215363, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.020979477627907e-06, |
|
"logits/chosen": -0.08174435794353485, |
|
"logits/rejected": -0.06923134624958038, |
|
"logps/chosen": -531.8570556640625, |
|
"logps/rejected": -605.0074462890625, |
|
"loss": 0.1889, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.11230266094207764, |
|
"rewards/margins": 0.06913084536790848, |
|
"rewards/rejected": -0.18143349885940552, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.9459997372194105e-06, |
|
"logits/chosen": -0.16061343252658844, |
|
"logits/rejected": -0.027816006913781166, |
|
"logps/chosen": -534.1594848632812, |
|
"logps/rejected": -573.8477783203125, |
|
"loss": 0.1926, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.0970136970281601, |
|
"rewards/margins": 0.045039448887109756, |
|
"rewards/rejected": -0.14205312728881836, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.869012002182573e-06, |
|
"logits/chosen": -0.24527081847190857, |
|
"logits/rejected": -0.1484527587890625, |
|
"logps/chosen": -544.539306640625, |
|
"logps/rejected": -564.9341430664062, |
|
"loss": 0.1859, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.09417351335287094, |
|
"rewards/margins": 0.056557249277830124, |
|
"rewards/rejected": -0.15073075890541077, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.7901231819133104e-06, |
|
"logits/chosen": -0.1722763478755951, |
|
"logits/rejected": -0.17130622267723083, |
|
"logps/chosen": -496.27313232421875, |
|
"logps/rejected": -557.1398315429688, |
|
"loss": 0.1877, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.0965786799788475, |
|
"rewards/margins": 0.057069409638643265, |
|
"rewards/rejected": -0.15364809334278107, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.709442825758875e-06, |
|
"logits/chosen": -0.286950945854187, |
|
"logits/rejected": -0.12660877406597137, |
|
"logps/chosen": -487.8304138183594, |
|
"logps/rejected": -506.80267333984375, |
|
"loss": 0.1784, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.08378596603870392, |
|
"rewards/margins": 0.04325443506240845, |
|
"rewards/rejected": -0.12704041600227356, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.6270829708916113e-06, |
|
"logits/chosen": -0.2721463441848755, |
|
"logits/rejected": -0.19791728258132935, |
|
"logps/chosen": -525.049560546875, |
|
"logps/rejected": -564.6629028320312, |
|
"loss": 0.1924, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.1035178154706955, |
|
"rewards/margins": 0.03107512556016445, |
|
"rewards/rejected": -0.1345929503440857, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.543157986727991e-06, |
|
"logits/chosen": -0.17590856552124023, |
|
"logits/rejected": -0.16738948225975037, |
|
"logps/chosen": -520.5001831054688, |
|
"logps/rejected": -564.5961303710938, |
|
"loss": 0.1854, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.09734812378883362, |
|
"rewards/margins": 0.0474289208650589, |
|
"rewards/rejected": -0.14477702975273132, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4577844161089614e-06, |
|
"logits/chosen": -0.17745746672153473, |
|
"logits/rejected": -0.18353696167469025, |
|
"logps/chosen": -508.34637451171875, |
|
"logps/rejected": -577.1897583007812, |
|
"loss": 0.1804, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.0865376815199852, |
|
"rewards/margins": 0.049374908208847046, |
|
"rewards/rejected": -0.13591258227825165, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3710808134621577e-06, |
|
"logits/chosen": -0.17098669707775116, |
|
"logits/rejected": -0.13703958690166473, |
|
"logps/chosen": -539.40087890625, |
|
"logps/rejected": -593.8014526367188, |
|
"loss": 0.1851, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.09274087101221085, |
|
"rewards/margins": 0.05765017122030258, |
|
"rewards/rejected": -0.15039105713367462, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2831675801707126e-06, |
|
"logits/chosen": -0.20213007926940918, |
|
"logits/rejected": -0.20745894312858582, |
|
"logps/chosen": -453.65478515625, |
|
"logps/rejected": -497.0008850097656, |
|
"loss": 0.1824, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.08567062765359879, |
|
"rewards/margins": 0.04529280215501785, |
|
"rewards/rejected": -0.13096341490745544, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.194166797377289e-06, |
|
"logits/chosen": -0.21449732780456543, |
|
"logits/rejected": -0.19523288309574127, |
|
"logps/chosen": -547.9935302734375, |
|
"logps/rejected": -572.2437744140625, |
|
"loss": 0.1901, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.09648506343364716, |
|
"rewards/margins": 0.03453432396054268, |
|
"rewards/rejected": -0.13101938366889954, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.104202056455501e-06, |
|
"logits/chosen": -0.22678379714488983, |
|
"logits/rejected": -0.18668214976787567, |
|
"logps/chosen": -519.3316650390625, |
|
"logps/rejected": -561.0910034179688, |
|
"loss": 0.1896, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.09367823600769043, |
|
"rewards/margins": 0.058413583785295486, |
|
"rewards/rejected": -0.15209180116653442, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.013398287384144e-06, |
|
"logits/chosen": -0.20922398567199707, |
|
"logits/rejected": -0.15190599858760834, |
|
"logps/chosen": -554.7764892578125, |
|
"logps/rejected": -584.9015502929688, |
|
"loss": 0.1777, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.11632993072271347, |
|
"rewards/margins": 0.048789944499731064, |
|
"rewards/rejected": -0.16511985659599304, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9218815852625717e-06, |
|
"logits/chosen": -0.2042142152786255, |
|
"logits/rejected": -0.19644713401794434, |
|
"logps/chosen": -522.6699829101562, |
|
"logps/rejected": -589.4488525390625, |
|
"loss": 0.189, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.10839296877384186, |
|
"rewards/margins": 0.06792866438627243, |
|
"rewards/rejected": -0.17632164061069489, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.829779035208113e-06, |
|
"logits/chosen": -0.29581087827682495, |
|
"logits/rejected": -0.17288121581077576, |
|
"logps/chosen": -492.73297119140625, |
|
"logps/rejected": -565.6483764648438, |
|
"loss": 0.1819, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.10467328131198883, |
|
"rewards/margins": 0.0681912824511528, |
|
"rewards/rejected": -0.17286454141139984, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.737218535878705e-06, |
|
"logits/chosen": -0.1768864393234253, |
|
"logits/rejected": -0.19145308434963226, |
|
"logps/chosen": -481.3701171875, |
|
"logps/rejected": -552.0697021484375, |
|
"loss": 0.1861, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.10367073863744736, |
|
"rewards/margins": 0.0636112317442894, |
|
"rewards/rejected": -0.16728197038173676, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.64432862186579e-06, |
|
"logits/chosen": -0.25040799379348755, |
|
"logits/rejected": -0.2705633044242859, |
|
"logps/chosen": -473.15777587890625, |
|
"logps/rejected": -521.9263916015625, |
|
"loss": 0.1845, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.10329292714595795, |
|
"rewards/margins": 0.04386230558156967, |
|
"rewards/rejected": -0.1471552550792694, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.551238285204126e-06, |
|
"logits/chosen": -0.22839005291461945, |
|
"logits/rejected": -0.18522998690605164, |
|
"logps/chosen": -562.2581176757812, |
|
"logps/rejected": -602.7523193359375, |
|
"loss": 0.1852, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.11718226969242096, |
|
"rewards/margins": 0.054385870695114136, |
|
"rewards/rejected": -0.1715681403875351, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4580767962463688e-06, |
|
"logits/chosen": -0.28231528401374817, |
|
"logits/rejected": -0.1746218502521515, |
|
"logps/chosen": -508.0462951660156, |
|
"logps/rejected": -521.466552734375, |
|
"loss": 0.1887, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.0966549962759018, |
|
"rewards/margins": 0.04605900123715401, |
|
"rewards/rejected": -0.1427139937877655, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3649735241511546e-06, |
|
"logits/chosen": -0.14483687281608582, |
|
"logits/rejected": -0.18159925937652588, |
|
"logps/chosen": -519.6622314453125, |
|
"logps/rejected": -554.4771728515625, |
|
"loss": 0.1881, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.10754968971014023, |
|
"rewards/margins": 0.043608419597148895, |
|
"rewards/rejected": -0.15115809440612793, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2720577572339914e-06, |
|
"logits/chosen": -0.27724790573120117, |
|
"logits/rejected": -0.18303519487380981, |
|
"logps/chosen": -502.09747314453125, |
|
"logps/rejected": -529.7732543945312, |
|
"loss": 0.1902, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -0.0977000966668129, |
|
"rewards/margins": 0.04301925003528595, |
|
"rewards/rejected": -0.14071933925151825, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1794585234303995e-06, |
|
"logits/chosen": -0.2885403633117676, |
|
"logits/rejected": -0.16289584338665009, |
|
"logps/chosen": -519.3963012695312, |
|
"logps/rejected": -553.4032592773438, |
|
"loss": 0.1871, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.10111876577138901, |
|
"rewards/margins": 0.040404774248600006, |
|
"rewards/rejected": -0.1415235549211502, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0873044111206407e-06, |
|
"logits/chosen": -0.23527821898460388, |
|
"logits/rejected": -0.2247372567653656, |
|
"logps/chosen": -481.41552734375, |
|
"logps/rejected": -552.4132080078125, |
|
"loss": 0.2026, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.10331599414348602, |
|
"rewards/margins": 0.040959432721138, |
|
"rewards/rejected": -0.14427544176578522, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9957233905648293e-06, |
|
"logits/chosen": -0.28348255157470703, |
|
"logits/rejected": -0.26194503903388977, |
|
"logps/chosen": -467.77740478515625, |
|
"logps/rejected": -507.6676330566406, |
|
"loss": 0.1887, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.10089198499917984, |
|
"rewards/margins": 0.0405060276389122, |
|
"rewards/rejected": -0.14139802753925323, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.904842636196402e-06, |
|
"logits/chosen": -0.22403912246227264, |
|
"logits/rejected": -0.19076624512672424, |
|
"logps/chosen": -500.50982666015625, |
|
"logps/rejected": -544.9527587890625, |
|
"loss": 0.1793, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.09000807255506516, |
|
"rewards/margins": 0.05757290869951248, |
|
"rewards/rejected": -0.14758098125457764, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.814788350020726e-06, |
|
"logits/chosen": -0.25425633788108826, |
|
"logits/rejected": -0.13311608135700226, |
|
"logps/chosen": -523.557373046875, |
|
"logps/rejected": -576.8714599609375, |
|
"loss": 0.1667, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.09282426536083221, |
|
"rewards/margins": 0.06447537243366241, |
|
"rewards/rejected": -0.15729963779449463, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.725685586364051e-06, |
|
"logits/chosen": -0.25314217805862427, |
|
"logits/rejected": -0.2236749678850174, |
|
"logps/chosen": -442.9320373535156, |
|
"logps/rejected": -521.5167236328125, |
|
"loss": 0.1816, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.08528304100036621, |
|
"rewards/margins": 0.05453087016940117, |
|
"rewards/rejected": -0.13981391489505768, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6376580782162172e-06, |
|
"logits/chosen": -0.2589682936668396, |
|
"logits/rejected": -0.2686694264411926, |
|
"logps/chosen": -501.1578674316406, |
|
"logps/rejected": -545.9219970703125, |
|
"loss": 0.1949, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.09304684400558472, |
|
"rewards/margins": 0.06486930698156357, |
|
"rewards/rejected": -0.1579161435365677, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.550828065408227e-06, |
|
"logits/chosen": -0.15998974442481995, |
|
"logits/rejected": -0.26897841691970825, |
|
"logps/chosen": -483.9093322753906, |
|
"logps/rejected": -574.64990234375, |
|
"loss": 0.1784, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.0897846445441246, |
|
"rewards/margins": 0.059298910200595856, |
|
"rewards/rejected": -0.14908355474472046, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4653161248633053e-06, |
|
"logits/chosen": -0.30697402358055115, |
|
"logits/rejected": -0.2922336459159851, |
|
"logps/chosen": -446.08642578125, |
|
"logps/rejected": -485.38311767578125, |
|
"loss": 0.1835, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.07366035133600235, |
|
"rewards/margins": 0.05683215707540512, |
|
"rewards/rejected": -0.13049249351024628, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.381241003157162e-06, |
|
"logits/chosen": -0.27867692708969116, |
|
"logits/rejected": -0.23723456263542175, |
|
"logps/chosen": -472.146240234375, |
|
"logps/rejected": -522.3912963867188, |
|
"loss": 0.1886, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.08290112018585205, |
|
"rewards/margins": 0.06648631393909454, |
|
"rewards/rejected": -0.1493874490261078, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.298719451619979e-06, |
|
"logits/chosen": -0.27338069677352905, |
|
"logits/rejected": -0.0849432423710823, |
|
"logps/chosen": -505.39404296875, |
|
"logps/rejected": -564.4884643554688, |
|
"loss": 0.1769, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.0819827988743782, |
|
"rewards/margins": 0.07497727125883102, |
|
"rewards/rejected": -0.15696007013320923, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2178660642091036e-06, |
|
"logits/chosen": -0.31306496262550354, |
|
"logits/rejected": -0.15988986194133759, |
|
"logps/chosen": -536.2052001953125, |
|
"logps/rejected": -565.6595458984375, |
|
"loss": 0.1917, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.10776883363723755, |
|
"rewards/margins": 0.04764767736196518, |
|
"rewards/rejected": -0.15541651844978333, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1387931183775821e-06, |
|
"logits/chosen": -0.1312873661518097, |
|
"logits/rejected": -0.1946374773979187, |
|
"logps/chosen": -489.32037353515625, |
|
"logps/rejected": -532.713623046875, |
|
"loss": 0.1923, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.10007087886333466, |
|
"rewards/margins": 0.0466584786772728, |
|
"rewards/rejected": -0.14672937989234924, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.061610419159532e-06, |
|
"logits/chosen": -0.18406830728054047, |
|
"logits/rejected": -0.18264801800251007, |
|
"logps/chosen": -455.0455627441406, |
|
"logps/rejected": -483.98748779296875, |
|
"loss": 0.187, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -0.08491896092891693, |
|
"rewards/margins": 0.042202599346637726, |
|
"rewards/rejected": -0.12712153792381287, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.864251466888364e-07, |
|
"logits/chosen": -0.2591504454612732, |
|
"logits/rejected": -0.1554795801639557, |
|
"logps/chosen": -488.55328369140625, |
|
"logps/rejected": -532.9073486328125, |
|
"loss": 0.1807, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.09263734519481659, |
|
"rewards/margins": 0.04989578202366829, |
|
"rewards/rejected": -0.1425331085920334, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.133417073629288e-07, |
|
"logits/chosen": -0.28501999378204346, |
|
"logits/rejected": -0.23185932636260986, |
|
"logps/chosen": -485.5430603027344, |
|
"logps/rejected": -541.1561889648438, |
|
"loss": 0.1604, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.09013941138982773, |
|
"rewards/margins": 0.051737189292907715, |
|
"rewards/rejected": -0.14187659323215485, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.424615888583332e-07, |
|
"logits/chosen": -0.25448185205459595, |
|
"logits/rejected": -0.13845598697662354, |
|
"logps/chosen": -504.44085693359375, |
|
"logps/rejected": -557.7171630859375, |
|
"loss": 0.1875, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.09454745799303055, |
|
"rewards/margins": 0.0519348680973053, |
|
"rewards/rejected": -0.14648231863975525, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.738832191993092e-07, |
|
"logits/chosen": -0.20559599995613098, |
|
"logits/rejected": -0.1910923421382904, |
|
"logps/chosen": -518.5410766601562, |
|
"logps/rejected": -559.4114379882812, |
|
"loss": 0.1781, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.09824297577142715, |
|
"rewards/margins": 0.051730893552303314, |
|
"rewards/rejected": -0.14997386932373047, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.077018300752917e-07, |
|
"logits/chosen": -0.20453593134880066, |
|
"logits/rejected": -0.22350621223449707, |
|
"logps/chosen": -517.8855590820312, |
|
"logps/rejected": -554.9312744140625, |
|
"loss": 0.173, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.0931503102183342, |
|
"rewards/margins": 0.059033893048763275, |
|
"rewards/rejected": -0.15218421816825867, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.440093245969342e-07, |
|
"logits/chosen": -0.24915683269500732, |
|
"logits/rejected": -0.14556431770324707, |
|
"logps/chosen": -458.88006591796875, |
|
"logps/rejected": -483.50872802734375, |
|
"loss": 0.1791, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.0776047632098198, |
|
"rewards/margins": 0.055497486144304276, |
|
"rewards/rejected": -0.13310226798057556, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.828941496744075e-07, |
|
"logits/chosen": -0.22538790106773376, |
|
"logits/rejected": -0.16318151354789734, |
|
"logps/chosen": -516.1492309570312, |
|
"logps/rejected": -586.5814208984375, |
|
"loss": 0.1829, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.09301020205020905, |
|
"rewards/margins": 0.06428654491901398, |
|
"rewards/rejected": -0.15729674696922302, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.244411731951671e-07, |
|
"logits/chosen": -0.2806158661842346, |
|
"logits/rejected": -0.066395103931427, |
|
"logps/chosen": -479.8614807128906, |
|
"logps/rejected": -503.1717224121094, |
|
"loss": 0.1733, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.09291915595531464, |
|
"rewards/margins": 0.04909076914191246, |
|
"rewards/rejected": -0.1420099288225174, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.6873156617173594e-07, |
|
"logits/chosen": -0.325612872838974, |
|
"logits/rejected": -0.22937624156475067, |
|
"logps/chosen": -526.00341796875, |
|
"logps/rejected": -547.8123168945312, |
|
"loss": 0.1746, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.09224705398082733, |
|
"rewards/margins": 0.054325349628925323, |
|
"rewards/rejected": -0.14657239615917206, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1584269002318653e-07, |
|
"logits/chosen": -0.2622816264629364, |
|
"logits/rejected": -0.15280409157276154, |
|
"logps/chosen": -533.4638061523438, |
|
"logps/rejected": -595.7601318359375, |
|
"loss": 0.1815, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.09688106924295425, |
|
"rewards/margins": 0.05785801261663437, |
|
"rewards/rejected": -0.15473909676074982, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.658479891468258e-07, |
|
"logits/chosen": -0.18345573544502258, |
|
"logits/rejected": -0.16111025214195251, |
|
"logps/chosen": -487.77886962890625, |
|
"logps/rejected": -538.51171875, |
|
"loss": 0.184, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.08573255687952042, |
|
"rewards/margins": 0.0516083724796772, |
|
"rewards/rejected": -0.13734093308448792, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.18816888929272e-07, |
|
"logits/chosen": -0.2816532254219055, |
|
"logits/rejected": -0.15862765908241272, |
|
"logps/chosen": -512.4444580078125, |
|
"logps/rejected": -536.9342651367188, |
|
"loss": 0.1844, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.09420600533485413, |
|
"rewards/margins": 0.05614888668060303, |
|
"rewards/rejected": -0.15035490691661835, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.748146993385484e-07, |
|
"logits/chosen": -0.21904349327087402, |
|
"logits/rejected": -0.258176326751709, |
|
"logps/chosen": -549.0933837890625, |
|
"logps/rejected": -618.8422241210938, |
|
"loss": 0.18, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.10313485562801361, |
|
"rewards/margins": 0.07112576067447662, |
|
"rewards/rejected": -0.17426061630249023, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.3390252423108077e-07, |
|
"logits/chosen": -0.3128640353679657, |
|
"logits/rejected": -0.10742131620645523, |
|
"logps/chosen": -525.4251098632812, |
|
"logps/rejected": -553.0595092773438, |
|
"loss": 0.1734, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.08687300980091095, |
|
"rewards/margins": 0.06989692151546478, |
|
"rewards/rejected": -0.15676993131637573, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.961371764995243e-07, |
|
"logits/chosen": -0.1870919167995453, |
|
"logits/rejected": -0.18910066783428192, |
|
"logps/chosen": -514.9647827148438, |
|
"logps/rejected": -548.2384643554688, |
|
"loss": 0.182, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.0961606353521347, |
|
"rewards/margins": 0.05352962762117386, |
|
"rewards/rejected": -0.14969027042388916, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.61571099179261e-07, |
|
"logits/chosen": -0.31641727685928345, |
|
"logits/rejected": -0.21929411590099335, |
|
"logps/chosen": -452.8780212402344, |
|
"logps/rejected": -497.7808532714844, |
|
"loss": 0.1887, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.08406294882297516, |
|
"rewards/margins": 0.06199796125292778, |
|
"rewards/rejected": -0.14606089890003204, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.3025229262312367e-07, |
|
"logits/chosen": -0.2824193239212036, |
|
"logits/rejected": -0.2219020426273346, |
|
"logps/chosen": -516.4136962890625, |
|
"logps/rejected": -553.1406860351562, |
|
"loss": 0.1761, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.09720293432474136, |
|
"rewards/margins": 0.046975888311862946, |
|
"rewards/rejected": -0.1441788375377655, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0222424784546853e-07, |
|
"logits/chosen": -0.09483526647090912, |
|
"logits/rejected": -0.24578902125358582, |
|
"logps/chosen": -496.60028076171875, |
|
"logps/rejected": -558.0816650390625, |
|
"loss": 0.175, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.0964212641119957, |
|
"rewards/margins": 0.050969939678907394, |
|
"rewards/rejected": -0.14739122986793518, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.752588612816553e-08, |
|
"logits/chosen": -0.20135729014873505, |
|
"logits/rejected": -0.25128036737442017, |
|
"logps/chosen": -545.1806640625, |
|
"logps/rejected": -568.894775390625, |
|
"loss": 0.1815, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.09586036950349808, |
|
"rewards/margins": 0.04870045185089111, |
|
"rewards/rejected": -0.1445608288049698, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.619150497236991e-08, |
|
"logits/chosen": -0.2596682012081146, |
|
"logits/rejected": -0.22516381740570068, |
|
"logps/chosen": -498.10028076171875, |
|
"logps/rejected": -564.5962524414062, |
|
"loss": 0.1708, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.09488777071237564, |
|
"rewards/margins": 0.06974340975284576, |
|
"rewards/rejected": -0.164631187915802, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.825073047112743e-08, |
|
"logits/chosen": -0.2944473624229431, |
|
"logits/rejected": -0.2125546932220459, |
|
"logps/chosen": -502.841552734375, |
|
"logps/rejected": -546.7924194335938, |
|
"loss": 0.1826, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.08710362762212753, |
|
"rewards/margins": 0.059937745332717896, |
|
"rewards/rejected": -0.14704139530658722, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.372847616895685e-08, |
|
"logits/chosen": -0.2098701447248459, |
|
"logits/rejected": -0.2635635733604431, |
|
"logps/chosen": -481.09478759765625, |
|
"logps/rejected": -569.34326171875, |
|
"loss": 0.1763, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.09881605207920074, |
|
"rewards/margins": 0.06372065842151642, |
|
"rewards/rejected": -0.16253669559955597, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.264490846553279e-08, |
|
"logits/chosen": -0.20950980484485626, |
|
"logits/rejected": -0.2304944545030594, |
|
"logps/chosen": -508.41241455078125, |
|
"logps/rejected": -574.2259521484375, |
|
"loss": 0.1727, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.0780680924654007, |
|
"rewards/margins": 0.06727245450019836, |
|
"rewards/rejected": -0.14534054696559906, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.015418611516165e-09, |
|
"logits/chosen": -0.2829793095588684, |
|
"logits/rejected": -0.2987596392631531, |
|
"logps/chosen": -475.51031494140625, |
|
"logps/rejected": -521.2598876953125, |
|
"loss": 0.1865, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.09187036752700806, |
|
"rewards/margins": 0.06315977871417999, |
|
"rewards/rejected": -0.15503014624118805, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 8.506013354186993e-10, |
|
"logits/chosen": -0.23163847625255585, |
|
"logits/rejected": -0.24427077174186707, |
|
"logps/chosen": -477.51507568359375, |
|
"logps/rejected": -542.298828125, |
|
"loss": 0.1848, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.0930924192070961, |
|
"rewards/margins": 0.06024498865008354, |
|
"rewards/rejected": -0.15333738923072815, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 937, |
|
"total_flos": 0.0, |
|
"train_loss": 0.1881250925163322, |
|
"train_runtime": 7837.4153, |
|
"train_samples_per_second": 3.828, |
|
"train_steps_per_second": 0.12 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 937, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|