phi-2-irepo-chatml-v10-i1 / trainer_state.json
lole25's picture
Model save
308ed71 verified
raw
history blame contribute delete
No virus
46 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9994666666666666,
"eval_steps": 500,
"global_step": 937,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 5.319148936170213e-08,
"logits/chosen": 0.4053989052772522,
"logits/rejected": 0.1312936246395111,
"logps/chosen": -434.00537109375,
"logps/rejected": -516.5983276367188,
"loss": 0.1853,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.01,
"learning_rate": 5.319148936170213e-07,
"logits/chosen": 0.15297521650791168,
"logits/rejected": 0.29175662994384766,
"logps/chosen": -365.80181884765625,
"logps/rejected": -353.0853271484375,
"loss": 0.2099,
"rewards/accuracies": 0.25,
"rewards/chosen": -0.0007080123177729547,
"rewards/margins": -5.8396861277287826e-05,
"rewards/rejected": -0.0006496154237538576,
"step": 10
},
{
"epoch": 0.02,
"learning_rate": 1.0638297872340427e-06,
"logits/chosen": 0.11968117952346802,
"logits/rejected": 0.2041483372449875,
"logps/chosen": -340.0993347167969,
"logps/rejected": -348.33087158203125,
"loss": 0.2094,
"rewards/accuracies": 0.26249998807907104,
"rewards/chosen": -0.000655159296002239,
"rewards/margins": -8.313418220495805e-05,
"rewards/rejected": -0.0005720251356251538,
"step": 20
},
{
"epoch": 0.03,
"learning_rate": 1.595744680851064e-06,
"logits/chosen": 0.2551038861274719,
"logits/rejected": 0.25183868408203125,
"logps/chosen": -383.1521301269531,
"logps/rejected": -364.0672302246094,
"loss": 0.2012,
"rewards/accuracies": 0.38749998807907104,
"rewards/chosen": -0.0005650260718539357,
"rewards/margins": 0.00010353984544053674,
"rewards/rejected": -0.0006685658590868115,
"step": 30
},
{
"epoch": 0.04,
"learning_rate": 2.1276595744680853e-06,
"logits/chosen": 0.1547292321920395,
"logits/rejected": 0.27106207609176636,
"logps/chosen": -401.61614990234375,
"logps/rejected": -385.8863220214844,
"loss": 0.2099,
"rewards/accuracies": 0.3375000059604645,
"rewards/chosen": -0.0006045111804269254,
"rewards/margins": -5.9384223277447745e-05,
"rewards/rejected": -0.0005451269680634141,
"step": 40
},
{
"epoch": 0.05,
"learning_rate": 2.6595744680851065e-06,
"logits/chosen": 0.23326897621154785,
"logits/rejected": 0.27433687448501587,
"logps/chosen": -441.8401794433594,
"logps/rejected": -432.41485595703125,
"loss": 0.2047,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -0.000930719543248415,
"rewards/margins": 0.000368706532754004,
"rewards/rejected": -0.0012994259595870972,
"step": 50
},
{
"epoch": 0.06,
"learning_rate": 3.191489361702128e-06,
"logits/chosen": 0.17064206302165985,
"logits/rejected": 0.3185887336730957,
"logps/chosen": -410.41473388671875,
"logps/rejected": -414.3666076660156,
"loss": 0.2182,
"rewards/accuracies": 0.41874998807907104,
"rewards/chosen": -0.001497046323493123,
"rewards/margins": 0.00016530933498870581,
"rewards/rejected": -0.0016623556148260832,
"step": 60
},
{
"epoch": 0.07,
"learning_rate": 3.723404255319149e-06,
"logits/chosen": 0.12393184751272202,
"logits/rejected": 0.2235107123851776,
"logps/chosen": -354.70562744140625,
"logps/rejected": -356.94586181640625,
"loss": 0.2086,
"rewards/accuracies": 0.5,
"rewards/chosen": -5.924403740209527e-05,
"rewards/margins": 0.0009310436435043812,
"rewards/rejected": -0.000990287633612752,
"step": 70
},
{
"epoch": 0.09,
"learning_rate": 4.255319148936171e-06,
"logits/chosen": 0.14353762567043304,
"logits/rejected": 0.2516772449016571,
"logps/chosen": -392.6264343261719,
"logps/rejected": -380.66351318359375,
"loss": 0.208,
"rewards/accuracies": 0.40625,
"rewards/chosen": 0.0002285484952153638,
"rewards/margins": 0.001034508110024035,
"rewards/rejected": -0.0008059596875682473,
"step": 80
},
{
"epoch": 0.1,
"learning_rate": 4.787234042553192e-06,
"logits/chosen": 0.24103212356567383,
"logits/rejected": 0.1776101142168045,
"logps/chosen": -393.3184509277344,
"logps/rejected": -416.2762145996094,
"loss": 0.1992,
"rewards/accuracies": 0.48124998807907104,
"rewards/chosen": -0.0003849788336083293,
"rewards/margins": 0.0017982361605390906,
"rewards/rejected": -0.0021832147613167763,
"step": 90
},
{
"epoch": 0.11,
"learning_rate": 4.999375059004058e-06,
"logits/chosen": 0.16443544626235962,
"logits/rejected": 0.17112873494625092,
"logps/chosen": -416.6537170410156,
"logps/rejected": -411.6963806152344,
"loss": 0.2064,
"rewards/accuracies": 0.45625001192092896,
"rewards/chosen": 0.0005408526631072164,
"rewards/margins": 0.0026028361171483994,
"rewards/rejected": -0.002061983570456505,
"step": 100
},
{
"epoch": 0.12,
"learning_rate": 4.9955571065548795e-06,
"logits/chosen": 0.2384149730205536,
"logits/rejected": 0.1614537537097931,
"logps/chosen": -406.7789306640625,
"logps/rejected": -391.0703430175781,
"loss": 0.2008,
"rewards/accuracies": 0.5,
"rewards/chosen": 0.012324010953307152,
"rewards/margins": 0.0033186424989253283,
"rewards/rejected": 0.009005369618535042,
"step": 110
},
{
"epoch": 0.13,
"learning_rate": 4.9882736864879e-06,
"logits/chosen": 0.08936997503042221,
"logits/rejected": 0.25732293725013733,
"logps/chosen": -397.0160827636719,
"logps/rejected": -431.9867248535156,
"loss": 0.2064,
"rewards/accuracies": 0.53125,
"rewards/chosen": 0.01773521490395069,
"rewards/margins": 0.007638473063707352,
"rewards/rejected": 0.01009674184024334,
"step": 120
},
{
"epoch": 0.14,
"learning_rate": 4.977534912960124e-06,
"logits/chosen": 0.14923642575740814,
"logits/rejected": 0.27579236030578613,
"logps/chosen": -407.21258544921875,
"logps/rejected": -401.8697204589844,
"loss": 0.2048,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": 0.005354008637368679,
"rewards/margins": 0.00832393579185009,
"rewards/rejected": -0.0029699269216507673,
"step": 130
},
{
"epoch": 0.15,
"learning_rate": 4.963355698422092e-06,
"logits/chosen": 0.13965365290641785,
"logits/rejected": 0.20428553223609924,
"logps/chosen": -396.0818786621094,
"logps/rejected": -384.4440612792969,
"loss": 0.2016,
"rewards/accuracies": 0.41874998807907104,
"rewards/chosen": -0.011929613538086414,
"rewards/margins": 0.00782632827758789,
"rewards/rejected": -0.01975594088435173,
"step": 140
},
{
"epoch": 0.16,
"learning_rate": 4.945755732909625e-06,
"logits/chosen": 0.0017524458235129714,
"logits/rejected": 0.048104483634233475,
"logps/chosen": -403.7103576660156,
"logps/rejected": -421.3060607910156,
"loss": 0.1918,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -0.020950669422745705,
"rewards/margins": 0.01481578964740038,
"rewards/rejected": -0.03576646000146866,
"step": 150
},
{
"epoch": 0.17,
"learning_rate": 4.924759456701167e-06,
"logits/chosen": 0.050279758870601654,
"logits/rejected": 0.12556883692741394,
"logps/chosen": -467.9580993652344,
"logps/rejected": -487.71844482421875,
"loss": 0.1868,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.044767118990421295,
"rewards/margins": 0.03162100166082382,
"rewards/rejected": -0.07638812065124512,
"step": 160
},
{
"epoch": 0.18,
"learning_rate": 4.900396026378671e-06,
"logits/chosen": -0.020991306751966476,
"logits/rejected": 0.15817420184612274,
"logps/chosen": -522.8843383789062,
"logps/rejected": -518.1360473632812,
"loss": 0.2105,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.08218502998352051,
"rewards/margins": 0.02223752811551094,
"rewards/rejected": -0.10442256927490234,
"step": 170
},
{
"epoch": 0.19,
"learning_rate": 4.872699274339169e-06,
"logits/chosen": 0.08929436653852463,
"logits/rejected": 0.09290768206119537,
"logps/chosen": -470.04296875,
"logps/rejected": -501.46661376953125,
"loss": 0.1886,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -0.08902844041585922,
"rewards/margins": 0.03190689533948898,
"rewards/rejected": -0.12093535810709,
"step": 180
},
{
"epoch": 0.2,
"learning_rate": 4.8417076618132434e-06,
"logits/chosen": 0.017135417088866234,
"logits/rejected": 0.09486501663923264,
"logps/chosen": -600.1754760742188,
"logps/rejected": -609.9652709960938,
"loss": 0.1887,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.11600615829229355,
"rewards/margins": 0.041202057152986526,
"rewards/rejected": -0.15720821917057037,
"step": 190
},
{
"epoch": 0.21,
"learning_rate": 4.807464225455655e-06,
"logits/chosen": -0.032647065818309784,
"logits/rejected": 0.09240031987428665,
"logps/chosen": -527.2655029296875,
"logps/rejected": -595.9906005859375,
"loss": 0.1949,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.1231132298707962,
"rewards/margins": 0.051538724452257156,
"rewards/rejected": -0.17465195059776306,
"step": 200
},
{
"epoch": 0.22,
"learning_rate": 4.770016517582283e-06,
"logits/chosen": 0.03914088383316994,
"logits/rejected": 0.028707262128591537,
"logps/chosen": -524.7379760742188,
"logps/rejected": -570.7955322265625,
"loss": 0.1902,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.12313251197338104,
"rewards/margins": 0.04237721115350723,
"rewards/rejected": -0.16550973057746887,
"step": 210
},
{
"epoch": 0.23,
"learning_rate": 4.7294165401363616e-06,
"logits/chosen": 0.010909264907240868,
"logits/rejected": -0.024190250784158707,
"logps/chosen": -549.97607421875,
"logps/rejected": -590.9778442382812,
"loss": 0.1843,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.14806947112083435,
"rewards/margins": 0.04638643562793732,
"rewards/rejected": -0.19445592164993286,
"step": 220
},
{
"epoch": 0.25,
"learning_rate": 4.68572067247573e-06,
"logits/chosen": -0.018162641674280167,
"logits/rejected": 0.000972352921962738,
"logps/chosen": -549.0392456054688,
"logps/rejected": -598.3811645507812,
"loss": 0.2025,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.1491001546382904,
"rewards/margins": 0.04847537726163864,
"rewards/rejected": -0.19757553935050964,
"step": 230
},
{
"epoch": 0.26,
"learning_rate": 4.638989593081364e-06,
"logits/chosen": -0.12062356621026993,
"logits/rejected": 0.04868536815047264,
"logps/chosen": -484.58685302734375,
"logps/rejected": -516.2865600585938,
"loss": 0.1908,
"rewards/accuracies": 0.46875,
"rewards/chosen": -0.1070113405585289,
"rewards/margins": 0.028431424871087074,
"rewards/rejected": -0.13544276356697083,
"step": 240
},
{
"epoch": 0.27,
"learning_rate": 4.5892881952959015e-06,
"logits/chosen": -0.041638366878032684,
"logits/rejected": 0.0221172496676445,
"logps/chosen": -507.4730529785156,
"logps/rejected": -527.9345703125,
"loss": 0.2052,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": -0.09331385791301727,
"rewards/margins": 0.03461749479174614,
"rewards/rejected": -0.1279313564300537,
"step": 250
},
{
"epoch": 0.28,
"learning_rate": 4.536685497209182e-06,
"logits/chosen": -0.05273251608014107,
"logits/rejected": -0.022044766694307327,
"logps/chosen": -538.548583984375,
"logps/rejected": -590.6500854492188,
"loss": 0.182,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.10730306804180145,
"rewards/margins": 0.039316385984420776,
"rewards/rejected": -0.14661946892738342,
"step": 260
},
{
"epoch": 0.29,
"learning_rate": 4.481254545815943e-06,
"logits/chosen": -0.12279339134693146,
"logits/rejected": -0.079288050532341,
"logps/chosen": -560.711181640625,
"logps/rejected": -635.7985229492188,
"loss": 0.1845,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.12682856619358063,
"rewards/margins": 0.0557611808180809,
"rewards/rejected": -0.18258973956108093,
"step": 270
},
{
"epoch": 0.3,
"learning_rate": 4.42307231557875e-06,
"logits/chosen": -0.12978403270244598,
"logits/rejected": -0.05718718096613884,
"logps/chosen": -534.7046508789062,
"logps/rejected": -573.9546508789062,
"loss": 0.1872,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.12655052542686462,
"rewards/margins": 0.05336705967783928,
"rewards/rejected": -0.1799176186323166,
"step": 280
},
{
"epoch": 0.31,
"learning_rate": 4.3622196015370305e-06,
"logits/chosen": -0.13656684756278992,
"logits/rejected": -0.07923261821269989,
"logps/chosen": -537.6559448242188,
"logps/rejected": -627.1619873046875,
"loss": 0.1952,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.13470812141895294,
"rewards/margins": 0.05948293209075928,
"rewards/rejected": -0.1941910684108734,
"step": 290
},
{
"epoch": 0.32,
"learning_rate": 4.298780907110648e-06,
"logits/chosen": -0.11429516226053238,
"logits/rejected": -0.12869636714458466,
"logps/chosen": -543.2788696289062,
"logps/rejected": -558.6578369140625,
"loss": 0.1847,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -0.12002478539943695,
"rewards/margins": 0.050586897879838943,
"rewards/rejected": -0.1706116795539856,
"step": 300
},
{
"epoch": 0.33,
"learning_rate": 4.23284432675381e-06,
"logits/chosen": -0.18651030957698822,
"logits/rejected": -0.052459727972745895,
"logps/chosen": -461.1847229003906,
"logps/rejected": -506.2823181152344,
"loss": 0.1928,
"rewards/accuracies": 0.45625001192092896,
"rewards/chosen": -0.08158674091100693,
"rewards/margins": 0.045484792441129684,
"rewards/rejected": -0.1270715296268463,
"step": 310
},
{
"epoch": 0.34,
"learning_rate": 4.164501423622277e-06,
"logits/chosen": -0.1330818384885788,
"logits/rejected": -0.09265539795160294,
"logps/chosen": -483.45599365234375,
"logps/rejected": -515.7194213867188,
"loss": 0.1797,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.10115663707256317,
"rewards/margins": 0.04430658370256424,
"rewards/rejected": -0.14546321332454681,
"step": 320
},
{
"epoch": 0.35,
"learning_rate": 4.0938471024237355e-06,
"logits/chosen": -0.11196194589138031,
"logits/rejected": -0.09686783701181412,
"logps/chosen": -557.8707885742188,
"logps/rejected": -571.39794921875,
"loss": 0.1958,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": -0.12075225263834,
"rewards/margins": 0.04514995589852333,
"rewards/rejected": -0.16590221226215363,
"step": 330
},
{
"epoch": 0.36,
"learning_rate": 4.020979477627907e-06,
"logits/chosen": -0.08174435794353485,
"logits/rejected": -0.06923134624958038,
"logps/chosen": -531.8570556640625,
"logps/rejected": -605.0074462890625,
"loss": 0.1889,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.11230266094207764,
"rewards/margins": 0.06913084536790848,
"rewards/rejected": -0.18143349885940552,
"step": 340
},
{
"epoch": 0.37,
"learning_rate": 3.9459997372194105e-06,
"logits/chosen": -0.16061343252658844,
"logits/rejected": -0.027816006913781166,
"logps/chosen": -534.1594848632812,
"logps/rejected": -573.8477783203125,
"loss": 0.1926,
"rewards/accuracies": 0.46875,
"rewards/chosen": -0.0970136970281601,
"rewards/margins": 0.045039448887109756,
"rewards/rejected": -0.14205312728881836,
"step": 350
},
{
"epoch": 0.38,
"learning_rate": 3.869012002182573e-06,
"logits/chosen": -0.24527081847190857,
"logits/rejected": -0.1484527587890625,
"logps/chosen": -544.539306640625,
"logps/rejected": -564.9341430664062,
"loss": 0.1859,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.09417351335287094,
"rewards/margins": 0.056557249277830124,
"rewards/rejected": -0.15073075890541077,
"step": 360
},
{
"epoch": 0.39,
"learning_rate": 3.7901231819133104e-06,
"logits/chosen": -0.1722763478755951,
"logits/rejected": -0.17130622267723083,
"logps/chosen": -496.27313232421875,
"logps/rejected": -557.1398315429688,
"loss": 0.1877,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -0.0965786799788475,
"rewards/margins": 0.057069409638643265,
"rewards/rejected": -0.15364809334278107,
"step": 370
},
{
"epoch": 0.41,
"learning_rate": 3.709442825758875e-06,
"logits/chosen": -0.286950945854187,
"logits/rejected": -0.12660877406597137,
"logps/chosen": -487.8304138183594,
"logps/rejected": -506.80267333984375,
"loss": 0.1784,
"rewards/accuracies": 0.48124998807907104,
"rewards/chosen": -0.08378596603870392,
"rewards/margins": 0.04325443506240845,
"rewards/rejected": -0.12704041600227356,
"step": 380
},
{
"epoch": 0.42,
"learning_rate": 3.6270829708916113e-06,
"logits/chosen": -0.2721463441848755,
"logits/rejected": -0.19791728258132935,
"logps/chosen": -525.049560546875,
"logps/rejected": -564.6629028320312,
"loss": 0.1924,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.1035178154706955,
"rewards/margins": 0.03107512556016445,
"rewards/rejected": -0.1345929503440857,
"step": 390
},
{
"epoch": 0.43,
"learning_rate": 3.543157986727991e-06,
"logits/chosen": -0.17590856552124023,
"logits/rejected": -0.16738948225975037,
"logps/chosen": -520.5001831054688,
"logps/rejected": -564.5961303710938,
"loss": 0.1854,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.09734812378883362,
"rewards/margins": 0.0474289208650589,
"rewards/rejected": -0.14477702975273132,
"step": 400
},
{
"epoch": 0.44,
"learning_rate": 3.4577844161089614e-06,
"logits/chosen": -0.17745746672153473,
"logits/rejected": -0.18353696167469025,
"logps/chosen": -508.34637451171875,
"logps/rejected": -577.1897583007812,
"loss": 0.1804,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.0865376815199852,
"rewards/margins": 0.049374908208847046,
"rewards/rejected": -0.13591258227825165,
"step": 410
},
{
"epoch": 0.45,
"learning_rate": 3.3710808134621577e-06,
"logits/chosen": -0.17098669707775116,
"logits/rejected": -0.13703958690166473,
"logps/chosen": -539.40087890625,
"logps/rejected": -593.8014526367188,
"loss": 0.1851,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.09274087101221085,
"rewards/margins": 0.05765017122030258,
"rewards/rejected": -0.15039105713367462,
"step": 420
},
{
"epoch": 0.46,
"learning_rate": 3.2831675801707126e-06,
"logits/chosen": -0.20213007926940918,
"logits/rejected": -0.20745894312858582,
"logps/chosen": -453.65478515625,
"logps/rejected": -497.0008850097656,
"loss": 0.1824,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.08567062765359879,
"rewards/margins": 0.04529280215501785,
"rewards/rejected": -0.13096341490745544,
"step": 430
},
{
"epoch": 0.47,
"learning_rate": 3.194166797377289e-06,
"logits/chosen": -0.21449732780456543,
"logits/rejected": -0.19523288309574127,
"logps/chosen": -547.9935302734375,
"logps/rejected": -572.2437744140625,
"loss": 0.1901,
"rewards/accuracies": 0.4625000059604645,
"rewards/chosen": -0.09648506343364716,
"rewards/margins": 0.03453432396054268,
"rewards/rejected": -0.13101938366889954,
"step": 440
},
{
"epoch": 0.48,
"learning_rate": 3.104202056455501e-06,
"logits/chosen": -0.22678379714488983,
"logits/rejected": -0.18668214976787567,
"logps/chosen": -519.3316650390625,
"logps/rejected": -561.0910034179688,
"loss": 0.1896,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.09367823600769043,
"rewards/margins": 0.058413583785295486,
"rewards/rejected": -0.15209180116653442,
"step": 450
},
{
"epoch": 0.49,
"learning_rate": 3.013398287384144e-06,
"logits/chosen": -0.20922398567199707,
"logits/rejected": -0.15190599858760834,
"logps/chosen": -554.7764892578125,
"logps/rejected": -584.9015502929688,
"loss": 0.1777,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.11632993072271347,
"rewards/margins": 0.048789944499731064,
"rewards/rejected": -0.16511985659599304,
"step": 460
},
{
"epoch": 0.5,
"learning_rate": 2.9218815852625717e-06,
"logits/chosen": -0.2042142152786255,
"logits/rejected": -0.19644713401794434,
"logps/chosen": -522.6699829101562,
"logps/rejected": -589.4488525390625,
"loss": 0.189,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.10839296877384186,
"rewards/margins": 0.06792866438627243,
"rewards/rejected": -0.17632164061069489,
"step": 470
},
{
"epoch": 0.51,
"learning_rate": 2.829779035208113e-06,
"logits/chosen": -0.29581087827682495,
"logits/rejected": -0.17288121581077576,
"logps/chosen": -492.73297119140625,
"logps/rejected": -565.6483764648438,
"loss": 0.1819,
"rewards/accuracies": 0.46875,
"rewards/chosen": -0.10467328131198883,
"rewards/margins": 0.0681912824511528,
"rewards/rejected": -0.17286454141139984,
"step": 480
},
{
"epoch": 0.52,
"learning_rate": 2.737218535878705e-06,
"logits/chosen": -0.1768864393234253,
"logits/rejected": -0.19145308434963226,
"logps/chosen": -481.3701171875,
"logps/rejected": -552.0697021484375,
"loss": 0.1861,
"rewards/accuracies": 0.46875,
"rewards/chosen": -0.10367073863744736,
"rewards/margins": 0.0636112317442894,
"rewards/rejected": -0.16728197038173676,
"step": 490
},
{
"epoch": 0.53,
"learning_rate": 2.64432862186579e-06,
"logits/chosen": -0.25040799379348755,
"logits/rejected": -0.2705633044242859,
"logps/chosen": -473.15777587890625,
"logps/rejected": -521.9263916015625,
"loss": 0.1845,
"rewards/accuracies": 0.4625000059604645,
"rewards/chosen": -0.10329292714595795,
"rewards/margins": 0.04386230558156967,
"rewards/rejected": -0.1471552550792694,
"step": 500
},
{
"epoch": 0.54,
"learning_rate": 2.551238285204126e-06,
"logits/chosen": -0.22839005291461945,
"logits/rejected": -0.18522998690605164,
"logps/chosen": -562.2581176757812,
"logps/rejected": -602.7523193359375,
"loss": 0.1852,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.11718226969242096,
"rewards/margins": 0.054385870695114136,
"rewards/rejected": -0.1715681403875351,
"step": 510
},
{
"epoch": 0.55,
"learning_rate": 2.4580767962463688e-06,
"logits/chosen": -0.28231528401374817,
"logits/rejected": -0.1746218502521515,
"logps/chosen": -508.0462951660156,
"logps/rejected": -521.466552734375,
"loss": 0.1887,
"rewards/accuracies": 0.45625001192092896,
"rewards/chosen": -0.0966549962759018,
"rewards/margins": 0.04605900123715401,
"rewards/rejected": -0.1427139937877655,
"step": 520
},
{
"epoch": 0.57,
"learning_rate": 2.3649735241511546e-06,
"logits/chosen": -0.14483687281608582,
"logits/rejected": -0.18159925937652588,
"logps/chosen": -519.6622314453125,
"logps/rejected": -554.4771728515625,
"loss": 0.1881,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.10754968971014023,
"rewards/margins": 0.043608419597148895,
"rewards/rejected": -0.15115809440612793,
"step": 530
},
{
"epoch": 0.58,
"learning_rate": 2.2720577572339914e-06,
"logits/chosen": -0.27724790573120117,
"logits/rejected": -0.18303519487380981,
"logps/chosen": -502.09747314453125,
"logps/rejected": -529.7732543945312,
"loss": 0.1902,
"rewards/accuracies": 0.4312500059604645,
"rewards/chosen": -0.0977000966668129,
"rewards/margins": 0.04301925003528595,
"rewards/rejected": -0.14071933925151825,
"step": 540
},
{
"epoch": 0.59,
"learning_rate": 2.1794585234303995e-06,
"logits/chosen": -0.2885403633117676,
"logits/rejected": -0.16289584338665009,
"logps/chosen": -519.3963012695312,
"logps/rejected": -553.4032592773438,
"loss": 0.1871,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.10111876577138901,
"rewards/margins": 0.040404774248600006,
"rewards/rejected": -0.1415235549211502,
"step": 550
},
{
"epoch": 0.6,
"learning_rate": 2.0873044111206407e-06,
"logits/chosen": -0.23527821898460388,
"logits/rejected": -0.2247372567653656,
"logps/chosen": -481.41552734375,
"logps/rejected": -552.4132080078125,
"loss": 0.2026,
"rewards/accuracies": 0.45625001192092896,
"rewards/chosen": -0.10331599414348602,
"rewards/margins": 0.040959432721138,
"rewards/rejected": -0.14427544176578522,
"step": 560
},
{
"epoch": 0.61,
"learning_rate": 1.9957233905648293e-06,
"logits/chosen": -0.28348255157470703,
"logits/rejected": -0.26194503903388977,
"logps/chosen": -467.77740478515625,
"logps/rejected": -507.6676330566406,
"loss": 0.1887,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.10089198499917984,
"rewards/margins": 0.0405060276389122,
"rewards/rejected": -0.14139802753925323,
"step": 570
},
{
"epoch": 0.62,
"learning_rate": 1.904842636196402e-06,
"logits/chosen": -0.22403912246227264,
"logits/rejected": -0.19076624512672424,
"logps/chosen": -500.50982666015625,
"logps/rejected": -544.9527587890625,
"loss": 0.1793,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.09000807255506516,
"rewards/margins": 0.05757290869951248,
"rewards/rejected": -0.14758098125457764,
"step": 580
},
{
"epoch": 0.63,
"learning_rate": 1.814788350020726e-06,
"logits/chosen": -0.25425633788108826,
"logits/rejected": -0.13311608135700226,
"logps/chosen": -523.557373046875,
"logps/rejected": -576.8714599609375,
"loss": 0.1667,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.09282426536083221,
"rewards/margins": 0.06447537243366241,
"rewards/rejected": -0.15729963779449463,
"step": 590
},
{
"epoch": 0.64,
"learning_rate": 1.725685586364051e-06,
"logits/chosen": -0.25314217805862427,
"logits/rejected": -0.2236749678850174,
"logps/chosen": -442.9320373535156,
"logps/rejected": -521.5167236328125,
"loss": 0.1816,
"rewards/accuracies": 0.45625001192092896,
"rewards/chosen": -0.08528304100036621,
"rewards/margins": 0.05453087016940117,
"rewards/rejected": -0.13981391489505768,
"step": 600
},
{
"epoch": 0.65,
"learning_rate": 1.6376580782162172e-06,
"logits/chosen": -0.2589682936668396,
"logits/rejected": -0.2686694264411926,
"logps/chosen": -501.1578674316406,
"logps/rejected": -545.9219970703125,
"loss": 0.1949,
"rewards/accuracies": 0.46875,
"rewards/chosen": -0.09304684400558472,
"rewards/margins": 0.06486930698156357,
"rewards/rejected": -0.1579161435365677,
"step": 610
},
{
"epoch": 0.66,
"learning_rate": 1.550828065408227e-06,
"logits/chosen": -0.15998974442481995,
"logits/rejected": -0.26897841691970825,
"logps/chosen": -483.9093322753906,
"logps/rejected": -574.64990234375,
"loss": 0.1784,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.0897846445441246,
"rewards/margins": 0.059298910200595856,
"rewards/rejected": -0.14908355474472046,
"step": 620
},
{
"epoch": 0.67,
"learning_rate": 1.4653161248633053e-06,
"logits/chosen": -0.30697402358055115,
"logits/rejected": -0.2922336459159851,
"logps/chosen": -446.08642578125,
"logps/rejected": -485.38311767578125,
"loss": 0.1835,
"rewards/accuracies": 0.4625000059604645,
"rewards/chosen": -0.07366035133600235,
"rewards/margins": 0.05683215707540512,
"rewards/rejected": -0.13049249351024628,
"step": 630
},
{
"epoch": 0.68,
"learning_rate": 1.381241003157162e-06,
"logits/chosen": -0.27867692708969116,
"logits/rejected": -0.23723456263542175,
"logps/chosen": -472.146240234375,
"logps/rejected": -522.3912963867188,
"loss": 0.1886,
"rewards/accuracies": 0.45625001192092896,
"rewards/chosen": -0.08290112018585205,
"rewards/margins": 0.06648631393909454,
"rewards/rejected": -0.1493874490261078,
"step": 640
},
{
"epoch": 0.69,
"learning_rate": 1.298719451619979e-06,
"logits/chosen": -0.27338069677352905,
"logits/rejected": -0.0849432423710823,
"logps/chosen": -505.39404296875,
"logps/rejected": -564.4884643554688,
"loss": 0.1769,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.0819827988743782,
"rewards/margins": 0.07497727125883102,
"rewards/rejected": -0.15696007013320923,
"step": 650
},
{
"epoch": 0.7,
"learning_rate": 1.2178660642091036e-06,
"logits/chosen": -0.31306496262550354,
"logits/rejected": -0.15988986194133759,
"logps/chosen": -536.2052001953125,
"logps/rejected": -565.6595458984375,
"loss": 0.1917,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.10776883363723755,
"rewards/margins": 0.04764767736196518,
"rewards/rejected": -0.15541651844978333,
"step": 660
},
{
"epoch": 0.71,
"learning_rate": 1.1387931183775821e-06,
"logits/chosen": -0.1312873661518097,
"logits/rejected": -0.1946374773979187,
"logps/chosen": -489.32037353515625,
"logps/rejected": -532.713623046875,
"loss": 0.1923,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.10007087886333466,
"rewards/margins": 0.0466584786772728,
"rewards/rejected": -0.14672937989234924,
"step": 670
},
{
"epoch": 0.73,
"learning_rate": 1.061610419159532e-06,
"logits/chosen": -0.18406830728054047,
"logits/rejected": -0.18264801800251007,
"logps/chosen": -455.0455627441406,
"logps/rejected": -483.98748779296875,
"loss": 0.187,
"rewards/accuracies": 0.41874998807907104,
"rewards/chosen": -0.08491896092891693,
"rewards/margins": 0.042202599346637726,
"rewards/rejected": -0.12712153792381287,
"step": 680
},
{
"epoch": 0.74,
"learning_rate": 9.864251466888364e-07,
"logits/chosen": -0.2591504454612732,
"logits/rejected": -0.1554795801639557,
"logps/chosen": -488.55328369140625,
"logps/rejected": -532.9073486328125,
"loss": 0.1807,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.09263734519481659,
"rewards/margins": 0.04989578202366829,
"rewards/rejected": -0.1425331085920334,
"step": 690
},
{
"epoch": 0.75,
"learning_rate": 9.133417073629288e-07,
"logits/chosen": -0.28501999378204346,
"logits/rejected": -0.23185932636260986,
"logps/chosen": -485.5430603027344,
"logps/rejected": -541.1561889648438,
"loss": 0.1604,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.09013941138982773,
"rewards/margins": 0.051737189292907715,
"rewards/rejected": -0.14187659323215485,
"step": 700
},
{
"epoch": 0.76,
"learning_rate": 8.424615888583332e-07,
"logits/chosen": -0.25448185205459595,
"logits/rejected": -0.13845598697662354,
"logps/chosen": -504.44085693359375,
"logps/rejected": -557.7171630859375,
"loss": 0.1875,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.09454745799303055,
"rewards/margins": 0.0519348680973053,
"rewards/rejected": -0.14648231863975525,
"step": 710
},
{
"epoch": 0.77,
"learning_rate": 7.738832191993092e-07,
"logits/chosen": -0.20559599995613098,
"logits/rejected": -0.1910923421382904,
"logps/chosen": -518.5410766601562,
"logps/rejected": -559.4114379882812,
"loss": 0.1781,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.09824297577142715,
"rewards/margins": 0.051730893552303314,
"rewards/rejected": -0.14997386932373047,
"step": 720
},
{
"epoch": 0.78,
"learning_rate": 7.077018300752917e-07,
"logits/chosen": -0.20453593134880066,
"logits/rejected": -0.22350621223449707,
"logps/chosen": -517.8855590820312,
"logps/rejected": -554.9312744140625,
"loss": 0.173,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.0931503102183342,
"rewards/margins": 0.059033893048763275,
"rewards/rejected": -0.15218421816825867,
"step": 730
},
{
"epoch": 0.79,
"learning_rate": 6.440093245969342e-07,
"logits/chosen": -0.24915683269500732,
"logits/rejected": -0.14556431770324707,
"logps/chosen": -458.88006591796875,
"logps/rejected": -483.50872802734375,
"loss": 0.1791,
"rewards/accuracies": 0.48124998807907104,
"rewards/chosen": -0.0776047632098198,
"rewards/margins": 0.055497486144304276,
"rewards/rejected": -0.13310226798057556,
"step": 740
},
{
"epoch": 0.8,
"learning_rate": 5.828941496744075e-07,
"logits/chosen": -0.22538790106773376,
"logits/rejected": -0.16318151354789734,
"logps/chosen": -516.1492309570312,
"logps/rejected": -586.5814208984375,
"loss": 0.1829,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.09301020205020905,
"rewards/margins": 0.06428654491901398,
"rewards/rejected": -0.15729674696922302,
"step": 750
},
{
"epoch": 0.81,
"learning_rate": 5.244411731951671e-07,
"logits/chosen": -0.2806158661842346,
"logits/rejected": -0.066395103931427,
"logps/chosen": -479.8614807128906,
"logps/rejected": -503.1717224121094,
"loss": 0.1733,
"rewards/accuracies": 0.4625000059604645,
"rewards/chosen": -0.09291915595531464,
"rewards/margins": 0.04909076914191246,
"rewards/rejected": -0.1420099288225174,
"step": 760
},
{
"epoch": 0.82,
"learning_rate": 4.6873156617173594e-07,
"logits/chosen": -0.325612872838974,
"logits/rejected": -0.22937624156475067,
"logps/chosen": -526.00341796875,
"logps/rejected": -547.8123168945312,
"loss": 0.1746,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.09224705398082733,
"rewards/margins": 0.054325349628925323,
"rewards/rejected": -0.14657239615917206,
"step": 770
},
{
"epoch": 0.83,
"learning_rate": 4.1584269002318653e-07,
"logits/chosen": -0.2622816264629364,
"logits/rejected": -0.15280409157276154,
"logps/chosen": -533.4638061523438,
"logps/rejected": -595.7601318359375,
"loss": 0.1815,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.09688106924295425,
"rewards/margins": 0.05785801261663437,
"rewards/rejected": -0.15473909676074982,
"step": 780
},
{
"epoch": 0.84,
"learning_rate": 3.658479891468258e-07,
"logits/chosen": -0.18345573544502258,
"logits/rejected": -0.16111025214195251,
"logps/chosen": -487.77886962890625,
"logps/rejected": -538.51171875,
"loss": 0.184,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -0.08573255687952042,
"rewards/margins": 0.0516083724796772,
"rewards/rejected": -0.13734093308448792,
"step": 790
},
{
"epoch": 0.85,
"learning_rate": 3.18816888929272e-07,
"logits/chosen": -0.2816532254219055,
"logits/rejected": -0.15862765908241272,
"logps/chosen": -512.4444580078125,
"logps/rejected": -536.9342651367188,
"loss": 0.1844,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.09420600533485413,
"rewards/margins": 0.05614888668060303,
"rewards/rejected": -0.15035490691661835,
"step": 800
},
{
"epoch": 0.86,
"learning_rate": 2.748146993385484e-07,
"logits/chosen": -0.21904349327087402,
"logits/rejected": -0.258176326751709,
"logps/chosen": -549.0933837890625,
"logps/rejected": -618.8422241210938,
"loss": 0.18,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.10313485562801361,
"rewards/margins": 0.07112576067447662,
"rewards/rejected": -0.17426061630249023,
"step": 810
},
{
"epoch": 0.87,
"learning_rate": 2.3390252423108077e-07,
"logits/chosen": -0.3128640353679657,
"logits/rejected": -0.10742131620645523,
"logps/chosen": -525.4251098632812,
"logps/rejected": -553.0595092773438,
"loss": 0.1734,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.08687300980091095,
"rewards/margins": 0.06989692151546478,
"rewards/rejected": -0.15676993131637573,
"step": 820
},
{
"epoch": 0.89,
"learning_rate": 1.961371764995243e-07,
"logits/chosen": -0.1870919167995453,
"logits/rejected": -0.18910066783428192,
"logps/chosen": -514.9647827148438,
"logps/rejected": -548.2384643554688,
"loss": 0.182,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.0961606353521347,
"rewards/margins": 0.05352962762117386,
"rewards/rejected": -0.14969027042388916,
"step": 830
},
{
"epoch": 0.9,
"learning_rate": 1.61571099179261e-07,
"logits/chosen": -0.31641727685928345,
"logits/rejected": -0.21929411590099335,
"logps/chosen": -452.8780212402344,
"logps/rejected": -497.7808532714844,
"loss": 0.1887,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.08406294882297516,
"rewards/margins": 0.06199796125292778,
"rewards/rejected": -0.14606089890003204,
"step": 840
},
{
"epoch": 0.91,
"learning_rate": 1.3025229262312367e-07,
"logits/chosen": -0.2824193239212036,
"logits/rejected": -0.2219020426273346,
"logps/chosen": -516.4136962890625,
"logps/rejected": -553.1406860351562,
"loss": 0.1761,
"rewards/accuracies": 0.48124998807907104,
"rewards/chosen": -0.09720293432474136,
"rewards/margins": 0.046975888311862946,
"rewards/rejected": -0.1441788375377655,
"step": 850
},
{
"epoch": 0.92,
"learning_rate": 1.0222424784546853e-07,
"logits/chosen": -0.09483526647090912,
"logits/rejected": -0.24578902125358582,
"logps/chosen": -496.60028076171875,
"logps/rejected": -558.0816650390625,
"loss": 0.175,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": -0.0964212641119957,
"rewards/margins": 0.050969939678907394,
"rewards/rejected": -0.14739122986793518,
"step": 860
},
{
"epoch": 0.93,
"learning_rate": 7.752588612816553e-08,
"logits/chosen": -0.20135729014873505,
"logits/rejected": -0.25128036737442017,
"logps/chosen": -545.1806640625,
"logps/rejected": -568.894775390625,
"loss": 0.1815,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.09586036950349808,
"rewards/margins": 0.04870045185089111,
"rewards/rejected": -0.1445608288049698,
"step": 870
},
{
"epoch": 0.94,
"learning_rate": 5.619150497236991e-08,
"logits/chosen": -0.2596682012081146,
"logits/rejected": -0.22516381740570068,
"logps/chosen": -498.10028076171875,
"logps/rejected": -564.5962524414062,
"loss": 0.1708,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.09488777071237564,
"rewards/margins": 0.06974340975284576,
"rewards/rejected": -0.164631187915802,
"step": 880
},
{
"epoch": 0.95,
"learning_rate": 3.825073047112743e-08,
"logits/chosen": -0.2944473624229431,
"logits/rejected": -0.2125546932220459,
"logps/chosen": -502.841552734375,
"logps/rejected": -546.7924194335938,
"loss": 0.1826,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.08710362762212753,
"rewards/margins": 0.059937745332717896,
"rewards/rejected": -0.14704139530658722,
"step": 890
},
{
"epoch": 0.96,
"learning_rate": 2.372847616895685e-08,
"logits/chosen": -0.2098701447248459,
"logits/rejected": -0.2635635733604431,
"logps/chosen": -481.09478759765625,
"logps/rejected": -569.34326171875,
"loss": 0.1763,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.09881605207920074,
"rewards/margins": 0.06372065842151642,
"rewards/rejected": -0.16253669559955597,
"step": 900
},
{
"epoch": 0.97,
"learning_rate": 1.264490846553279e-08,
"logits/chosen": -0.20950980484485626,
"logits/rejected": -0.2304944545030594,
"logps/chosen": -508.41241455078125,
"logps/rejected": -574.2259521484375,
"loss": 0.1727,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.0780680924654007,
"rewards/margins": 0.06727245450019836,
"rewards/rejected": -0.14534054696559906,
"step": 910
},
{
"epoch": 0.98,
"learning_rate": 5.015418611516165e-09,
"logits/chosen": -0.2829793095588684,
"logits/rejected": -0.2987596392631531,
"logps/chosen": -475.51031494140625,
"logps/rejected": -521.2598876953125,
"loss": 0.1865,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.09187036752700806,
"rewards/margins": 0.06315977871417999,
"rewards/rejected": -0.15503014624118805,
"step": 920
},
{
"epoch": 0.99,
"learning_rate": 8.506013354186993e-10,
"logits/chosen": -0.23163847625255585,
"logits/rejected": -0.24427077174186707,
"logps/chosen": -477.51507568359375,
"logps/rejected": -542.298828125,
"loss": 0.1848,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.0930924192070961,
"rewards/margins": 0.06024498865008354,
"rewards/rejected": -0.15333738923072815,
"step": 930
},
{
"epoch": 1.0,
"step": 937,
"total_flos": 0.0,
"train_loss": 0.1881250925163322,
"train_runtime": 7837.4153,
"train_samples_per_second": 3.828,
"train_steps_per_second": 0.12
}
],
"logging_steps": 10,
"max_steps": 937,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}