|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.998451213216314, |
|
"eval_steps": 100, |
|
"global_step": 2904, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.7182130584192438e-09, |
|
"logits/chosen": -2.9648265838623047, |
|
"logits/rejected": -2.9711227416992188, |
|
"logps/chosen": -256.0919494628906, |
|
"logps/rejected": -234.60708618164062, |
|
"loss": 1.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.718213058419244e-08, |
|
"logits/chosen": -3.049875497817993, |
|
"logits/rejected": -3.0188238620758057, |
|
"logps/chosen": -276.6912536621094, |
|
"logps/rejected": -202.39605712890625, |
|
"loss": 1.0001, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.0005764114903286099, |
|
"rewards/margins": -0.006484686397016048, |
|
"rewards/rejected": 0.007061097305268049, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.436426116838488e-08, |
|
"logits/chosen": -2.988577127456665, |
|
"logits/rejected": -2.9995627403259277, |
|
"logps/chosen": -312.2018127441406, |
|
"logps/rejected": -246.76266479492188, |
|
"loss": 1.0026, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": 0.003506724489852786, |
|
"rewards/margins": -0.0012849611230194569, |
|
"rewards/rejected": 0.004791685380041599, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.154639175257731e-08, |
|
"logits/chosen": -3.063732624053955, |
|
"logits/rejected": -3.0357906818389893, |
|
"logps/chosen": -260.15679931640625, |
|
"logps/rejected": -224.3686065673828, |
|
"loss": 0.9974, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.0021156296133995056, |
|
"rewards/margins": 0.0043937130831182, |
|
"rewards/rejected": -0.0022780844010412693, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.872852233676976e-08, |
|
"logits/chosen": -3.070286273956299, |
|
"logits/rejected": -3.0322961807250977, |
|
"logps/chosen": -299.5580139160156, |
|
"logps/rejected": -250.05123901367188, |
|
"loss": 0.9971, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.00013011172995902598, |
|
"rewards/margins": -0.0006899217842146754, |
|
"rewards/rejected": 0.0008200337179005146, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.59106529209622e-08, |
|
"logits/chosen": -3.019392490386963, |
|
"logits/rejected": -3.024167060852051, |
|
"logps/chosen": -289.3621520996094, |
|
"logps/rejected": -224.00979614257812, |
|
"loss": 1.003, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.0025630726013332605, |
|
"rewards/margins": -0.0055747563019394875, |
|
"rewards/rejected": 0.003011685097590089, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.0309278350515462e-07, |
|
"logits/chosen": -3.0416665077209473, |
|
"logits/rejected": -3.020573616027832, |
|
"logps/chosen": -247.55380249023438, |
|
"logps/rejected": -226.4866943359375, |
|
"loss": 0.9977, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.0035637759137898684, |
|
"rewards/margins": 0.0046446239575743675, |
|
"rewards/rejected": -0.0010808479273691773, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.202749140893471e-07, |
|
"logits/chosen": -3.0761704444885254, |
|
"logits/rejected": -3.058954954147339, |
|
"logps/chosen": -305.7156677246094, |
|
"logps/rejected": -251.75009155273438, |
|
"loss": 0.9938, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.004508626647293568, |
|
"rewards/margins": 0.007502266205847263, |
|
"rewards/rejected": -0.0029936402570456266, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.3745704467353952e-07, |
|
"logits/chosen": -3.049072742462158, |
|
"logits/rejected": -3.0234384536743164, |
|
"logps/chosen": -293.57989501953125, |
|
"logps/rejected": -240.2385711669922, |
|
"loss": 0.9985, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.002101506572216749, |
|
"rewards/margins": 0.002054845681414008, |
|
"rewards/rejected": 4.6660610678372905e-05, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.5463917525773197e-07, |
|
"logits/chosen": -3.079655885696411, |
|
"logits/rejected": -3.0430655479431152, |
|
"logps/chosen": -259.3849792480469, |
|
"logps/rejected": -216.38330078125, |
|
"loss": 0.9993, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.001981315901502967, |
|
"rewards/margins": 0.003211658913642168, |
|
"rewards/rejected": -0.0012303430121392012, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.718213058419244e-07, |
|
"logits/chosen": -3.0111169815063477, |
|
"logits/rejected": -3.006265640258789, |
|
"logps/chosen": -267.73577880859375, |
|
"logps/rejected": -222.9344482421875, |
|
"loss": 1.0009, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.005559581331908703, |
|
"rewards/margins": 0.004569889511913061, |
|
"rewards/rejected": 0.000989692285656929, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.8900343642611682e-07, |
|
"logits/chosen": -3.0228209495544434, |
|
"logits/rejected": -2.9778640270233154, |
|
"logps/chosen": -269.3376770019531, |
|
"logps/rejected": -230.95877075195312, |
|
"loss": 0.9971, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0013723246520385146, |
|
"rewards/margins": 0.0024911228101700544, |
|
"rewards/rejected": -0.0011187975760549307, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.0618556701030925e-07, |
|
"logits/chosen": -3.0604119300842285, |
|
"logits/rejected": -3.0274159908294678, |
|
"logps/chosen": -310.97454833984375, |
|
"logps/rejected": -232.7030029296875, |
|
"loss": 0.9997, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.0033910819329321384, |
|
"rewards/margins": 0.0011182299349457026, |
|
"rewards/rejected": 0.002272851997986436, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.2336769759450173e-07, |
|
"logits/chosen": -3.1223320960998535, |
|
"logits/rejected": -3.0860095024108887, |
|
"logps/chosen": -286.6527099609375, |
|
"logps/rejected": -241.933349609375, |
|
"loss": 0.996, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.0031617667991667986, |
|
"rewards/margins": 0.005714719649404287, |
|
"rewards/rejected": -0.0025529528502374887, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.405498281786942e-07, |
|
"logits/chosen": -3.045279026031494, |
|
"logits/rejected": -3.040912628173828, |
|
"logps/chosen": -292.4465637207031, |
|
"logps/rejected": -234.72903442382812, |
|
"loss": 0.9968, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0010984055697917938, |
|
"rewards/margins": 0.002134153386577964, |
|
"rewards/rejected": -0.00103574781678617, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.5773195876288655e-07, |
|
"logits/chosen": -3.0267093181610107, |
|
"logits/rejected": -3.0170674324035645, |
|
"logps/chosen": -275.6455993652344, |
|
"logps/rejected": -256.4563903808594, |
|
"loss": 0.9998, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.0011621230514720082, |
|
"rewards/margins": -2.7875230443896726e-05, |
|
"rewards/rejected": -0.001134247868321836, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.7491408934707903e-07, |
|
"logits/chosen": -3.08597731590271, |
|
"logits/rejected": -3.0747408866882324, |
|
"logps/chosen": -278.91754150390625, |
|
"logps/rejected": -218.88558959960938, |
|
"loss": 0.9949, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.004570486024022102, |
|
"rewards/margins": 0.01147634070366621, |
|
"rewards/rejected": -0.006905855145305395, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.9209621993127146e-07, |
|
"logits/chosen": -3.0499391555786133, |
|
"logits/rejected": -3.036341905593872, |
|
"logps/chosen": -292.2102966308594, |
|
"logps/rejected": -236.95703125, |
|
"loss": 0.9925, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.0019660559482872486, |
|
"rewards/margins": 0.009962075389921665, |
|
"rewards/rejected": -0.00799601897597313, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.0927835051546394e-07, |
|
"logits/chosen": -3.091757297515869, |
|
"logits/rejected": -3.080824375152588, |
|
"logps/chosen": -248.6970672607422, |
|
"logps/rejected": -211.7375946044922, |
|
"loss": 0.993, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.0008403388783335686, |
|
"rewards/margins": 0.004896977450698614, |
|
"rewards/rejected": -0.004056639038026333, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.2646048109965636e-07, |
|
"logits/chosen": -3.0578689575195312, |
|
"logits/rejected": -3.033844232559204, |
|
"logps/chosen": -246.07040405273438, |
|
"logps/rejected": -200.9595184326172, |
|
"loss": 0.9978, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0035698451101779938, |
|
"rewards/margins": 0.012628579512238503, |
|
"rewards/rejected": -0.009058734402060509, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.436426116838488e-07, |
|
"logits/chosen": -3.08526349067688, |
|
"logits/rejected": -3.063560724258423, |
|
"logps/chosen": -252.265869140625, |
|
"logps/rejected": -192.21331787109375, |
|
"loss": 0.99, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.009008857421576977, |
|
"rewards/margins": 0.016440508887171745, |
|
"rewards/rejected": -0.007431652396917343, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.608247422680412e-07, |
|
"logits/chosen": -2.9606690406799316, |
|
"logits/rejected": -2.906953811645508, |
|
"logps/chosen": -292.0260925292969, |
|
"logps/rejected": -215.34036254882812, |
|
"loss": 0.9855, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0046757906675338745, |
|
"rewards/margins": 0.013047484681010246, |
|
"rewards/rejected": -0.008371694944798946, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.7800687285223364e-07, |
|
"logits/chosen": -2.990668773651123, |
|
"logits/rejected": -2.986696481704712, |
|
"logps/chosen": -260.21832275390625, |
|
"logps/rejected": -237.1192626953125, |
|
"loss": 0.9839, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.006101504433900118, |
|
"rewards/margins": 0.011856775730848312, |
|
"rewards/rejected": -0.005755270831286907, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.9518900343642607e-07, |
|
"logits/chosen": -3.0254712104797363, |
|
"logits/rejected": -3.0170722007751465, |
|
"logps/chosen": -263.41680908203125, |
|
"logps/rejected": -240.377685546875, |
|
"loss": 0.9888, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.006732765585184097, |
|
"rewards/margins": 0.014450904913246632, |
|
"rewards/rejected": -0.007718136068433523, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.123711340206185e-07, |
|
"logits/chosen": -3.078042507171631, |
|
"logits/rejected": -3.0471174716949463, |
|
"logps/chosen": -299.16107177734375, |
|
"logps/rejected": -214.18759155273438, |
|
"loss": 0.9867, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.006704004947096109, |
|
"rewards/margins": 0.01422835886478424, |
|
"rewards/rejected": -0.007524352520704269, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.2955326460481097e-07, |
|
"logits/chosen": -3.026909589767456, |
|
"logits/rejected": -3.018611431121826, |
|
"logps/chosen": -264.2486572265625, |
|
"logps/rejected": -233.31826782226562, |
|
"loss": 0.9832, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.011163066141307354, |
|
"rewards/margins": 0.019244546070694923, |
|
"rewards/rejected": -0.008081478998064995, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.4673539518900345e-07, |
|
"logits/chosen": -3.0170772075653076, |
|
"logits/rejected": -3.0285823345184326, |
|
"logps/chosen": -263.9449157714844, |
|
"logps/rejected": -219.4688262939453, |
|
"loss": 0.9828, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.010579807683825493, |
|
"rewards/margins": 0.024952661246061325, |
|
"rewards/rejected": -0.014372853562235832, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.639175257731959e-07, |
|
"logits/chosen": -3.0661990642547607, |
|
"logits/rejected": -3.0676910877227783, |
|
"logps/chosen": -274.22003173828125, |
|
"logps/rejected": -229.6044158935547, |
|
"loss": 0.9799, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.004864403046667576, |
|
"rewards/margins": 0.016584355384111404, |
|
"rewards/rejected": -0.011719951406121254, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.810996563573884e-07, |
|
"logits/chosen": -3.031026840209961, |
|
"logits/rejected": -3.0113377571105957, |
|
"logps/chosen": -283.8157653808594, |
|
"logps/rejected": -235.0233612060547, |
|
"loss": 0.9754, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.013933306559920311, |
|
"rewards/margins": 0.027073601260781288, |
|
"rewards/rejected": -0.013140290975570679, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.982817869415807e-07, |
|
"logits/chosen": -3.094398021697998, |
|
"logits/rejected": -3.0440070629119873, |
|
"logps/chosen": -270.22052001953125, |
|
"logps/rejected": -223.65493774414062, |
|
"loss": 0.9774, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.008421173319220543, |
|
"rewards/margins": 0.019475247710943222, |
|
"rewards/rejected": -0.01105407439172268, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.982778415614236e-07, |
|
"logits/chosen": -3.023087978363037, |
|
"logits/rejected": -2.9992034435272217, |
|
"logps/chosen": -244.30337524414062, |
|
"logps/rejected": -218.7770538330078, |
|
"loss": 0.9735, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.008720096200704575, |
|
"rewards/margins": 0.029811996966600418, |
|
"rewards/rejected": -0.021091898903250694, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.963643321852277e-07, |
|
"logits/chosen": -3.0621352195739746, |
|
"logits/rejected": -3.0481069087982178, |
|
"logps/chosen": -299.58758544921875, |
|
"logps/rejected": -257.4301452636719, |
|
"loss": 0.9719, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.011654629372060299, |
|
"rewards/margins": 0.02595471777021885, |
|
"rewards/rejected": -0.01430008839815855, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.944508228090318e-07, |
|
"logits/chosen": -3.026646137237549, |
|
"logits/rejected": -3.0066604614257812, |
|
"logps/chosen": -242.5664825439453, |
|
"logps/rejected": -187.6553497314453, |
|
"loss": 0.9641, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.014026440680027008, |
|
"rewards/margins": 0.0420592799782753, |
|
"rewards/rejected": -0.02803283929824829, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.925373134328357e-07, |
|
"logits/chosen": -3.0689878463745117, |
|
"logits/rejected": -3.052264928817749, |
|
"logps/chosen": -303.94036865234375, |
|
"logps/rejected": -238.4488067626953, |
|
"loss": 0.9634, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.02398153766989708, |
|
"rewards/margins": 0.042572326958179474, |
|
"rewards/rejected": -0.018590793013572693, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.906238040566398e-07, |
|
"logits/chosen": -2.995407819747925, |
|
"logits/rejected": -2.9780545234680176, |
|
"logps/chosen": -235.09848022460938, |
|
"logps/rejected": -236.380859375, |
|
"loss": 0.9616, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.006325787398964167, |
|
"rewards/margins": 0.03254387527704239, |
|
"rewards/rejected": -0.02621809020638466, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.887102946804438e-07, |
|
"logits/chosen": -3.066584348678589, |
|
"logits/rejected": -3.03863263130188, |
|
"logps/chosen": -306.0690612792969, |
|
"logps/rejected": -245.27407836914062, |
|
"loss": 0.9508, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.02622169815003872, |
|
"rewards/margins": 0.059129487723112106, |
|
"rewards/rejected": -0.032907791435718536, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.867967853042479e-07, |
|
"logits/chosen": -3.0108275413513184, |
|
"logits/rejected": -3.008779525756836, |
|
"logps/chosen": -294.0123596191406, |
|
"logps/rejected": -248.9111785888672, |
|
"loss": 0.9496, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.008144224062561989, |
|
"rewards/margins": 0.045391060411930084, |
|
"rewards/rejected": -0.037246834486722946, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.84883275928052e-07, |
|
"logits/chosen": -3.048269510269165, |
|
"logits/rejected": -3.011050224304199, |
|
"logps/chosen": -303.9602355957031, |
|
"logps/rejected": -269.4437561035156, |
|
"loss": 0.9473, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.021487019956111908, |
|
"rewards/margins": 0.06678290665149689, |
|
"rewards/rejected": -0.04529587924480438, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.82969766551856e-07, |
|
"logits/chosen": -3.0777668952941895, |
|
"logits/rejected": -3.068040370941162, |
|
"logps/chosen": -282.12713623046875, |
|
"logps/rejected": -236.7052459716797, |
|
"loss": 0.9515, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.02360449731349945, |
|
"rewards/margins": 0.05431375652551651, |
|
"rewards/rejected": -0.03070926107466221, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.810562571756601e-07, |
|
"logits/chosen": -2.9710302352905273, |
|
"logits/rejected": -2.983682155609131, |
|
"logps/chosen": -272.12713623046875, |
|
"logps/rejected": -235.8425750732422, |
|
"loss": 0.9475, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.013235519640147686, |
|
"rewards/margins": 0.05444386601448059, |
|
"rewards/rejected": -0.04120834544301033, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.791427477994642e-07, |
|
"logits/chosen": -3.0254111289978027, |
|
"logits/rejected": -3.006087303161621, |
|
"logps/chosen": -254.69107055664062, |
|
"logps/rejected": -210.39474487304688, |
|
"loss": 0.9402, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.022726301103830338, |
|
"rewards/margins": 0.06853805482387543, |
|
"rewards/rejected": -0.04581175372004509, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.772292384232682e-07, |
|
"logits/chosen": -3.0280232429504395, |
|
"logits/rejected": -2.9936630725860596, |
|
"logps/chosen": -261.80731201171875, |
|
"logps/rejected": -251.14950561523438, |
|
"loss": 0.9398, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.014309520833194256, |
|
"rewards/margins": 0.050246305763721466, |
|
"rewards/rejected": -0.03593678027391434, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.753157290470723e-07, |
|
"logits/chosen": -3.0422613620758057, |
|
"logits/rejected": -3.004459857940674, |
|
"logps/chosen": -245.8754425048828, |
|
"logps/rejected": -202.38157653808594, |
|
"loss": 0.9391, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.005309578962624073, |
|
"rewards/margins": 0.0503067672252655, |
|
"rewards/rejected": -0.0449971929192543, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.7340221967087635e-07, |
|
"logits/chosen": -3.061112880706787, |
|
"logits/rejected": -3.045253038406372, |
|
"logps/chosen": -257.9686584472656, |
|
"logps/rejected": -239.047119140625, |
|
"loss": 0.9323, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.012545446865260601, |
|
"rewards/margins": 0.07196511328220367, |
|
"rewards/rejected": -0.0594196543097496, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.714887102946804e-07, |
|
"logits/chosen": -3.0016167163848877, |
|
"logits/rejected": -2.9664511680603027, |
|
"logps/chosen": -278.2596740722656, |
|
"logps/rejected": -246.8672637939453, |
|
"loss": 0.9194, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.00037018657894805074, |
|
"rewards/margins": 0.0750364139676094, |
|
"rewards/rejected": -0.07466623187065125, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.6957520091848447e-07, |
|
"logits/chosen": -3.0514495372772217, |
|
"logits/rejected": -3.0419204235076904, |
|
"logps/chosen": -272.3708801269531, |
|
"logps/rejected": -227.4208221435547, |
|
"loss": 0.9356, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.01730353757739067, |
|
"rewards/margins": 0.059229202568531036, |
|
"rewards/rejected": -0.04192566126585007, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.6766169154228853e-07, |
|
"logits/chosen": -3.0307857990264893, |
|
"logits/rejected": -3.0139756202697754, |
|
"logps/chosen": -264.16168212890625, |
|
"logps/rejected": -219.8409881591797, |
|
"loss": 0.9122, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.007891577668488026, |
|
"rewards/margins": 0.07121269404888153, |
|
"rewards/rejected": -0.06332111358642578, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.657481821660926e-07, |
|
"logits/chosen": -3.016810178756714, |
|
"logits/rejected": -3.019348621368408, |
|
"logps/chosen": -238.8083038330078, |
|
"logps/rejected": -212.76193237304688, |
|
"loss": 0.9028, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.028794366866350174, |
|
"rewards/margins": 0.08307679742574692, |
|
"rewards/rejected": -0.054282426834106445, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.6383467278989666e-07, |
|
"logits/chosen": -3.044045925140381, |
|
"logits/rejected": -3.0309886932373047, |
|
"logps/chosen": -282.59814453125, |
|
"logps/rejected": -229.63858032226562, |
|
"loss": 0.9063, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.022610556334257126, |
|
"rewards/margins": 0.09912824630737305, |
|
"rewards/rejected": -0.07651769369840622, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.6192116341370067e-07, |
|
"logits/chosen": -3.0251693725585938, |
|
"logits/rejected": -3.040748119354248, |
|
"logps/chosen": -289.87896728515625, |
|
"logps/rejected": -243.85952758789062, |
|
"loss": 0.8948, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.03339407593011856, |
|
"rewards/margins": 0.1147690862417221, |
|
"rewards/rejected": -0.08137501776218414, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.6000765403750473e-07, |
|
"logits/chosen": -3.0105278491973877, |
|
"logits/rejected": -2.9718642234802246, |
|
"logps/chosen": -244.4474334716797, |
|
"logps/rejected": -221.8011474609375, |
|
"loss": 0.8829, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.017204025760293007, |
|
"rewards/margins": 0.0894416943192482, |
|
"rewards/rejected": -0.07223766297101974, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.580941446613088e-07, |
|
"logits/chosen": -2.9197583198547363, |
|
"logits/rejected": -2.915809392929077, |
|
"logps/chosen": -304.55645751953125, |
|
"logps/rejected": -253.95828247070312, |
|
"loss": 0.8834, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.02509082481265068, |
|
"rewards/margins": 0.11200642585754395, |
|
"rewards/rejected": -0.08691558986902237, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.5618063528511285e-07, |
|
"logits/chosen": -3.0240163803100586, |
|
"logits/rejected": -2.998610019683838, |
|
"logps/chosen": -270.31378173828125, |
|
"logps/rejected": -242.4883270263672, |
|
"loss": 0.8813, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.02489570900797844, |
|
"rewards/margins": 0.13008132576942444, |
|
"rewards/rejected": -0.1051856279373169, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.542671259089169e-07, |
|
"logits/chosen": -3.0373079776763916, |
|
"logits/rejected": -3.0401182174682617, |
|
"logps/chosen": -235.19534301757812, |
|
"logps/rejected": -221.75363159179688, |
|
"loss": 0.8933, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.00027992590912617743, |
|
"rewards/margins": 0.09569151699542999, |
|
"rewards/rejected": -0.09541159123182297, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.52353616532721e-07, |
|
"logits/chosen": -3.0647318363189697, |
|
"logits/rejected": -3.054605484008789, |
|
"logps/chosen": -280.6692810058594, |
|
"logps/rejected": -237.3024444580078, |
|
"loss": 0.8826, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.03587502986192703, |
|
"rewards/margins": 0.12422885000705719, |
|
"rewards/rejected": -0.08835381269454956, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.5044010715652504e-07, |
|
"logits/chosen": -3.0048820972442627, |
|
"logits/rejected": -3.0057966709136963, |
|
"logps/chosen": -256.4432373046875, |
|
"logps/rejected": -229.13198852539062, |
|
"loss": 0.878, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.039617545902729034, |
|
"rewards/margins": 0.11413271725177765, |
|
"rewards/rejected": -0.07451517134904861, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.485265977803291e-07, |
|
"logits/chosen": -3.0452122688293457, |
|
"logits/rejected": -3.041473865509033, |
|
"logps/chosen": -293.8966369628906, |
|
"logps/rejected": -251.0624542236328, |
|
"loss": 0.8656, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.04036722332239151, |
|
"rewards/margins": 0.1490786224603653, |
|
"rewards/rejected": -0.10871138423681259, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.4661308840413316e-07, |
|
"logits/chosen": -3.0647714138031006, |
|
"logits/rejected": -3.01206636428833, |
|
"logps/chosen": -260.71343994140625, |
|
"logps/rejected": -221.20632934570312, |
|
"loss": 0.8666, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.011453949846327305, |
|
"rewards/margins": 0.13622693717479706, |
|
"rewards/rejected": -0.12477298080921173, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.446995790279372e-07, |
|
"logits/chosen": -3.034562349319458, |
|
"logits/rejected": -2.989062547683716, |
|
"logps/chosen": -291.01446533203125, |
|
"logps/rejected": -241.4232177734375, |
|
"loss": 0.8519, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.035387031733989716, |
|
"rewards/margins": 0.18669307231903076, |
|
"rewards/rejected": -0.15130606293678284, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.4278606965174123e-07, |
|
"logits/chosen": -3.012864351272583, |
|
"logits/rejected": -3.005479097366333, |
|
"logps/chosen": -243.08975219726562, |
|
"logps/rejected": -216.92074584960938, |
|
"loss": 0.8492, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.002515086205676198, |
|
"rewards/margins": 0.121725894510746, |
|
"rewards/rejected": -0.11921081691980362, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.408725602755453e-07, |
|
"logits/chosen": -3.013345718383789, |
|
"logits/rejected": -3.0360682010650635, |
|
"logps/chosen": -248.50326538085938, |
|
"logps/rejected": -203.6788787841797, |
|
"loss": 0.8617, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.0035158656537532806, |
|
"rewards/margins": 0.14544394612312317, |
|
"rewards/rejected": -0.1419280618429184, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.3895905089934936e-07, |
|
"logits/chosen": -3.047393321990967, |
|
"logits/rejected": -3.0524885654449463, |
|
"logps/chosen": -255.6022491455078, |
|
"logps/rejected": -219.2570343017578, |
|
"loss": 0.8256, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.03486616909503937, |
|
"rewards/margins": 0.1496470421552658, |
|
"rewards/rejected": -0.11478086560964584, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.370455415231534e-07, |
|
"logits/chosen": -3.066741466522217, |
|
"logits/rejected": -3.046435832977295, |
|
"logps/chosen": -305.30712890625, |
|
"logps/rejected": -249.01968383789062, |
|
"loss": 0.8203, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.04162443429231644, |
|
"rewards/margins": 0.18165114521980286, |
|
"rewards/rejected": -0.14002671837806702, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.351320321469575e-07, |
|
"logits/chosen": -2.990051746368408, |
|
"logits/rejected": -2.992987632751465, |
|
"logps/chosen": -251.0054168701172, |
|
"logps/rejected": -238.8704376220703, |
|
"loss": 0.8282, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.028837282210588455, |
|
"rewards/margins": 0.1591511368751526, |
|
"rewards/rejected": -0.13031385838985443, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.3321852277076154e-07, |
|
"logits/chosen": -3.0534205436706543, |
|
"logits/rejected": -3.0434939861297607, |
|
"logps/chosen": -256.14019775390625, |
|
"logps/rejected": -216.6497802734375, |
|
"loss": 0.8083, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.034832023084163666, |
|
"rewards/margins": 0.20265790820121765, |
|
"rewards/rejected": -0.16782590746879578, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.313050133945656e-07, |
|
"logits/chosen": -3.0195870399475098, |
|
"logits/rejected": -3.027886390686035, |
|
"logps/chosen": -277.5207214355469, |
|
"logps/rejected": -232.2840576171875, |
|
"loss": 0.8119, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.05330047011375427, |
|
"rewards/margins": 0.21206972002983093, |
|
"rewards/rejected": -0.15876924991607666, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.2939150401836967e-07, |
|
"logits/chosen": -3.0407612323760986, |
|
"logits/rejected": -3.014266014099121, |
|
"logps/chosen": -271.2508850097656, |
|
"logps/rejected": -233.294189453125, |
|
"loss": 0.792, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.02551809512078762, |
|
"rewards/margins": 0.21874013543128967, |
|
"rewards/rejected": -0.1932220160961151, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.2747799464217373e-07, |
|
"logits/chosen": -2.994800567626953, |
|
"logits/rejected": -2.9698376655578613, |
|
"logps/chosen": -287.5726623535156, |
|
"logps/rejected": -237.8695068359375, |
|
"loss": 0.8042, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.006596171762794256, |
|
"rewards/margins": 0.18577079474925995, |
|
"rewards/rejected": -0.17917463183403015, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.255644852659778e-07, |
|
"logits/chosen": -3.0441243648529053, |
|
"logits/rejected": -3.053039073944092, |
|
"logps/chosen": -281.0970153808594, |
|
"logps/rejected": -227.56851196289062, |
|
"loss": 0.8389, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.022974971681833267, |
|
"rewards/margins": 0.15209509432315826, |
|
"rewards/rejected": -0.1291201412677765, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.236509758897818e-07, |
|
"logits/chosen": -3.0892813205718994, |
|
"logits/rejected": -3.0816287994384766, |
|
"logps/chosen": -258.57781982421875, |
|
"logps/rejected": -230.27615356445312, |
|
"loss": 0.8064, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.006377282552421093, |
|
"rewards/margins": 0.21148601174354553, |
|
"rewards/rejected": -0.20510873198509216, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.2173746651358586e-07, |
|
"logits/chosen": -3.0129265785217285, |
|
"logits/rejected": -2.9877090454101562, |
|
"logps/chosen": -275.82568359375, |
|
"logps/rejected": -219.6047821044922, |
|
"loss": 0.8098, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.04388806223869324, |
|
"rewards/margins": 0.2091568410396576, |
|
"rewards/rejected": -0.16526879370212555, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.198239571373899e-07, |
|
"logits/chosen": -2.9762985706329346, |
|
"logits/rejected": -2.986323833465576, |
|
"logps/chosen": -262.74371337890625, |
|
"logps/rejected": -207.13418579101562, |
|
"loss": 0.8056, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.02364834025502205, |
|
"rewards/margins": 0.2192881554365158, |
|
"rewards/rejected": -0.19563981890678406, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.17910447761194e-07, |
|
"logits/chosen": -3.0574042797088623, |
|
"logits/rejected": -3.0168094635009766, |
|
"logps/chosen": -296.2746276855469, |
|
"logps/rejected": -256.06854248046875, |
|
"loss": 0.8097, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.013486223295331001, |
|
"rewards/margins": 0.14934802055358887, |
|
"rewards/rejected": -0.13586178421974182, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.1599693838499805e-07, |
|
"logits/chosen": -3.0353150367736816, |
|
"logits/rejected": -3.0224339962005615, |
|
"logps/chosen": -261.10992431640625, |
|
"logps/rejected": -251.72293090820312, |
|
"loss": 0.7899, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.007094231434166431, |
|
"rewards/margins": 0.16344769299030304, |
|
"rewards/rejected": -0.1705418974161148, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.140834290088021e-07, |
|
"logits/chosen": -3.008420944213867, |
|
"logits/rejected": -2.9702980518341064, |
|
"logps/chosen": -269.81903076171875, |
|
"logps/rejected": -199.1494903564453, |
|
"loss": 0.7627, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.01843448355793953, |
|
"rewards/margins": 0.2920437455177307, |
|
"rewards/rejected": -0.2736092209815979, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.121699196326062e-07, |
|
"logits/chosen": -3.004692792892456, |
|
"logits/rejected": -2.9878294467926025, |
|
"logps/chosen": -262.6126708984375, |
|
"logps/rejected": -220.02096557617188, |
|
"loss": 0.7653, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.001659922068938613, |
|
"rewards/margins": 0.25197452306747437, |
|
"rewards/rejected": -0.2503146231174469, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.1025641025641024e-07, |
|
"logits/chosen": -2.9879119396209717, |
|
"logits/rejected": -2.980886459350586, |
|
"logps/chosen": -262.13287353515625, |
|
"logps/rejected": -249.5322723388672, |
|
"loss": 0.782, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.036843255162239075, |
|
"rewards/margins": 0.17954595386981964, |
|
"rewards/rejected": -0.21638920903205872, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.083429008802143e-07, |
|
"logits/chosen": -3.0227646827697754, |
|
"logits/rejected": -3.0109972953796387, |
|
"logps/chosen": -266.5917663574219, |
|
"logps/rejected": -237.83108520507812, |
|
"loss": 0.7486, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.03099486604332924, |
|
"rewards/margins": 0.23689258098602295, |
|
"rewards/rejected": -0.20589768886566162, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.0642939150401836e-07, |
|
"logits/chosen": -2.9976096153259277, |
|
"logits/rejected": -2.9851810932159424, |
|
"logps/chosen": -283.94677734375, |
|
"logps/rejected": -237.71841430664062, |
|
"loss": 0.7486, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.020093750208616257, |
|
"rewards/margins": 0.27248382568359375, |
|
"rewards/rejected": -0.2925775945186615, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.0451588212782237e-07, |
|
"logits/chosen": -3.031033754348755, |
|
"logits/rejected": -3.024602174758911, |
|
"logps/chosen": -261.4927978515625, |
|
"logps/rejected": -222.33651733398438, |
|
"loss": 0.7052, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.016252126544713974, |
|
"rewards/margins": 0.25844720005989075, |
|
"rewards/rejected": -0.24219508469104767, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.0260237275162643e-07, |
|
"logits/chosen": -3.017972946166992, |
|
"logits/rejected": -2.993112564086914, |
|
"logps/chosen": -277.76177978515625, |
|
"logps/rejected": -241.6676483154297, |
|
"loss": 0.765, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.010549797676503658, |
|
"rewards/margins": 0.2188224494457245, |
|
"rewards/rejected": -0.20827265083789825, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.006888633754305e-07, |
|
"logits/chosen": -3.0501556396484375, |
|
"logits/rejected": -3.039248466491699, |
|
"logps/chosen": -261.3684387207031, |
|
"logps/rejected": -231.1971893310547, |
|
"loss": 0.6973, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.002077583223581314, |
|
"rewards/margins": 0.3389395475387573, |
|
"rewards/rejected": -0.3368619680404663, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.9877535399923456e-07, |
|
"logits/chosen": -3.063708782196045, |
|
"logits/rejected": -3.0399699211120605, |
|
"logps/chosen": -294.5484924316406, |
|
"logps/rejected": -228.28854370117188, |
|
"loss": 0.7335, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.037904877215623856, |
|
"rewards/margins": 0.2777422368526459, |
|
"rewards/rejected": -0.2398373782634735, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.968618446230386e-07, |
|
"logits/chosen": -3.044889211654663, |
|
"logits/rejected": -3.0049989223480225, |
|
"logps/chosen": -280.3213195800781, |
|
"logps/rejected": -223.97787475585938, |
|
"loss": 0.6679, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.06323406845331192, |
|
"rewards/margins": 0.37538376450538635, |
|
"rewards/rejected": -0.31214970350265503, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.949483352468427e-07, |
|
"logits/chosen": -3.0053951740264893, |
|
"logits/rejected": -2.98051118850708, |
|
"logps/chosen": -252.00387573242188, |
|
"logps/rejected": -218.14602661132812, |
|
"loss": 0.7566, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.040188662707805634, |
|
"rewards/margins": 0.23952028155326843, |
|
"rewards/rejected": -0.2797089219093323, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.9303482587064674e-07, |
|
"logits/chosen": -3.0364222526550293, |
|
"logits/rejected": -3.0096614360809326, |
|
"logps/chosen": -253.96923828125, |
|
"logps/rejected": -229.8144989013672, |
|
"loss": 0.7089, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.007532055489718914, |
|
"rewards/margins": 0.29781442880630493, |
|
"rewards/rejected": -0.29028236865997314, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.911213164944508e-07, |
|
"logits/chosen": -3.0369744300842285, |
|
"logits/rejected": -3.0353286266326904, |
|
"logps/chosen": -290.00042724609375, |
|
"logps/rejected": -273.43267822265625, |
|
"loss": 0.7019, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.018222318962216377, |
|
"rewards/margins": 0.31897154450416565, |
|
"rewards/rejected": -0.3007492423057556, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.8920780711825487e-07, |
|
"logits/chosen": -2.9672696590423584, |
|
"logits/rejected": -2.9645469188690186, |
|
"logps/chosen": -307.1138610839844, |
|
"logps/rejected": -237.83786010742188, |
|
"loss": 0.6937, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.005494369193911552, |
|
"rewards/margins": 0.31811192631721497, |
|
"rewards/rejected": -0.31261754035949707, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.8729429774205893e-07, |
|
"logits/chosen": -3.0339221954345703, |
|
"logits/rejected": -3.0139739513397217, |
|
"logps/chosen": -284.10772705078125, |
|
"logps/rejected": -243.3460235595703, |
|
"loss": 0.6766, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.01381886936724186, |
|
"rewards/margins": 0.38050729036331177, |
|
"rewards/rejected": -0.39432623982429504, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.8538078836586294e-07, |
|
"logits/chosen": -3.0409202575683594, |
|
"logits/rejected": -3.017521381378174, |
|
"logps/chosen": -274.10675048828125, |
|
"logps/rejected": -235.25332641601562, |
|
"loss": 0.7214, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.045437611639499664, |
|
"rewards/margins": 0.3723521828651428, |
|
"rewards/rejected": -0.32691454887390137, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.83467278989667e-07, |
|
"logits/chosen": -3.0040934085845947, |
|
"logits/rejected": -2.9863858222961426, |
|
"logps/chosen": -264.234130859375, |
|
"logps/rejected": -212.1512908935547, |
|
"loss": 0.6845, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.005959497299045324, |
|
"rewards/margins": 0.3572615683078766, |
|
"rewards/rejected": -0.3513020873069763, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.8155376961347106e-07, |
|
"logits/chosen": -3.0645086765289307, |
|
"logits/rejected": -3.007006883621216, |
|
"logps/chosen": -230.1156005859375, |
|
"logps/rejected": -207.70779418945312, |
|
"loss": 0.6515, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.018168695271015167, |
|
"rewards/margins": 0.38268524408340454, |
|
"rewards/rejected": -0.4008539617061615, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.796402602372751e-07, |
|
"logits/chosen": -3.0344386100769043, |
|
"logits/rejected": -3.020028829574585, |
|
"logps/chosen": -273.54412841796875, |
|
"logps/rejected": -222.2449493408203, |
|
"loss": 0.6707, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.026546839624643326, |
|
"rewards/margins": 0.2530173659324646, |
|
"rewards/rejected": -0.27956423163414, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.777267508610792e-07, |
|
"logits/chosen": -2.9949254989624023, |
|
"logits/rejected": -3.0048325061798096, |
|
"logps/chosen": -247.9457244873047, |
|
"logps/rejected": -203.7804412841797, |
|
"loss": 0.6833, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.05914358049631119, |
|
"rewards/margins": 0.3101976811885834, |
|
"rewards/rejected": -0.3693412244319916, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.7581324148488325e-07, |
|
"logits/chosen": -3.0347888469696045, |
|
"logits/rejected": -2.994046688079834, |
|
"logps/chosen": -263.569091796875, |
|
"logps/rejected": -231.2215118408203, |
|
"loss": 0.6597, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.021738069131970406, |
|
"rewards/margins": 0.3827739953994751, |
|
"rewards/rejected": -0.36103588342666626, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.738997321086873e-07, |
|
"logits/chosen": -3.0062592029571533, |
|
"logits/rejected": -2.993868589401245, |
|
"logps/chosen": -276.2633056640625, |
|
"logps/rejected": -232.21493530273438, |
|
"loss": 0.6677, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.03908178210258484, |
|
"rewards/margins": 0.2943773865699768, |
|
"rewards/rejected": -0.33345913887023926, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.7198622273249137e-07, |
|
"logits/chosen": -2.9820735454559326, |
|
"logits/rejected": -3.0040230751037598, |
|
"logps/chosen": -275.2674865722656, |
|
"logps/rejected": -240.7872314453125, |
|
"loss": 0.6137, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.020224738866090775, |
|
"rewards/margins": 0.3282891809940338, |
|
"rewards/rejected": -0.30806440114974976, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -3.0153579711914062, |
|
"eval_logits/rejected": -2.9988856315612793, |
|
"eval_logps/chosen": -271.4433288574219, |
|
"eval_logps/rejected": -232.6822967529297, |
|
"eval_loss": 0.6276752948760986, |
|
"eval_rewards/accuracies": 0.7039999961853027, |
|
"eval_rewards/chosen": -0.028680188581347466, |
|
"eval_rewards/margins": 0.3904646337032318, |
|
"eval_rewards/rejected": -0.4191448390483856, |
|
"eval_runtime": 449.0184, |
|
"eval_samples_per_second": 4.454, |
|
"eval_steps_per_second": 0.278, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.7007271335629544e-07, |
|
"logits/chosen": -3.0252368450164795, |
|
"logits/rejected": -3.0065531730651855, |
|
"logps/chosen": -275.3318176269531, |
|
"logps/rejected": -244.02072143554688, |
|
"loss": 0.6284, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.04684365913271904, |
|
"rewards/margins": 0.3803178668022156, |
|
"rewards/rejected": -0.42716145515441895, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.681592039800995e-07, |
|
"logits/chosen": -3.0262959003448486, |
|
"logits/rejected": -3.0287508964538574, |
|
"logps/chosen": -271.2191162109375, |
|
"logps/rejected": -233.7063446044922, |
|
"loss": 0.6202, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.028351956978440285, |
|
"rewards/margins": 0.39485400915145874, |
|
"rewards/rejected": -0.4232059419155121, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.662456946039035e-07, |
|
"logits/chosen": -3.0115294456481934, |
|
"logits/rejected": -3.0146679878234863, |
|
"logps/chosen": -251.9388885498047, |
|
"logps/rejected": -229.016357421875, |
|
"loss": 0.6954, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.07080521434545517, |
|
"rewards/margins": 0.35120025277137756, |
|
"rewards/rejected": -0.42200547456741333, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.6433218522770757e-07, |
|
"logits/chosen": -3.007986068725586, |
|
"logits/rejected": -2.9926083087921143, |
|
"logps/chosen": -262.0426940917969, |
|
"logps/rejected": -256.99188232421875, |
|
"loss": 0.6533, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.070511594414711, |
|
"rewards/margins": 0.3111626207828522, |
|
"rewards/rejected": -0.381674200296402, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.6241867585151163e-07, |
|
"logits/chosen": -2.9853007793426514, |
|
"logits/rejected": -3.0082547664642334, |
|
"logps/chosen": -270.3041076660156, |
|
"logps/rejected": -236.4681854248047, |
|
"loss": 0.6359, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.022838518023490906, |
|
"rewards/margins": 0.3421871066093445, |
|
"rewards/rejected": -0.3650256097316742, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.605051664753157e-07, |
|
"logits/chosen": -2.9447665214538574, |
|
"logits/rejected": -2.933103084564209, |
|
"logps/chosen": -266.8084411621094, |
|
"logps/rejected": -215.50308227539062, |
|
"loss": 0.538, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.04661910608410835, |
|
"rewards/margins": 0.4469257891178131, |
|
"rewards/rejected": -0.49354487657546997, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.5859165709911975e-07, |
|
"logits/chosen": -3.0248591899871826, |
|
"logits/rejected": -2.98162579536438, |
|
"logps/chosen": -277.4466552734375, |
|
"logps/rejected": -250.49893188476562, |
|
"loss": 0.5931, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.004877015482634306, |
|
"rewards/margins": 0.4228130280971527, |
|
"rewards/rejected": -0.4276900887489319, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.566781477229238e-07, |
|
"logits/chosen": -3.0339090824127197, |
|
"logits/rejected": -2.9957573413848877, |
|
"logps/chosen": -275.76910400390625, |
|
"logps/rejected": -235.79830932617188, |
|
"loss": 0.6007, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.08596549183130264, |
|
"rewards/margins": 0.29382139444351196, |
|
"rewards/rejected": -0.3797869086265564, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.547646383467279e-07, |
|
"logits/chosen": -2.9797756671905518, |
|
"logits/rejected": -2.9523847103118896, |
|
"logps/chosen": -284.53839111328125, |
|
"logps/rejected": -214.0476531982422, |
|
"loss": 0.5431, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.07950444519519806, |
|
"rewards/margins": 0.4745730459690094, |
|
"rewards/rejected": -0.5540775060653687, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.5285112897053194e-07, |
|
"logits/chosen": -2.946882963180542, |
|
"logits/rejected": -2.9203898906707764, |
|
"logps/chosen": -257.57611083984375, |
|
"logps/rejected": -244.93075561523438, |
|
"loss": 0.5401, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.020254051312804222, |
|
"rewards/margins": 0.4997115135192871, |
|
"rewards/rejected": -0.5199655294418335, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.50937619594336e-07, |
|
"logits/chosen": -2.991321563720703, |
|
"logits/rejected": -3.0015194416046143, |
|
"logps/chosen": -279.9203186035156, |
|
"logps/rejected": -243.14889526367188, |
|
"loss": 0.5657, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.08546855300664902, |
|
"rewards/margins": 0.4243212342262268, |
|
"rewards/rejected": -0.5097898244857788, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.4902411021814007e-07, |
|
"logits/chosen": -3.034787893295288, |
|
"logits/rejected": -2.9858031272888184, |
|
"logps/chosen": -259.6640319824219, |
|
"logps/rejected": -238.2606201171875, |
|
"loss": 0.5362, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.11487703025341034, |
|
"rewards/margins": 0.41897639632225037, |
|
"rewards/rejected": -0.5338534116744995, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.4711060084194413e-07, |
|
"logits/chosen": -2.951054096221924, |
|
"logits/rejected": -2.953270673751831, |
|
"logps/chosen": -288.8511962890625, |
|
"logps/rejected": -241.43490600585938, |
|
"loss": 0.5902, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.041309602558612823, |
|
"rewards/margins": 0.47613269090652466, |
|
"rewards/rejected": -0.5174422860145569, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.4519709146574814e-07, |
|
"logits/chosen": -3.0297837257385254, |
|
"logits/rejected": -2.9690465927124023, |
|
"logps/chosen": -249.8524932861328, |
|
"logps/rejected": -211.01486206054688, |
|
"loss": 0.579, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.15785838663578033, |
|
"rewards/margins": 0.3855217397212982, |
|
"rewards/rejected": -0.5433801412582397, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.432835820895522e-07, |
|
"logits/chosen": -3.023965358734131, |
|
"logits/rejected": -3.012373685836792, |
|
"logps/chosen": -296.8121643066406, |
|
"logps/rejected": -269.64410400390625, |
|
"loss": 0.576, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.013493712060153484, |
|
"rewards/margins": 0.45747965574264526, |
|
"rewards/rejected": -0.47097334265708923, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.4137007271335626e-07, |
|
"logits/chosen": -2.9927139282226562, |
|
"logits/rejected": -2.995068073272705, |
|
"logps/chosen": -238.1875, |
|
"logps/rejected": -238.9112548828125, |
|
"loss": 0.5405, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.07948704063892365, |
|
"rewards/margins": 0.4464842677116394, |
|
"rewards/rejected": -0.5259712934494019, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.394565633371603e-07, |
|
"logits/chosen": -3.00760555267334, |
|
"logits/rejected": -2.992061138153076, |
|
"logps/chosen": -304.2665100097656, |
|
"logps/rejected": -251.5317840576172, |
|
"loss": 0.5381, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.1150180920958519, |
|
"rewards/margins": 0.46640753746032715, |
|
"rewards/rejected": -0.581425666809082, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.375430539609644e-07, |
|
"logits/chosen": -3.0006496906280518, |
|
"logits/rejected": -3.0168707370758057, |
|
"logps/chosen": -260.1852111816406, |
|
"logps/rejected": -252.5244598388672, |
|
"loss": 0.4865, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.16791771352291107, |
|
"rewards/margins": 0.4443301260471344, |
|
"rewards/rejected": -0.6122478246688843, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.3562954458476845e-07, |
|
"logits/chosen": -3.023045063018799, |
|
"logits/rejected": -2.9913861751556396, |
|
"logps/chosen": -295.21502685546875, |
|
"logps/rejected": -226.15426635742188, |
|
"loss": 0.5825, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.09654710441827774, |
|
"rewards/margins": 0.504024088382721, |
|
"rewards/rejected": -0.6005711555480957, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.337160352085725e-07, |
|
"logits/chosen": -2.9731438159942627, |
|
"logits/rejected": -2.9854893684387207, |
|
"logps/chosen": -260.6565856933594, |
|
"logps/rejected": -235.22476196289062, |
|
"loss": 0.5259, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.0705583468079567, |
|
"rewards/margins": 0.5393149852752686, |
|
"rewards/rejected": -0.6098732948303223, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.3180252583237657e-07, |
|
"logits/chosen": -3.015777111053467, |
|
"logits/rejected": -2.9891409873962402, |
|
"logps/chosen": -270.416748046875, |
|
"logps/rejected": -232.32089233398438, |
|
"loss": 0.5407, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.13838811218738556, |
|
"rewards/margins": 0.4489242136478424, |
|
"rewards/rejected": -0.5873123407363892, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.2988901645618063e-07, |
|
"logits/chosen": -2.980250597000122, |
|
"logits/rejected": -2.9568262100219727, |
|
"logps/chosen": -257.48504638671875, |
|
"logps/rejected": -231.20101928710938, |
|
"loss": 0.5238, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.26241081953048706, |
|
"rewards/margins": 0.4477602541446686, |
|
"rewards/rejected": -0.7101710438728333, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.279755070799847e-07, |
|
"logits/chosen": -2.987285614013672, |
|
"logits/rejected": -2.9756875038146973, |
|
"logps/chosen": -250.1853790283203, |
|
"logps/rejected": -234.43313598632812, |
|
"loss": 0.4216, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.1883806735277176, |
|
"rewards/margins": 0.47481974959373474, |
|
"rewards/rejected": -0.6632004380226135, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.260619977037887e-07, |
|
"logits/chosen": -2.9940743446350098, |
|
"logits/rejected": -2.9866111278533936, |
|
"logps/chosen": -261.4402770996094, |
|
"logps/rejected": -236.15670776367188, |
|
"loss": 0.4046, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.11559490859508514, |
|
"rewards/margins": 0.5072110891342163, |
|
"rewards/rejected": -0.6228059530258179, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.2414848832759277e-07, |
|
"logits/chosen": -3.0217831134796143, |
|
"logits/rejected": -2.9786057472229004, |
|
"logps/chosen": -263.88592529296875, |
|
"logps/rejected": -227.23828125, |
|
"loss": 0.4809, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.1470792442560196, |
|
"rewards/margins": 0.5500885844230652, |
|
"rewards/rejected": -0.6971677541732788, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.2223497895139683e-07, |
|
"logits/chosen": -2.983037233352661, |
|
"logits/rejected": -2.9867138862609863, |
|
"logps/chosen": -270.10089111328125, |
|
"logps/rejected": -262.2214660644531, |
|
"loss": 0.5297, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.19438357651233673, |
|
"rewards/margins": 0.3766476809978485, |
|
"rewards/rejected": -0.5710312128067017, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.203214695752009e-07, |
|
"logits/chosen": -2.923466920852661, |
|
"logits/rejected": -2.9358396530151367, |
|
"logps/chosen": -291.2686767578125, |
|
"logps/rejected": -251.558837890625, |
|
"loss": 0.5026, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.16642309725284576, |
|
"rewards/margins": 0.3848158121109009, |
|
"rewards/rejected": -0.5512388944625854, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.1840796019900495e-07, |
|
"logits/chosen": -3.007678508758545, |
|
"logits/rejected": -2.988664388656616, |
|
"logps/chosen": -291.9914245605469, |
|
"logps/rejected": -263.6539611816406, |
|
"loss": 0.4669, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.1000869870185852, |
|
"rewards/margins": 0.597449004650116, |
|
"rewards/rejected": -0.6975361108779907, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.16494450822809e-07, |
|
"logits/chosen": -3.0168919563293457, |
|
"logits/rejected": -2.987907886505127, |
|
"logps/chosen": -249.50704956054688, |
|
"logps/rejected": -224.2857208251953, |
|
"loss": 0.4447, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.16220508515834808, |
|
"rewards/margins": 0.5248938798904419, |
|
"rewards/rejected": -0.6870989203453064, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.145809414466131e-07, |
|
"logits/chosen": -3.0371620655059814, |
|
"logits/rejected": -3.0139615535736084, |
|
"logps/chosen": -273.0102233886719, |
|
"logps/rejected": -234.0775909423828, |
|
"loss": 0.439, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.13673745095729828, |
|
"rewards/margins": 0.6153367161750793, |
|
"rewards/rejected": -0.752074122428894, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.1266743207041714e-07, |
|
"logits/chosen": -3.0206820964813232, |
|
"logits/rejected": -3.0111212730407715, |
|
"logps/chosen": -309.1382751464844, |
|
"logps/rejected": -276.2449645996094, |
|
"loss": 0.4547, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.1778876632452011, |
|
"rewards/margins": 0.6206526756286621, |
|
"rewards/rejected": -0.7985404133796692, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.107539226942212e-07, |
|
"logits/chosen": -3.007878303527832, |
|
"logits/rejected": -3.0043411254882812, |
|
"logps/chosen": -238.5860137939453, |
|
"logps/rejected": -227.81234741210938, |
|
"loss": 0.5382, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.1700361967086792, |
|
"rewards/margins": 0.43853870034217834, |
|
"rewards/rejected": -0.6085748672485352, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.0884041331802526e-07, |
|
"logits/chosen": -2.970362901687622, |
|
"logits/rejected": -2.969972848892212, |
|
"logps/chosen": -257.76812744140625, |
|
"logps/rejected": -230.56820678710938, |
|
"loss": 0.4622, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.14981916546821594, |
|
"rewards/margins": 0.5685046315193176, |
|
"rewards/rejected": -0.718323826789856, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.0692690394182927e-07, |
|
"logits/chosen": -3.0062780380249023, |
|
"logits/rejected": -2.9634342193603516, |
|
"logps/chosen": -278.4779968261719, |
|
"logps/rejected": -237.66873168945312, |
|
"loss": 0.3758, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.20812156796455383, |
|
"rewards/margins": 0.5646450519561768, |
|
"rewards/rejected": -0.7727665305137634, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.0501339456563334e-07, |
|
"logits/chosen": -2.974381923675537, |
|
"logits/rejected": -2.9355669021606445, |
|
"logps/chosen": -286.21453857421875, |
|
"logps/rejected": -236.1365203857422, |
|
"loss": 0.378, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.08342897891998291, |
|
"rewards/margins": 0.6408315896987915, |
|
"rewards/rejected": -0.7242605090141296, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.030998851894374e-07, |
|
"logits/chosen": -3.02203369140625, |
|
"logits/rejected": -3.0106561183929443, |
|
"logps/chosen": -244.7522430419922, |
|
"logps/rejected": -214.9333038330078, |
|
"loss": 0.3807, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.24016663432121277, |
|
"rewards/margins": 0.5892859697341919, |
|
"rewards/rejected": -0.829452633857727, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.0118637581324146e-07, |
|
"logits/chosen": -2.9896299839019775, |
|
"logits/rejected": -2.9215025901794434, |
|
"logps/chosen": -263.9308166503906, |
|
"logps/rejected": -236.4021453857422, |
|
"loss": 0.4104, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.14813081920146942, |
|
"rewards/margins": 0.6776358485221863, |
|
"rewards/rejected": -0.8257666826248169, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.992728664370455e-07, |
|
"logits/chosen": -3.0315442085266113, |
|
"logits/rejected": -3.035961866378784, |
|
"logps/chosen": -261.2752990722656, |
|
"logps/rejected": -235.47705078125, |
|
"loss": 0.3998, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.2596365213394165, |
|
"rewards/margins": 0.658948540687561, |
|
"rewards/rejected": -0.9185851812362671, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.973593570608496e-07, |
|
"logits/chosen": -3.0204250812530518, |
|
"logits/rejected": -3.015413761138916, |
|
"logps/chosen": -246.57290649414062, |
|
"logps/rejected": -231.91238403320312, |
|
"loss": 0.4193, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.3196146488189697, |
|
"rewards/margins": 0.4760715365409851, |
|
"rewards/rejected": -0.7956861257553101, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.9544584768465365e-07, |
|
"logits/chosen": -3.016695022583008, |
|
"logits/rejected": -2.9959843158721924, |
|
"logps/chosen": -275.1457824707031, |
|
"logps/rejected": -220.0662078857422, |
|
"loss": 0.4373, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.33876991271972656, |
|
"rewards/margins": 0.457784503698349, |
|
"rewards/rejected": -0.796554446220398, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.935323383084577e-07, |
|
"logits/chosen": -3.059567451477051, |
|
"logits/rejected": -2.9867048263549805, |
|
"logps/chosen": -287.88153076171875, |
|
"logps/rejected": -235.1467742919922, |
|
"loss": 0.3824, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.264303594827652, |
|
"rewards/margins": 0.6922810673713684, |
|
"rewards/rejected": -0.956584632396698, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.9161882893226177e-07, |
|
"logits/chosen": -2.975642442703247, |
|
"logits/rejected": -2.9326682090759277, |
|
"logps/chosen": -251.0466766357422, |
|
"logps/rejected": -223.5084686279297, |
|
"loss": 0.4333, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.1487085521221161, |
|
"rewards/margins": 0.689045250415802, |
|
"rewards/rejected": -0.8377537727355957, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.8970531955606583e-07, |
|
"logits/chosen": -3.0104973316192627, |
|
"logits/rejected": -2.9991507530212402, |
|
"logps/chosen": -296.5973205566406, |
|
"logps/rejected": -226.55111694335938, |
|
"loss": 0.3786, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.20407001674175262, |
|
"rewards/margins": 0.6605352163314819, |
|
"rewards/rejected": -0.8646053075790405, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.8779181017986984e-07, |
|
"logits/chosen": -2.9817981719970703, |
|
"logits/rejected": -2.988542079925537, |
|
"logps/chosen": -285.8086853027344, |
|
"logps/rejected": -253.98934936523438, |
|
"loss": 0.4188, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.25984063744544983, |
|
"rewards/margins": 0.5409084558486938, |
|
"rewards/rejected": -0.8007491230964661, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.858783008036739e-07, |
|
"logits/chosen": -2.99088978767395, |
|
"logits/rejected": -2.9880213737487793, |
|
"logps/chosen": -269.12982177734375, |
|
"logps/rejected": -245.9726104736328, |
|
"loss": 0.3797, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.3341473937034607, |
|
"rewards/margins": 0.6310809254646301, |
|
"rewards/rejected": -0.9652281999588013, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.8396479142747797e-07, |
|
"logits/chosen": -3.031111717224121, |
|
"logits/rejected": -3.011475086212158, |
|
"logps/chosen": -265.3778381347656, |
|
"logps/rejected": -236.4595947265625, |
|
"loss": 0.3353, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.23791718482971191, |
|
"rewards/margins": 0.7006685733795166, |
|
"rewards/rejected": -0.938585638999939, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.8205128205128203e-07, |
|
"logits/chosen": -2.972764492034912, |
|
"logits/rejected": -2.9663169384002686, |
|
"logps/chosen": -261.602294921875, |
|
"logps/rejected": -261.4017639160156, |
|
"loss": 0.3131, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.2686450779438019, |
|
"rewards/margins": 0.671106219291687, |
|
"rewards/rejected": -0.9397512674331665, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.801377726750861e-07, |
|
"logits/chosen": -3.018611192703247, |
|
"logits/rejected": -2.9920201301574707, |
|
"logps/chosen": -253.02285766601562, |
|
"logps/rejected": -222.1746826171875, |
|
"loss": 0.3765, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.35335665941238403, |
|
"rewards/margins": 0.5318818092346191, |
|
"rewards/rejected": -0.8852384686470032, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.7822426329889015e-07, |
|
"logits/chosen": -2.9993600845336914, |
|
"logits/rejected": -2.953437328338623, |
|
"logps/chosen": -272.62347412109375, |
|
"logps/rejected": -223.8088836669922, |
|
"loss": 0.3979, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.3333335518836975, |
|
"rewards/margins": 0.6397331953048706, |
|
"rewards/rejected": -0.9730666875839233, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.763107539226942e-07, |
|
"logits/chosen": -3.0080342292785645, |
|
"logits/rejected": -2.9877243041992188, |
|
"logps/chosen": -285.1849670410156, |
|
"logps/rejected": -218.37954711914062, |
|
"loss": 0.3865, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.35278716683387756, |
|
"rewards/margins": 0.6462265253067017, |
|
"rewards/rejected": -0.9990137815475464, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.743972445464983e-07, |
|
"logits/chosen": -2.989492177963257, |
|
"logits/rejected": -2.9886727333068848, |
|
"logps/chosen": -276.1953430175781, |
|
"logps/rejected": -228.0172882080078, |
|
"loss": 0.4094, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.3594445586204529, |
|
"rewards/margins": 0.6459834575653076, |
|
"rewards/rejected": -1.0054280757904053, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.7248373517030234e-07, |
|
"logits/chosen": -2.9692909717559814, |
|
"logits/rejected": -2.978031635284424, |
|
"logps/chosen": -260.3178405761719, |
|
"logps/rejected": -236.1215362548828, |
|
"loss": 0.3159, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.33140963315963745, |
|
"rewards/margins": 0.5777403116226196, |
|
"rewards/rejected": -0.9091499447822571, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.705702257941064e-07, |
|
"logits/chosen": -2.9462051391601562, |
|
"logits/rejected": -2.9286155700683594, |
|
"logps/chosen": -289.8208923339844, |
|
"logps/rejected": -247.3198699951172, |
|
"loss": 0.3091, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3897753953933716, |
|
"rewards/margins": 0.5884745717048645, |
|
"rewards/rejected": -0.978249728679657, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.686567164179104e-07, |
|
"logits/chosen": -3.018886089324951, |
|
"logits/rejected": -2.9545936584472656, |
|
"logps/chosen": -265.15240478515625, |
|
"logps/rejected": -227.3736114501953, |
|
"loss": 0.3156, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.2435026615858078, |
|
"rewards/margins": 0.8812161684036255, |
|
"rewards/rejected": -1.1247189044952393, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.6674320704171447e-07, |
|
"logits/chosen": -3.019289016723633, |
|
"logits/rejected": -2.9821600914001465, |
|
"logps/chosen": -288.81646728515625, |
|
"logps/rejected": -226.24755859375, |
|
"loss": 0.262, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.3103589713573456, |
|
"rewards/margins": 0.6744868159294128, |
|
"rewards/rejected": -0.9848458170890808, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.6482969766551853e-07, |
|
"logits/chosen": -2.918996572494507, |
|
"logits/rejected": -2.9260575771331787, |
|
"logps/chosen": -229.50146484375, |
|
"logps/rejected": -233.02432250976562, |
|
"loss": 0.3571, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.3969925045967102, |
|
"rewards/margins": 0.5756487250328064, |
|
"rewards/rejected": -0.9726413488388062, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.629161882893226e-07, |
|
"logits/chosen": -3.0124337673187256, |
|
"logits/rejected": -2.9816842079162598, |
|
"logps/chosen": -272.998779296875, |
|
"logps/rejected": -232.07638549804688, |
|
"loss": 0.2496, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.3978312909603119, |
|
"rewards/margins": 0.6203471422195435, |
|
"rewards/rejected": -1.0181784629821777, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.6100267891312666e-07, |
|
"logits/chosen": -3.0306756496429443, |
|
"logits/rejected": -3.016284465789795, |
|
"logps/chosen": -293.61407470703125, |
|
"logps/rejected": -241.7076873779297, |
|
"loss": 0.2565, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.3279929757118225, |
|
"rewards/margins": 0.6071383953094482, |
|
"rewards/rejected": -0.9351313710212708, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.590891695369307e-07, |
|
"logits/chosen": -2.9170010089874268, |
|
"logits/rejected": -2.904465436935425, |
|
"logps/chosen": -251.91354370117188, |
|
"logps/rejected": -234.70751953125, |
|
"loss": 0.2908, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.33537206053733826, |
|
"rewards/margins": 0.6658948063850403, |
|
"rewards/rejected": -1.0012669563293457, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.571756601607348e-07, |
|
"logits/chosen": -3.0143392086029053, |
|
"logits/rejected": -2.9897260665893555, |
|
"logps/chosen": -301.2428894042969, |
|
"logps/rejected": -244.96145629882812, |
|
"loss": 0.2721, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3570322096347809, |
|
"rewards/margins": 0.7769542336463928, |
|
"rewards/rejected": -1.133986473083496, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.5526215078453884e-07, |
|
"logits/chosen": -3.022491216659546, |
|
"logits/rejected": -2.9913883209228516, |
|
"logps/chosen": -284.7639465332031, |
|
"logps/rejected": -248.6448211669922, |
|
"loss": 0.2595, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.3536800742149353, |
|
"rewards/margins": 0.7084030508995056, |
|
"rewards/rejected": -1.0620832443237305, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.533486414083429e-07, |
|
"logits/chosen": -3.0248942375183105, |
|
"logits/rejected": -2.9973325729370117, |
|
"logps/chosen": -284.40472412109375, |
|
"logps/rejected": -241.6700897216797, |
|
"loss": 0.1589, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.3532782196998596, |
|
"rewards/margins": 0.7998963594436646, |
|
"rewards/rejected": -1.153174638748169, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.5143513203214697e-07, |
|
"logits/chosen": -2.9913268089294434, |
|
"logits/rejected": -2.971450090408325, |
|
"logps/chosen": -319.8451232910156, |
|
"logps/rejected": -254.3859100341797, |
|
"loss": 0.3029, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.31969529390335083, |
|
"rewards/margins": 0.671107292175293, |
|
"rewards/rejected": -0.9908025860786438, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.49521622655951e-07, |
|
"logits/chosen": -2.959479331970215, |
|
"logits/rejected": -2.908963441848755, |
|
"logps/chosen": -272.8207092285156, |
|
"logps/rejected": -235.93020629882812, |
|
"loss": 0.2796, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.34670349955558777, |
|
"rewards/margins": 0.7621387243270874, |
|
"rewards/rejected": -1.108842134475708, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.4760811327975504e-07, |
|
"logits/chosen": -2.9981067180633545, |
|
"logits/rejected": -2.9972851276397705, |
|
"logps/chosen": -278.3575744628906, |
|
"logps/rejected": -239.57223510742188, |
|
"loss": 0.2067, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.5294784307479858, |
|
"rewards/margins": 0.6004038453102112, |
|
"rewards/rejected": -1.1298822164535522, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.456946039035591e-07, |
|
"logits/chosen": -3.030169725418091, |
|
"logits/rejected": -2.9914770126342773, |
|
"logps/chosen": -289.63116455078125, |
|
"logps/rejected": -249.74649047851562, |
|
"loss": 0.3592, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.44787079095840454, |
|
"rewards/margins": 0.63862544298172, |
|
"rewards/rejected": -1.086496114730835, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.4378109452736316e-07, |
|
"logits/chosen": -2.9960074424743652, |
|
"logits/rejected": -2.980121374130249, |
|
"logps/chosen": -267.38446044921875, |
|
"logps/rejected": -244.11593627929688, |
|
"loss": 0.2287, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.34468650817871094, |
|
"rewards/margins": 0.8173492550849915, |
|
"rewards/rejected": -1.1620357036590576, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.418675851511672e-07, |
|
"logits/chosen": -2.913470506668091, |
|
"logits/rejected": -2.90181040763855, |
|
"logps/chosen": -268.4359436035156, |
|
"logps/rejected": -238.85806274414062, |
|
"loss": 0.2227, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.43024927377700806, |
|
"rewards/margins": 0.583633303642273, |
|
"rewards/rejected": -1.0138825178146362, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.399540757749713e-07, |
|
"logits/chosen": -2.9995226860046387, |
|
"logits/rejected": -2.971139430999756, |
|
"logps/chosen": -276.39031982421875, |
|
"logps/rejected": -242.7020263671875, |
|
"loss": 0.2584, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.5093586444854736, |
|
"rewards/margins": 0.657143771648407, |
|
"rewards/rejected": -1.1665024757385254, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.3804056639877535e-07, |
|
"logits/chosen": -2.95731258392334, |
|
"logits/rejected": -2.9587581157684326, |
|
"logps/chosen": -248.0506134033203, |
|
"logps/rejected": -226.7265625, |
|
"loss": 0.2349, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.3466361165046692, |
|
"rewards/margins": 0.8473577499389648, |
|
"rewards/rejected": -1.1939939260482788, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.361270570225794e-07, |
|
"logits/chosen": -3.0136914253234863, |
|
"logits/rejected": -2.9863269329071045, |
|
"logps/chosen": -285.3213195800781, |
|
"logps/rejected": -233.94619750976562, |
|
"loss": 0.1567, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.3250366151332855, |
|
"rewards/margins": 0.8868409991264343, |
|
"rewards/rejected": -1.2118775844573975, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.3421354764638345e-07, |
|
"logits/chosen": -2.9790773391723633, |
|
"logits/rejected": -2.952543020248413, |
|
"logps/chosen": -276.2936706542969, |
|
"logps/rejected": -257.79718017578125, |
|
"loss": 0.1955, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.4911643862724304, |
|
"rewards/margins": 0.6523909568786621, |
|
"rewards/rejected": -1.1435552835464478, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.323000382701875e-07, |
|
"logits/chosen": -2.9914324283599854, |
|
"logits/rejected": -2.98734974861145, |
|
"logps/chosen": -322.0937805175781, |
|
"logps/rejected": -261.5361633300781, |
|
"loss": 0.1863, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.3622845411300659, |
|
"rewards/margins": 0.9477508664131165, |
|
"rewards/rejected": -1.3100353479385376, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.3038652889399157e-07, |
|
"logits/chosen": -2.9850406646728516, |
|
"logits/rejected": -2.972008228302002, |
|
"logps/chosen": -262.46197509765625, |
|
"logps/rejected": -237.19375610351562, |
|
"loss": 0.1809, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.53041011095047, |
|
"rewards/margins": 0.7410814762115479, |
|
"rewards/rejected": -1.2714916467666626, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.2847301951779563e-07, |
|
"logits/chosen": -2.983722448348999, |
|
"logits/rejected": -2.956228256225586, |
|
"logps/chosen": -272.92156982421875, |
|
"logps/rejected": -256.91107177734375, |
|
"loss": 0.1089, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.5018629431724548, |
|
"rewards/margins": 0.8895123600959778, |
|
"rewards/rejected": -1.391375184059143, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.265595101415997e-07, |
|
"logits/chosen": -2.9394657611846924, |
|
"logits/rejected": -2.92991304397583, |
|
"logps/chosen": -273.16815185546875, |
|
"logps/rejected": -262.08917236328125, |
|
"loss": 0.1644, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.5341050624847412, |
|
"rewards/margins": 0.7611141800880432, |
|
"rewards/rejected": -1.2952191829681396, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.2464600076540373e-07, |
|
"logits/chosen": -2.946911334991455, |
|
"logits/rejected": -2.931849718093872, |
|
"logps/chosen": -308.5828552246094, |
|
"logps/rejected": -247.25338745117188, |
|
"loss": 0.1064, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.5544737577438354, |
|
"rewards/margins": 0.9120496511459351, |
|
"rewards/rejected": -1.46652352809906, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.227324913892078e-07, |
|
"logits/chosen": -2.951788902282715, |
|
"logits/rejected": -2.909658908843994, |
|
"logps/chosen": -295.8861389160156, |
|
"logps/rejected": -243.9182891845703, |
|
"loss": 0.0733, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.5971349477767944, |
|
"rewards/margins": 0.7489143013954163, |
|
"rewards/rejected": -1.3460490703582764, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.2081898201301186e-07, |
|
"logits/chosen": -3.023937702178955, |
|
"logits/rejected": -2.9923651218414307, |
|
"logps/chosen": -301.7521057128906, |
|
"logps/rejected": -257.3502197265625, |
|
"loss": 0.1576, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5676156282424927, |
|
"rewards/margins": 0.8253081440925598, |
|
"rewards/rejected": -1.3929237127304077, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.1890547263681592e-07, |
|
"logits/chosen": -2.979418992996216, |
|
"logits/rejected": -2.979773998260498, |
|
"logps/chosen": -278.7250061035156, |
|
"logps/rejected": -281.96417236328125, |
|
"loss": 0.1553, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.5305044651031494, |
|
"rewards/margins": 0.8628344535827637, |
|
"rewards/rejected": -1.393338918685913, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.1699196326061998e-07, |
|
"logits/chosen": -2.93343448638916, |
|
"logits/rejected": -2.9477057456970215, |
|
"logps/chosen": -259.96685791015625, |
|
"logps/rejected": -260.68145751953125, |
|
"loss": 0.2389, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.5586804151535034, |
|
"rewards/margins": 0.8505626916885376, |
|
"rewards/rejected": -1.409243106842041, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.1507845388442402e-07, |
|
"logits/chosen": -3.001537799835205, |
|
"logits/rejected": -3.000216007232666, |
|
"logps/chosen": -253.8172607421875, |
|
"logps/rejected": -229.71261596679688, |
|
"loss": 0.117, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.6867496371269226, |
|
"rewards/margins": 0.839769184589386, |
|
"rewards/rejected": -1.5265188217163086, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.1316494450822808e-07, |
|
"logits/chosen": -2.9645118713378906, |
|
"logits/rejected": -2.921504497528076, |
|
"logps/chosen": -249.779296875, |
|
"logps/rejected": -217.2705078125, |
|
"loss": 0.187, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.6474353075027466, |
|
"rewards/margins": 0.7807954549789429, |
|
"rewards/rejected": -1.428230881690979, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.1125143513203214e-07, |
|
"logits/chosen": -2.995269298553467, |
|
"logits/rejected": -2.9834041595458984, |
|
"logps/chosen": -293.59716796875, |
|
"logps/rejected": -229.48092651367188, |
|
"loss": 0.0751, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.6189619302749634, |
|
"rewards/margins": 0.7833685874938965, |
|
"rewards/rejected": -1.4023306369781494, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.093379257558362e-07, |
|
"logits/chosen": -2.9907360076904297, |
|
"logits/rejected": -2.963219404220581, |
|
"logps/chosen": -274.832275390625, |
|
"logps/rejected": -237.86355590820312, |
|
"loss": 0.2011, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.7243114709854126, |
|
"rewards/margins": 0.5377382636070251, |
|
"rewards/rejected": -1.2620497941970825, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.0742441637964026e-07, |
|
"logits/chosen": -2.9230685234069824, |
|
"logits/rejected": -2.88169264793396, |
|
"logps/chosen": -309.228515625, |
|
"logps/rejected": -246.1265411376953, |
|
"loss": -0.039, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.5196012258529663, |
|
"rewards/margins": 1.0259116888046265, |
|
"rewards/rejected": -1.5455129146575928, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.055109070034443e-07, |
|
"logits/chosen": -2.9902000427246094, |
|
"logits/rejected": -2.9472696781158447, |
|
"logps/chosen": -300.9866027832031, |
|
"logps/rejected": -249.76412963867188, |
|
"loss": 0.1368, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.6899394989013672, |
|
"rewards/margins": 0.7042897343635559, |
|
"rewards/rejected": -1.3942292928695679, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.0359739762724836e-07, |
|
"logits/chosen": -3.0022895336151123, |
|
"logits/rejected": -2.973423719406128, |
|
"logps/chosen": -313.137939453125, |
|
"logps/rejected": -254.30709838867188, |
|
"loss": 0.0727, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.5544772744178772, |
|
"rewards/margins": 1.1537253856658936, |
|
"rewards/rejected": -1.7082027196884155, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.0168388825105242e-07, |
|
"logits/chosen": -2.976815700531006, |
|
"logits/rejected": -2.958930492401123, |
|
"logps/chosen": -285.1069641113281, |
|
"logps/rejected": -275.8247985839844, |
|
"loss": 0.046, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5782260894775391, |
|
"rewards/margins": 0.8343210220336914, |
|
"rewards/rejected": -1.41254723072052, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.997703788748565e-07, |
|
"logits/chosen": -2.997415065765381, |
|
"logits/rejected": -2.95662784576416, |
|
"logps/chosen": -302.427734375, |
|
"logps/rejected": -215.8485565185547, |
|
"loss": 0.0356, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.4519409239292145, |
|
"rewards/margins": 1.185298204421997, |
|
"rewards/rejected": -1.6372392177581787, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.9785686949866055e-07, |
|
"logits/chosen": -2.943821668624878, |
|
"logits/rejected": -2.9366466999053955, |
|
"logps/chosen": -267.79620361328125, |
|
"logps/rejected": -246.8135223388672, |
|
"loss": 0.158, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.6127734184265137, |
|
"rewards/margins": 0.7575459480285645, |
|
"rewards/rejected": -1.3703193664550781, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.9594336012246458e-07, |
|
"logits/chosen": -2.941462278366089, |
|
"logits/rejected": -2.9433281421661377, |
|
"logps/chosen": -253.16256713867188, |
|
"logps/rejected": -241.6164093017578, |
|
"loss": 0.0191, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5904272198677063, |
|
"rewards/margins": 0.8854333758354187, |
|
"rewards/rejected": -1.475860595703125, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.9402985074626865e-07, |
|
"logits/chosen": -2.990996837615967, |
|
"logits/rejected": -2.9396445751190186, |
|
"logps/chosen": -286.08404541015625, |
|
"logps/rejected": -246.9237823486328, |
|
"loss": 0.0619, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.7161077260971069, |
|
"rewards/margins": 0.9890392422676086, |
|
"rewards/rejected": -1.7051467895507812, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.921163413700727e-07, |
|
"logits/chosen": -2.9665896892547607, |
|
"logits/rejected": -2.985443353652954, |
|
"logps/chosen": -273.56964111328125, |
|
"logps/rejected": -255.6343536376953, |
|
"loss": 0.1713, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.7685881853103638, |
|
"rewards/margins": 0.8922961354255676, |
|
"rewards/rejected": -1.6608844995498657, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.9020283199387677e-07, |
|
"logits/chosen": -2.993872880935669, |
|
"logits/rejected": -2.9838452339172363, |
|
"logps/chosen": -268.5338134765625, |
|
"logps/rejected": -235.5106658935547, |
|
"loss": 0.1585, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.5740541219711304, |
|
"rewards/margins": 0.8445619344711304, |
|
"rewards/rejected": -1.4186161756515503, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.8828932261768083e-07, |
|
"logits/chosen": -2.918458938598633, |
|
"logits/rejected": -2.924969434738159, |
|
"logps/chosen": -275.4915466308594, |
|
"logps/rejected": -230.2731170654297, |
|
"loss": -0.0004, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.717719316482544, |
|
"rewards/margins": 1.1288950443267822, |
|
"rewards/rejected": -1.8466142416000366, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.8637581324148487e-07, |
|
"logits/chosen": -3.0128183364868164, |
|
"logits/rejected": -3.0055766105651855, |
|
"logps/chosen": -273.6071472167969, |
|
"logps/rejected": -248.68997192382812, |
|
"loss": 0.0705, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7629455327987671, |
|
"rewards/margins": 1.0152510404586792, |
|
"rewards/rejected": -1.7781963348388672, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_logits/chosen": -2.983034372329712, |
|
"eval_logits/rejected": -2.960925817489624, |
|
"eval_logps/chosen": -277.86474609375, |
|
"eval_logps/rejected": -245.16693115234375, |
|
"eval_loss": 0.05700839310884476, |
|
"eval_rewards/accuracies": 0.6959999799728394, |
|
"eval_rewards/chosen": -0.6708189845085144, |
|
"eval_rewards/margins": 0.9967920184135437, |
|
"eval_rewards/rejected": -1.6676111221313477, |
|
"eval_runtime": 448.2745, |
|
"eval_samples_per_second": 4.462, |
|
"eval_steps_per_second": 0.279, |
|
"step": 1937 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.8446230386528893e-07, |
|
"logits/chosen": -2.99873423576355, |
|
"logits/rejected": -2.9760866165161133, |
|
"logps/chosen": -270.5379943847656, |
|
"logps/rejected": -233.87255859375, |
|
"loss": 0.104, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.6196664571762085, |
|
"rewards/margins": 0.8855097889900208, |
|
"rewards/rejected": -1.505176305770874, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.82548794489093e-07, |
|
"logits/chosen": -2.972491502761841, |
|
"logits/rejected": -2.9654781818389893, |
|
"logps/chosen": -272.2441711425781, |
|
"logps/rejected": -247.496337890625, |
|
"loss": 0.0695, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8281705975532532, |
|
"rewards/margins": 0.8387929201126099, |
|
"rewards/rejected": -1.6669635772705078, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.8063528511289706e-07, |
|
"logits/chosen": -2.9022774696350098, |
|
"logits/rejected": -2.8986656665802, |
|
"logps/chosen": -272.8119201660156, |
|
"logps/rejected": -286.4425964355469, |
|
"loss": 0.0376, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.6966532468795776, |
|
"rewards/margins": 1.0244591236114502, |
|
"rewards/rejected": -1.7211124897003174, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.7872177573670112e-07, |
|
"logits/chosen": -3.0008738040924072, |
|
"logits/rejected": -2.9414420127868652, |
|
"logps/chosen": -269.3557434082031, |
|
"logps/rejected": -228.98892211914062, |
|
"loss": 0.1134, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6135867238044739, |
|
"rewards/margins": 1.0139942169189453, |
|
"rewards/rejected": -1.627581000328064, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.7680826636050515e-07, |
|
"logits/chosen": -2.97796893119812, |
|
"logits/rejected": -2.9451377391815186, |
|
"logps/chosen": -290.0522155761719, |
|
"logps/rejected": -247.7688446044922, |
|
"loss": 0.0676, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.7555999755859375, |
|
"rewards/margins": 0.919529139995575, |
|
"rewards/rejected": -1.6751289367675781, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.7489475698430921e-07, |
|
"logits/chosen": -2.9556636810302734, |
|
"logits/rejected": -2.9225146770477295, |
|
"logps/chosen": -281.0018615722656, |
|
"logps/rejected": -257.39471435546875, |
|
"loss": -0.0304, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7463997006416321, |
|
"rewards/margins": 0.9945909380912781, |
|
"rewards/rejected": -1.7409906387329102, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.7298124760811328e-07, |
|
"logits/chosen": -2.9892733097076416, |
|
"logits/rejected": -2.9609320163726807, |
|
"logps/chosen": -265.5694885253906, |
|
"logps/rejected": -227.8389434814453, |
|
"loss": 0.068, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.8352034687995911, |
|
"rewards/margins": 1.0397050380706787, |
|
"rewards/rejected": -1.874908685684204, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.7106773823191734e-07, |
|
"logits/chosen": -2.9671616554260254, |
|
"logits/rejected": -2.937986373901367, |
|
"logps/chosen": -281.4359436035156, |
|
"logps/rejected": -241.79080200195312, |
|
"loss": -0.0155, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.6781337857246399, |
|
"rewards/margins": 1.3058927059173584, |
|
"rewards/rejected": -1.984026312828064, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.691542288557214e-07, |
|
"logits/chosen": -2.9833037853240967, |
|
"logits/rejected": -2.943441152572632, |
|
"logps/chosen": -295.3065490722656, |
|
"logps/rejected": -253.66360473632812, |
|
"loss": 0.0664, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.7511407136917114, |
|
"rewards/margins": 0.9544817209243774, |
|
"rewards/rejected": -1.7056224346160889, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.6724071947952544e-07, |
|
"logits/chosen": -2.9694552421569824, |
|
"logits/rejected": -2.9318954944610596, |
|
"logps/chosen": -254.4512939453125, |
|
"logps/rejected": -235.69595336914062, |
|
"loss": 0.0632, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.7740479111671448, |
|
"rewards/margins": 0.9885379672050476, |
|
"rewards/rejected": -1.762585997581482, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.653272101033295e-07, |
|
"logits/chosen": -2.9927315711975098, |
|
"logits/rejected": -2.9617929458618164, |
|
"logps/chosen": -286.9222412109375, |
|
"logps/rejected": -262.54559326171875, |
|
"loss": -0.0387, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.7180510759353638, |
|
"rewards/margins": 1.0184663534164429, |
|
"rewards/rejected": -1.7365175485610962, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.6341370072713356e-07, |
|
"logits/chosen": -2.9783358573913574, |
|
"logits/rejected": -2.9684910774230957, |
|
"logps/chosen": -288.4617614746094, |
|
"logps/rejected": -269.187255859375, |
|
"loss": -0.0243, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.75580233335495, |
|
"rewards/margins": 0.9117294549942017, |
|
"rewards/rejected": -1.6675317287445068, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.6150019135093762e-07, |
|
"logits/chosen": -3.0292530059814453, |
|
"logits/rejected": -2.9775586128234863, |
|
"logps/chosen": -298.93585205078125, |
|
"logps/rejected": -247.0900115966797, |
|
"loss": -0.0146, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6352591514587402, |
|
"rewards/margins": 1.129183053970337, |
|
"rewards/rejected": -1.7644420862197876, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.5958668197474169e-07, |
|
"logits/chosen": -3.0193440914154053, |
|
"logits/rejected": -2.99593186378479, |
|
"logps/chosen": -263.82464599609375, |
|
"logps/rejected": -277.60418701171875, |
|
"loss": 0.0702, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.6851625442504883, |
|
"rewards/margins": 0.9985010027885437, |
|
"rewards/rejected": -1.6836636066436768, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.5767317259854572e-07, |
|
"logits/chosen": -3.0217273235321045, |
|
"logits/rejected": -2.970132350921631, |
|
"logps/chosen": -307.5477600097656, |
|
"logps/rejected": -248.69448852539062, |
|
"loss": -0.024, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.65143883228302, |
|
"rewards/margins": 1.1803841590881348, |
|
"rewards/rejected": -1.8318227529525757, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.5575966322234978e-07, |
|
"logits/chosen": -2.931065559387207, |
|
"logits/rejected": -2.9219722747802734, |
|
"logps/chosen": -276.0883483886719, |
|
"logps/rejected": -247.0487060546875, |
|
"loss": -0.052, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.64451664686203, |
|
"rewards/margins": 1.1405279636383057, |
|
"rewards/rejected": -1.7850444316864014, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.5384615384615385e-07, |
|
"logits/chosen": -3.0177485942840576, |
|
"logits/rejected": -3.0004374980926514, |
|
"logps/chosen": -263.8681640625, |
|
"logps/rejected": -226.38119506835938, |
|
"loss": -0.047, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.8407020568847656, |
|
"rewards/margins": 1.1751278638839722, |
|
"rewards/rejected": -2.0158302783966064, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.519326444699579e-07, |
|
"logits/chosen": -2.9868111610412598, |
|
"logits/rejected": -2.961195468902588, |
|
"logps/chosen": -294.43402099609375, |
|
"logps/rejected": -237.2691650390625, |
|
"loss": -0.1127, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.678774356842041, |
|
"rewards/margins": 1.201111078262329, |
|
"rewards/rejected": -1.8798853158950806, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.5001913509376197e-07, |
|
"logits/chosen": -3.0007481575012207, |
|
"logits/rejected": -2.9801931381225586, |
|
"logps/chosen": -290.3331298828125, |
|
"logps/rejected": -263.9862976074219, |
|
"loss": -0.071, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.908436119556427, |
|
"rewards/margins": 0.9944915771484375, |
|
"rewards/rejected": -1.9029273986816406, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.4810562571756603e-07, |
|
"logits/chosen": -3.011329174041748, |
|
"logits/rejected": -2.980834484100342, |
|
"logps/chosen": -259.76031494140625, |
|
"logps/rejected": -239.0861053466797, |
|
"loss": 0.0556, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.774993360042572, |
|
"rewards/margins": 1.0526189804077148, |
|
"rewards/rejected": -1.8276125192642212, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.4619211634137007e-07, |
|
"logits/chosen": -2.9631924629211426, |
|
"logits/rejected": -2.954556703567505, |
|
"logps/chosen": -248.605224609375, |
|
"logps/rejected": -215.9401397705078, |
|
"loss": -0.0457, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.8637608289718628, |
|
"rewards/margins": 1.0648829936981201, |
|
"rewards/rejected": -1.9286441802978516, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.4427860696517413e-07, |
|
"logits/chosen": -2.950859308242798, |
|
"logits/rejected": -2.961940288543701, |
|
"logps/chosen": -328.16314697265625, |
|
"logps/rejected": -277.8406982421875, |
|
"loss": 0.0939, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7997218370437622, |
|
"rewards/margins": 1.051257848739624, |
|
"rewards/rejected": -1.8509798049926758, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.423650975889782e-07, |
|
"logits/chosen": -3.0021846294403076, |
|
"logits/rejected": -2.9544687271118164, |
|
"logps/chosen": -284.53570556640625, |
|
"logps/rejected": -254.57559204101562, |
|
"loss": -0.1516, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8068240880966187, |
|
"rewards/margins": 1.2634921073913574, |
|
"rewards/rejected": -2.0703163146972656, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.4045158821278225e-07, |
|
"logits/chosen": -2.968425750732422, |
|
"logits/rejected": -2.9926857948303223, |
|
"logps/chosen": -295.5940246582031, |
|
"logps/rejected": -265.8125, |
|
"loss": -0.1379, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.7207796573638916, |
|
"rewards/margins": 1.3109276294708252, |
|
"rewards/rejected": -2.0317070484161377, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.3853807883658632e-07, |
|
"logits/chosen": -3.0091452598571777, |
|
"logits/rejected": -2.9704108238220215, |
|
"logps/chosen": -298.40948486328125, |
|
"logps/rejected": -241.05337524414062, |
|
"loss": 0.1214, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.9363592863082886, |
|
"rewards/margins": 0.984167754650116, |
|
"rewards/rejected": -1.9205271005630493, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.3662456946039035e-07, |
|
"logits/chosen": -2.924776554107666, |
|
"logits/rejected": -2.920172691345215, |
|
"logps/chosen": -259.19580078125, |
|
"logps/rejected": -247.6489715576172, |
|
"loss": 0.0587, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.8932672739028931, |
|
"rewards/margins": 0.9966402053833008, |
|
"rewards/rejected": -1.8899074792861938, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.3471106008419441e-07, |
|
"logits/chosen": -2.9807791709899902, |
|
"logits/rejected": -2.9884274005889893, |
|
"logps/chosen": -244.3290252685547, |
|
"logps/rejected": -231.4929656982422, |
|
"loss": 0.0254, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.9376288652420044, |
|
"rewards/margins": 1.005837082862854, |
|
"rewards/rejected": -1.9434658288955688, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.3279755070799848e-07, |
|
"logits/chosen": -2.9636523723602295, |
|
"logits/rejected": -2.898196220397949, |
|
"logps/chosen": -302.7827453613281, |
|
"logps/rejected": -254.11367797851562, |
|
"loss": -0.0107, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.0612095594406128, |
|
"rewards/margins": 0.9029472470283508, |
|
"rewards/rejected": -1.9641568660736084, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.3088404133180254e-07, |
|
"logits/chosen": -2.962622880935669, |
|
"logits/rejected": -2.9335360527038574, |
|
"logps/chosen": -321.44342041015625, |
|
"logps/rejected": -250.2067108154297, |
|
"loss": -0.2537, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.8546531796455383, |
|
"rewards/margins": 1.267183780670166, |
|
"rewards/rejected": -2.1218371391296387, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.289705319556066e-07, |
|
"logits/chosen": -2.961509943008423, |
|
"logits/rejected": -2.923830509185791, |
|
"logps/chosen": -294.05279541015625, |
|
"logps/rejected": -256.3243103027344, |
|
"loss": -0.0748, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.8142738342285156, |
|
"rewards/margins": 1.3003737926483154, |
|
"rewards/rejected": -2.114647626876831, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.2705702257941064e-07, |
|
"logits/chosen": -2.9922921657562256, |
|
"logits/rejected": -2.9666550159454346, |
|
"logps/chosen": -281.14788818359375, |
|
"logps/rejected": -261.50445556640625, |
|
"loss": -0.103, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.889809787273407, |
|
"rewards/margins": 1.2682868242263794, |
|
"rewards/rejected": -2.1580967903137207, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.251435132032147e-07, |
|
"logits/chosen": -3.009636878967285, |
|
"logits/rejected": -2.9912712574005127, |
|
"logps/chosen": -262.1191101074219, |
|
"logps/rejected": -254.8567352294922, |
|
"loss": 0.0064, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.9876982569694519, |
|
"rewards/margins": 0.9436071515083313, |
|
"rewards/rejected": -1.9313055276870728, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.2323000382701873e-07, |
|
"logits/chosen": -3.0256762504577637, |
|
"logits/rejected": -3.006740093231201, |
|
"logps/chosen": -309.2908630371094, |
|
"logps/rejected": -252.59323120117188, |
|
"loss": 0.0352, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9725021123886108, |
|
"rewards/margins": 1.1422688961029053, |
|
"rewards/rejected": -2.1147711277008057, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.213164944508228e-07, |
|
"logits/chosen": -2.9900639057159424, |
|
"logits/rejected": -2.9898974895477295, |
|
"logps/chosen": -299.51177978515625, |
|
"logps/rejected": -267.8308410644531, |
|
"loss": -0.1218, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0168074369430542, |
|
"rewards/margins": 1.148054838180542, |
|
"rewards/rejected": -2.1648621559143066, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.1940298507462686e-07, |
|
"logits/chosen": -2.986194610595703, |
|
"logits/rejected": -2.9787726402282715, |
|
"logps/chosen": -285.0082702636719, |
|
"logps/rejected": -241.463134765625, |
|
"loss": -0.0902, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.0312423706054688, |
|
"rewards/margins": 1.1112394332885742, |
|
"rewards/rejected": -2.142481803894043, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.1748947569843092e-07, |
|
"logits/chosen": -2.949153184890747, |
|
"logits/rejected": -2.939624786376953, |
|
"logps/chosen": -294.7286682128906, |
|
"logps/rejected": -233.32864379882812, |
|
"loss": -0.2292, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8459509015083313, |
|
"rewards/margins": 1.125945806503296, |
|
"rewards/rejected": -1.9718964099884033, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.1557596632223497e-07, |
|
"logits/chosen": -3.0164811611175537, |
|
"logits/rejected": -3.015223741531372, |
|
"logps/chosen": -283.99652099609375, |
|
"logps/rejected": -248.39602661132812, |
|
"loss": -0.175, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.068522334098816, |
|
"rewards/margins": 0.9977639317512512, |
|
"rewards/rejected": -2.066286325454712, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.1366245694603903e-07, |
|
"logits/chosen": -2.9585719108581543, |
|
"logits/rejected": -2.9644482135772705, |
|
"logps/chosen": -282.3656921386719, |
|
"logps/rejected": -257.11041259765625, |
|
"loss": -0.04, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.0202432870864868, |
|
"rewards/margins": 1.0421122312545776, |
|
"rewards/rejected": -2.0623555183410645, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.1174894756984308e-07, |
|
"logits/chosen": -2.976578712463379, |
|
"logits/rejected": -2.958589792251587, |
|
"logps/chosen": -243.4874725341797, |
|
"logps/rejected": -247.5500030517578, |
|
"loss": -0.1438, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.131103277206421, |
|
"rewards/margins": 1.045607328414917, |
|
"rewards/rejected": -2.176710605621338, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.0983543819364714e-07, |
|
"logits/chosen": -2.966090440750122, |
|
"logits/rejected": -2.963219165802002, |
|
"logps/chosen": -265.3279113769531, |
|
"logps/rejected": -245.90011596679688, |
|
"loss": -0.3008, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1297423839569092, |
|
"rewards/margins": 1.0481914281845093, |
|
"rewards/rejected": -2.177933931350708, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.079219288174512e-07, |
|
"logits/chosen": -2.8891921043395996, |
|
"logits/rejected": -2.8980748653411865, |
|
"logps/chosen": -245.95260620117188, |
|
"logps/rejected": -274.68511962890625, |
|
"loss": -0.0161, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.029191255569458, |
|
"rewards/margins": 1.0524277687072754, |
|
"rewards/rejected": -2.0816187858581543, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.0600841944125525e-07, |
|
"logits/chosen": -2.954099178314209, |
|
"logits/rejected": -2.9609456062316895, |
|
"logps/chosen": -305.5159606933594, |
|
"logps/rejected": -262.00189208984375, |
|
"loss": 0.0565, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0684045553207397, |
|
"rewards/margins": 1.0408602952957153, |
|
"rewards/rejected": -2.109265089035034, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.0409491006505931e-07, |
|
"logits/chosen": -2.95751690864563, |
|
"logits/rejected": -2.9540932178497314, |
|
"logps/chosen": -272.47802734375, |
|
"logps/rejected": -252.9427490234375, |
|
"loss": -0.1484, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9607385396957397, |
|
"rewards/margins": 1.1778024435043335, |
|
"rewards/rejected": -2.1385409832000732, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.0218140068886336e-07, |
|
"logits/chosen": -2.9053263664245605, |
|
"logits/rejected": -2.8971660137176514, |
|
"logps/chosen": -288.0937805175781, |
|
"logps/rejected": -249.78091430664062, |
|
"loss": -0.0786, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.9809226989746094, |
|
"rewards/margins": 1.1126328706741333, |
|
"rewards/rejected": -2.0935556888580322, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.0026789131266743e-07, |
|
"logits/chosen": -2.902618169784546, |
|
"logits/rejected": -2.8822264671325684, |
|
"logps/chosen": -282.31939697265625, |
|
"logps/rejected": -234.1844940185547, |
|
"loss": -0.0294, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.0446959733963013, |
|
"rewards/margins": 1.026447057723999, |
|
"rewards/rejected": -2.0711429119110107, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 9.835438193647149e-08, |
|
"logits/chosen": -2.979097366333008, |
|
"logits/rejected": -2.939335584640503, |
|
"logps/chosen": -292.04156494140625, |
|
"logps/rejected": -249.23880004882812, |
|
"loss": -0.0569, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.1423760652542114, |
|
"rewards/margins": 1.2263530492782593, |
|
"rewards/rejected": -2.3687291145324707, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.644087256027554e-08, |
|
"logits/chosen": -2.9777472019195557, |
|
"logits/rejected": -2.9249491691589355, |
|
"logps/chosen": -300.14605712890625, |
|
"logps/rejected": -261.2189025878906, |
|
"loss": -0.1677, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.053171992301941, |
|
"rewards/margins": 1.085288166999817, |
|
"rewards/rejected": -2.1384599208831787, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 9.45273631840796e-08, |
|
"logits/chosen": -2.9415881633758545, |
|
"logits/rejected": -2.920991897583008, |
|
"logps/chosen": -283.33758544921875, |
|
"logps/rejected": -262.96820068359375, |
|
"loss": -0.1276, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8991168737411499, |
|
"rewards/margins": 1.2601044178009033, |
|
"rewards/rejected": -2.1592211723327637, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.261385380788366e-08, |
|
"logits/chosen": -2.9756853580474854, |
|
"logits/rejected": -2.946068525314331, |
|
"logps/chosen": -280.66241455078125, |
|
"logps/rejected": -240.4520263671875, |
|
"loss": -0.126, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.0988253355026245, |
|
"rewards/margins": 0.9154653549194336, |
|
"rewards/rejected": -2.0142908096313477, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 9.070034443168771e-08, |
|
"logits/chosen": -2.981102466583252, |
|
"logits/rejected": -2.961693286895752, |
|
"logps/chosen": -277.573974609375, |
|
"logps/rejected": -287.72515869140625, |
|
"loss": -0.0153, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.1206835508346558, |
|
"rewards/margins": 1.1004483699798584, |
|
"rewards/rejected": -2.2211318016052246, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.878683505549177e-08, |
|
"logits/chosen": -2.969409465789795, |
|
"logits/rejected": -2.9719467163085938, |
|
"logps/chosen": -269.2553405761719, |
|
"logps/rejected": -276.8422546386719, |
|
"loss": -0.2463, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.0086815357208252, |
|
"rewards/margins": 1.2824007272720337, |
|
"rewards/rejected": -2.2910819053649902, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 8.687332567929582e-08, |
|
"logits/chosen": -2.9511923789978027, |
|
"logits/rejected": -2.945295810699463, |
|
"logps/chosen": -259.01507568359375, |
|
"logps/rejected": -261.44781494140625, |
|
"loss": -0.1105, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.238194465637207, |
|
"rewards/margins": 0.8837486505508423, |
|
"rewards/rejected": -2.1219429969787598, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 8.495981630309988e-08, |
|
"logits/chosen": -2.9451937675476074, |
|
"logits/rejected": -2.9014077186584473, |
|
"logps/chosen": -306.05902099609375, |
|
"logps/rejected": -251.77072143554688, |
|
"loss": -0.0642, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.9629167318344116, |
|
"rewards/margins": 1.1698607206344604, |
|
"rewards/rejected": -2.132777452468872, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 8.304630692690395e-08, |
|
"logits/chosen": -2.968233108520508, |
|
"logits/rejected": -2.9597315788269043, |
|
"logps/chosen": -272.21258544921875, |
|
"logps/rejected": -263.3721008300781, |
|
"loss": -0.1204, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.0844926834106445, |
|
"rewards/margins": 1.1088446378707886, |
|
"rewards/rejected": -2.1933372020721436, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 8.1132797550708e-08, |
|
"logits/chosen": -3.0234591960906982, |
|
"logits/rejected": -2.9642250537872314, |
|
"logps/chosen": -313.2720947265625, |
|
"logps/rejected": -251.24020385742188, |
|
"loss": -0.1713, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7700778245925903, |
|
"rewards/margins": 1.3790977001190186, |
|
"rewards/rejected": -2.1491756439208984, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.921928817451206e-08, |
|
"logits/chosen": -2.968733549118042, |
|
"logits/rejected": -2.9718379974365234, |
|
"logps/chosen": -277.2424621582031, |
|
"logps/rejected": -242.42172241210938, |
|
"loss": -0.1563, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.095190167427063, |
|
"rewards/margins": 1.008971095085144, |
|
"rewards/rejected": -2.104161262512207, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.73057787983161e-08, |
|
"logits/chosen": -2.9798028469085693, |
|
"logits/rejected": -2.9650533199310303, |
|
"logps/chosen": -290.0262756347656, |
|
"logps/rejected": -264.9449157714844, |
|
"loss": -0.2883, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.0747302770614624, |
|
"rewards/margins": 1.2478855848312378, |
|
"rewards/rejected": -2.3226161003112793, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 7.539226942212017e-08, |
|
"logits/chosen": -2.945732593536377, |
|
"logits/rejected": -2.94990611076355, |
|
"logps/chosen": -305.3632507324219, |
|
"logps/rejected": -274.59722900390625, |
|
"loss": -0.2619, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0448615550994873, |
|
"rewards/margins": 1.2908263206481934, |
|
"rewards/rejected": -2.3356876373291016, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.347876004592423e-08, |
|
"logits/chosen": -2.9896063804626465, |
|
"logits/rejected": -2.9576222896575928, |
|
"logps/chosen": -272.3258361816406, |
|
"logps/rejected": -214.65298461914062, |
|
"loss": -0.1643, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.9911131858825684, |
|
"rewards/margins": 1.24199640750885, |
|
"rewards/rejected": -2.23310923576355, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 7.156525066972828e-08, |
|
"logits/chosen": -2.9311654567718506, |
|
"logits/rejected": -2.9140172004699707, |
|
"logps/chosen": -268.74444580078125, |
|
"logps/rejected": -236.22988891601562, |
|
"loss": -0.0763, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.111546277999878, |
|
"rewards/margins": 1.021420955657959, |
|
"rewards/rejected": -2.132966995239258, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 6.965174129353234e-08, |
|
"logits/chosen": -2.9497828483581543, |
|
"logits/rejected": -2.9311363697052, |
|
"logps/chosen": -305.40240478515625, |
|
"logps/rejected": -264.26177978515625, |
|
"loss": -0.1631, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.7808889746665955, |
|
"rewards/margins": 1.4971510171890259, |
|
"rewards/rejected": -2.2780401706695557, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.773823191733639e-08, |
|
"logits/chosen": -2.8948826789855957, |
|
"logits/rejected": -2.8769936561584473, |
|
"logps/chosen": -272.2402648925781, |
|
"logps/rejected": -246.4182891845703, |
|
"loss": -0.2377, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.8501654863357544, |
|
"rewards/margins": 1.4621152877807617, |
|
"rewards/rejected": -2.3122806549072266, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.582472254114045e-08, |
|
"logits/chosen": -2.9354665279388428, |
|
"logits/rejected": -2.8916828632354736, |
|
"logps/chosen": -272.43682861328125, |
|
"logps/rejected": -274.33819580078125, |
|
"loss": -0.0427, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1567806005477905, |
|
"rewards/margins": 0.9909998178482056, |
|
"rewards/rejected": -2.147780179977417, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 6.391121316494451e-08, |
|
"logits/chosen": -2.9822142124176025, |
|
"logits/rejected": -2.982553482055664, |
|
"logps/chosen": -282.8048400878906, |
|
"logps/rejected": -264.55401611328125, |
|
"loss": -0.2413, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.2675011157989502, |
|
"rewards/margins": 1.1336963176727295, |
|
"rewards/rejected": -2.4011974334716797, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.199770378874856e-08, |
|
"logits/chosen": -2.946916103363037, |
|
"logits/rejected": -2.921715497970581, |
|
"logps/chosen": -295.14581298828125, |
|
"logps/rejected": -264.893798828125, |
|
"loss": -0.1178, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0069832801818848, |
|
"rewards/margins": 1.1966971158981323, |
|
"rewards/rejected": -2.2036805152893066, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 6.008419441255262e-08, |
|
"logits/chosen": -2.996048927307129, |
|
"logits/rejected": -2.9277799129486084, |
|
"logps/chosen": -288.4164733886719, |
|
"logps/rejected": -240.5204315185547, |
|
"loss": -0.1271, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.2253119945526123, |
|
"rewards/margins": 0.9576913714408875, |
|
"rewards/rejected": -2.1830034255981445, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.817068503635668e-08, |
|
"logits/chosen": -2.979665756225586, |
|
"logits/rejected": -3.006110668182373, |
|
"logps/chosen": -309.9973449707031, |
|
"logps/rejected": -266.8681945800781, |
|
"loss": -0.1954, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.072455644607544, |
|
"rewards/margins": 1.285042643547058, |
|
"rewards/rejected": -2.3574986457824707, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.6257175660160735e-08, |
|
"logits/chosen": -2.995903968811035, |
|
"logits/rejected": -2.985555410385132, |
|
"logps/chosen": -255.1632537841797, |
|
"logps/rejected": -262.6217346191406, |
|
"loss": -0.1921, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.135871171951294, |
|
"rewards/margins": 1.1645699739456177, |
|
"rewards/rejected": -2.300441026687622, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.4343666283964784e-08, |
|
"logits/chosen": -2.947361946105957, |
|
"logits/rejected": -2.9222099781036377, |
|
"logps/chosen": -283.39483642578125, |
|
"logps/rejected": -236.3388214111328, |
|
"loss": -0.2522, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.0431753396987915, |
|
"rewards/margins": 1.4626045227050781, |
|
"rewards/rejected": -2.50577974319458, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.243015690776884e-08, |
|
"logits/chosen": -2.934537649154663, |
|
"logits/rejected": -2.903463840484619, |
|
"logps/chosen": -297.0821228027344, |
|
"logps/rejected": -241.7076416015625, |
|
"loss": -0.1882, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.178007960319519, |
|
"rewards/margins": 1.0751577615737915, |
|
"rewards/rejected": -2.2531659603118896, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 5.05166475315729e-08, |
|
"logits/chosen": -2.9361720085144043, |
|
"logits/rejected": -2.882931709289551, |
|
"logps/chosen": -270.83148193359375, |
|
"logps/rejected": -255.24337768554688, |
|
"loss": -0.1525, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2923060655593872, |
|
"rewards/margins": 1.1520329713821411, |
|
"rewards/rejected": -2.444338798522949, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.860313815537696e-08, |
|
"logits/chosen": -2.9840073585510254, |
|
"logits/rejected": -2.9296116828918457, |
|
"logps/chosen": -283.15643310546875, |
|
"logps/rejected": -250.57608032226562, |
|
"loss": -0.1079, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.0855220556259155, |
|
"rewards/margins": 1.3181664943695068, |
|
"rewards/rejected": -2.4036881923675537, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.668962877918101e-08, |
|
"logits/chosen": -2.9590346813201904, |
|
"logits/rejected": -2.9196345806121826, |
|
"logps/chosen": -256.04315185546875, |
|
"logps/rejected": -279.31787109375, |
|
"loss": -0.1673, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.3440836668014526, |
|
"rewards/margins": 1.061953067779541, |
|
"rewards/rejected": -2.406036853790283, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.477611940298507e-08, |
|
"logits/chosen": -2.9351706504821777, |
|
"logits/rejected": -2.9165854454040527, |
|
"logps/chosen": -302.4515075683594, |
|
"logps/rejected": -260.8388366699219, |
|
"loss": -0.4567, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1327465772628784, |
|
"rewards/margins": 1.2759268283843994, |
|
"rewards/rejected": -2.4086735248565674, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.2862610026789124e-08, |
|
"logits/chosen": -3.013035297393799, |
|
"logits/rejected": -2.989163398742676, |
|
"logps/chosen": -270.7334899902344, |
|
"logps/rejected": -254.8367156982422, |
|
"loss": -0.1713, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.0498313903808594, |
|
"rewards/margins": 1.175312876701355, |
|
"rewards/rejected": -2.225144147872925, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.0949100650593186e-08, |
|
"logits/chosen": -2.880460023880005, |
|
"logits/rejected": -2.861250400543213, |
|
"logps/chosen": -269.2752990722656, |
|
"logps/rejected": -265.5714111328125, |
|
"loss": -0.0863, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.1616452932357788, |
|
"rewards/margins": 1.2222042083740234, |
|
"rewards/rejected": -2.383849620819092, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.903559127439724e-08, |
|
"logits/chosen": -2.9040732383728027, |
|
"logits/rejected": -2.920821189880371, |
|
"logps/chosen": -267.18621826171875, |
|
"logps/rejected": -227.2675018310547, |
|
"loss": -0.3545, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.007155179977417, |
|
"rewards/margins": 1.408747911453247, |
|
"rewards/rejected": -2.415903091430664, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.71220818982013e-08, |
|
"logits/chosen": -2.9555585384368896, |
|
"logits/rejected": -2.891514778137207, |
|
"logps/chosen": -293.35809326171875, |
|
"logps/rejected": -241.7179412841797, |
|
"loss": -0.0836, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.3558762073516846, |
|
"rewards/margins": 0.7381645441055298, |
|
"rewards/rejected": -2.094040632247925, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.520857252200535e-08, |
|
"logits/chosen": -2.9766342639923096, |
|
"logits/rejected": -2.9516689777374268, |
|
"logps/chosen": -288.6914978027344, |
|
"logps/rejected": -282.8217468261719, |
|
"loss": -0.079, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.2634086608886719, |
|
"rewards/margins": 0.9370874166488647, |
|
"rewards/rejected": -2.200496196746826, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.3295063145809414e-08, |
|
"logits/chosen": -2.9203312397003174, |
|
"logits/rejected": -2.923161268234253, |
|
"logps/chosen": -278.013916015625, |
|
"logps/rejected": -267.54168701171875, |
|
"loss": -0.1356, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.0634254217147827, |
|
"rewards/margins": 1.14893639087677, |
|
"rewards/rejected": -2.212362051010132, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.138155376961347e-08, |
|
"logits/chosen": -3.015432834625244, |
|
"logits/rejected": -2.9883949756622314, |
|
"logps/chosen": -306.4672546386719, |
|
"logps/rejected": -284.0825500488281, |
|
"loss": -0.0924, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2742321491241455, |
|
"rewards/margins": 1.3397479057312012, |
|
"rewards/rejected": -2.6139800548553467, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.9468044393417525e-08, |
|
"logits/chosen": -2.9456729888916016, |
|
"logits/rejected": -2.9101481437683105, |
|
"logps/chosen": -281.6629638671875, |
|
"logps/rejected": -247.2931671142578, |
|
"loss": -0.2122, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8153074383735657, |
|
"rewards/margins": 1.5913559198379517, |
|
"rewards/rejected": -2.406663417816162, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.755453501722158e-08, |
|
"logits/chosen": -2.8928332328796387, |
|
"logits/rejected": -2.8488776683807373, |
|
"logps/chosen": -262.3460693359375, |
|
"logps/rejected": -253.6611328125, |
|
"loss": -0.3582, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.2655181884765625, |
|
"rewards/margins": 1.1255066394805908, |
|
"rewards/rejected": -2.3910248279571533, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.564102564102564e-08, |
|
"logits/chosen": -2.9541738033294678, |
|
"logits/rejected": -2.9483609199523926, |
|
"logps/chosen": -281.6482849121094, |
|
"logps/rejected": -263.13116455078125, |
|
"loss": -0.2229, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.135956048965454, |
|
"rewards/margins": 1.4537389278411865, |
|
"rewards/rejected": -2.5896952152252197, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.3727516264829695e-08, |
|
"logits/chosen": -2.96710467338562, |
|
"logits/rejected": -2.935039520263672, |
|
"logps/chosen": -314.2068176269531, |
|
"logps/rejected": -288.13592529296875, |
|
"loss": -0.0912, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.2961633205413818, |
|
"rewards/margins": 1.0811083316802979, |
|
"rewards/rejected": -2.3772716522216797, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.1814006888633754e-08, |
|
"logits/chosen": -2.9459080696105957, |
|
"logits/rejected": -2.89158296585083, |
|
"logps/chosen": -286.0101318359375, |
|
"logps/rejected": -239.49160766601562, |
|
"loss": -0.115, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.2027372121810913, |
|
"rewards/margins": 1.0859975814819336, |
|
"rewards/rejected": -2.2887349128723145, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.990049751243781e-08, |
|
"logits/chosen": -2.9392828941345215, |
|
"logits/rejected": -2.9443650245666504, |
|
"logps/chosen": -291.0428771972656, |
|
"logps/rejected": -242.24203491210938, |
|
"loss": -0.2681, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.150545358657837, |
|
"rewards/margins": 1.183664083480835, |
|
"rewards/rejected": -2.334209680557251, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.7986988136241865e-08, |
|
"logits/chosen": -2.936471462249756, |
|
"logits/rejected": -2.891524076461792, |
|
"logps/chosen": -287.06024169921875, |
|
"logps/rejected": -288.4886169433594, |
|
"loss": -0.2539, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.1829683780670166, |
|
"rewards/margins": 1.063999891281128, |
|
"rewards/rejected": -2.2469685077667236, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.6073478760045924e-08, |
|
"logits/chosen": -2.9530060291290283, |
|
"logits/rejected": -2.9200854301452637, |
|
"logps/chosen": -283.59716796875, |
|
"logps/rejected": -236.90316772460938, |
|
"loss": -0.1698, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.1723743677139282, |
|
"rewards/margins": 1.1659984588623047, |
|
"rewards/rejected": -2.3383727073669434, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.4159969383849981e-08, |
|
"logits/chosen": -2.9582297801971436, |
|
"logits/rejected": -2.9348087310791016, |
|
"logps/chosen": -269.26531982421875, |
|
"logps/rejected": -241.8841094970703, |
|
"loss": -0.3465, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.1937872171401978, |
|
"rewards/margins": 1.0226060152053833, |
|
"rewards/rejected": -2.216393232345581, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.2246460007654037e-08, |
|
"logits/chosen": -2.985964775085449, |
|
"logits/rejected": -2.9667985439300537, |
|
"logps/chosen": -297.0205078125, |
|
"logps/rejected": -251.09805297851562, |
|
"loss": -0.1413, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.407981276512146, |
|
"rewards/margins": 0.9606062173843384, |
|
"rewards/rejected": -2.3685877323150635, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.0332950631458094e-08, |
|
"logits/chosen": -2.9180400371551514, |
|
"logits/rejected": -2.9097437858581543, |
|
"logps/chosen": -291.8419494628906, |
|
"logps/rejected": -265.8045349121094, |
|
"loss": -0.116, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1430332660675049, |
|
"rewards/margins": 1.2068586349487305, |
|
"rewards/rejected": -2.3498916625976562, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 8.419441255262151e-09, |
|
"logits/chosen": -2.9281063079833984, |
|
"logits/rejected": -2.8641605377197266, |
|
"logps/chosen": -270.81256103515625, |
|
"logps/rejected": -246.7694549560547, |
|
"loss": -0.2278, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.335326910018921, |
|
"rewards/margins": 1.1297729015350342, |
|
"rewards/rejected": -2.465099811553955, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 6.505931879066207e-09, |
|
"logits/chosen": -2.948106050491333, |
|
"logits/rejected": -2.921739339828491, |
|
"logps/chosen": -276.6488952636719, |
|
"logps/rejected": -248.07608032226562, |
|
"loss": -0.1698, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.1927988529205322, |
|
"rewards/margins": 1.302239179611206, |
|
"rewards/rejected": -2.4950382709503174, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 4.592422502870264e-09, |
|
"logits/chosen": -2.9207305908203125, |
|
"logits/rejected": -2.8921730518341064, |
|
"logps/chosen": -288.192138671875, |
|
"logps/rejected": -244.62954711914062, |
|
"loss": -0.1019, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.206674337387085, |
|
"rewards/margins": 0.8675975799560547, |
|
"rewards/rejected": -2.0742716789245605, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 2.6789131266743202e-09, |
|
"logits/chosen": -3.0061116218566895, |
|
"logits/rejected": -2.972447156906128, |
|
"logps/chosen": -306.70977783203125, |
|
"logps/rejected": -261.82244873046875, |
|
"loss": -0.1622, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.2506418228149414, |
|
"rewards/margins": 1.2404968738555908, |
|
"rewards/rejected": -2.4911389350891113, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 7.654037504783773e-10, |
|
"logits/chosen": -2.974818706512451, |
|
"logits/rejected": -2.963379383087158, |
|
"logps/chosen": -263.7333679199219, |
|
"logps/rejected": -262.7867126464844, |
|
"loss": -0.2602, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.150649070739746, |
|
"rewards/margins": 1.2455689907073975, |
|
"rewards/rejected": -2.3962180614471436, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_logits/chosen": -2.965512990951538, |
|
"eval_logits/rejected": -2.9399757385253906, |
|
"eval_logps/chosen": -282.7847900390625, |
|
"eval_logps/rejected": -252.9479217529297, |
|
"eval_loss": -0.203842431306839, |
|
"eval_rewards/accuracies": 0.6840000152587891, |
|
"eval_rewards/chosen": -1.1628247499465942, |
|
"eval_rewards/margins": 1.2828813791275024, |
|
"eval_rewards/rejected": -2.4457061290740967, |
|
"eval_runtime": 446.4813, |
|
"eval_samples_per_second": 4.479, |
|
"eval_steps_per_second": 0.28, |
|
"step": 2904 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 2904, |
|
"total_flos": 0.0, |
|
"train_loss": 0.36701411283100355, |
|
"train_runtime": 84636.1866, |
|
"train_samples_per_second": 2.196, |
|
"train_steps_per_second": 0.034 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2904, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|