|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997382884061764, |
|
"eval_steps": 500, |
|
"global_step": 1910, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.617801047120419e-08, |
|
"logits/chosen": 0.5248222947120667, |
|
"logits/rejected": 0.7921571731567383, |
|
"logps/chosen": -341.40020751953125, |
|
"logps/rejected": -250.28689575195312, |
|
"loss": 2500.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.617801047120419e-07, |
|
"logits/chosen": 0.6701858043670654, |
|
"logits/rejected": 0.7214743494987488, |
|
"logps/chosen": -282.2621765136719, |
|
"logps/rejected": -253.0035858154297, |
|
"loss": 2503.3785, |
|
"rewards/accuracies": 0.3472222089767456, |
|
"rewards/chosen": -0.0003065296623390168, |
|
"rewards/margins": -0.0004795099375769496, |
|
"rewards/rejected": 0.00017298036254942417, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.235602094240838e-07, |
|
"logits/chosen": 0.6246625781059265, |
|
"logits/rejected": 0.6360182166099548, |
|
"logps/chosen": -314.952392578125, |
|
"logps/rejected": -267.18927001953125, |
|
"loss": 2498.9023, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.0006705065025016665, |
|
"rewards/margins": -0.00035223033046349883, |
|
"rewards/rejected": 0.0010227367747575045, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.853403141361258e-07, |
|
"logits/chosen": 0.621583104133606, |
|
"logits/rejected": 0.6869794726371765, |
|
"logps/chosen": -260.4806213378906, |
|
"logps/rejected": -254.1117401123047, |
|
"loss": 2501.4164, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.0003680586232803762, |
|
"rewards/margins": 8.779224299360067e-05, |
|
"rewards/rejected": -0.00045585090992972255, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0471204188481676e-06, |
|
"logits/chosen": 0.6395634412765503, |
|
"logits/rejected": 0.7919565439224243, |
|
"logps/chosen": -240.0687255859375, |
|
"logps/rejected": -229.44149780273438, |
|
"loss": 2499.6219, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.001351092243567109, |
|
"rewards/margins": 0.0006501252064481378, |
|
"rewards/rejected": 0.0007009669207036495, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.3089005235602096e-06, |
|
"logits/chosen": 0.5841951370239258, |
|
"logits/rejected": 0.6311219334602356, |
|
"logps/chosen": -292.2647705078125, |
|
"logps/rejected": -244.11605834960938, |
|
"loss": 2499.3211, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.0004217842943035066, |
|
"rewards/margins": -0.0002647504734341055, |
|
"rewards/rejected": -0.00015703373355790973, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5706806282722515e-06, |
|
"logits/chosen": 0.5548856854438782, |
|
"logits/rejected": 0.6343160271644592, |
|
"logps/chosen": -250.15615844726562, |
|
"logps/rejected": -239.6744842529297, |
|
"loss": 2497.8898, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": -0.00014420936349779367, |
|
"rewards/margins": -0.00012744043488055468, |
|
"rewards/rejected": -1.6768904970376752e-05, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.8324607329842933e-06, |
|
"logits/chosen": 0.5806199312210083, |
|
"logits/rejected": 0.6849480867385864, |
|
"logps/chosen": -263.5242004394531, |
|
"logps/rejected": -242.7255096435547, |
|
"loss": 2495.5641, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0010010639671236277, |
|
"rewards/margins": 0.0013473776634782553, |
|
"rewards/rejected": -0.0023484418634325266, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.094240837696335e-06, |
|
"logits/chosen": 0.6474049687385559, |
|
"logits/rejected": 0.6796275973320007, |
|
"logps/chosen": -265.99652099609375, |
|
"logps/rejected": -255.4057159423828, |
|
"loss": 2483.6129, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.00029454095056280494, |
|
"rewards/margins": 0.0018218889599666, |
|
"rewards/rejected": -0.0015273483004420996, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.356020942408377e-06, |
|
"logits/chosen": 0.6297029256820679, |
|
"logits/rejected": 0.6693249344825745, |
|
"logps/chosen": -259.20013427734375, |
|
"logps/rejected": -260.8564453125, |
|
"loss": 2468.257, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.0004229500482324511, |
|
"rewards/margins": 0.0030678685288876295, |
|
"rewards/rejected": -0.002644918393343687, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.617801047120419e-06, |
|
"logits/chosen": 0.6495063304901123, |
|
"logits/rejected": 0.6480900645256042, |
|
"logps/chosen": -265.02508544921875, |
|
"logps/rejected": -234.58029174804688, |
|
"loss": 2468.3684, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.000877298996783793, |
|
"rewards/margins": 0.0035102677065879107, |
|
"rewards/rejected": -0.0043875668197870255, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8795811518324613e-06, |
|
"logits/chosen": 0.6897019147872925, |
|
"logits/rejected": 0.7566229701042175, |
|
"logps/chosen": -304.11102294921875, |
|
"logps/rejected": -264.30621337890625, |
|
"loss": 2465.5504, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.004195456858724356, |
|
"rewards/margins": 0.0027198302559554577, |
|
"rewards/rejected": -0.006915287580341101, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.141361256544503e-06, |
|
"logits/chosen": 0.6134337186813354, |
|
"logits/rejected": 0.7231487035751343, |
|
"logps/chosen": -311.39984130859375, |
|
"logps/rejected": -257.10650634765625, |
|
"loss": 2428.1793, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.0012777966912835836, |
|
"rewards/margins": 0.009219733066856861, |
|
"rewards/rejected": -0.010497529059648514, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.403141361256545e-06, |
|
"logits/chosen": 0.7281027436256409, |
|
"logits/rejected": 0.7294681668281555, |
|
"logps/chosen": -287.9706115722656, |
|
"logps/rejected": -253.9458770751953, |
|
"loss": 2393.4254, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.0035368993412703276, |
|
"rewards/margins": 0.01360202394425869, |
|
"rewards/rejected": -0.010065125301480293, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.6649214659685865e-06, |
|
"logits/chosen": 0.6975444555282593, |
|
"logits/rejected": 0.7464872598648071, |
|
"logps/chosen": -283.344970703125, |
|
"logps/rejected": -269.69134521484375, |
|
"loss": 2391.2451, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.0033713714219629765, |
|
"rewards/margins": 0.01122850738465786, |
|
"rewards/rejected": -0.0145998764783144, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.926701570680629e-06, |
|
"logits/chosen": 0.6442640423774719, |
|
"logits/rejected": 0.7055094838142395, |
|
"logps/chosen": -297.76904296875, |
|
"logps/rejected": -262.79010009765625, |
|
"loss": 2339.1273, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.006125102750957012, |
|
"rewards/margins": 0.016310054808855057, |
|
"rewards/rejected": -0.022435154765844345, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.18848167539267e-06, |
|
"logits/chosen": 0.6234780550003052, |
|
"logits/rejected": 0.5896192789077759, |
|
"logps/chosen": -279.10498046875, |
|
"logps/rejected": -247.49490356445312, |
|
"loss": 2336.1186, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.01037096418440342, |
|
"rewards/margins": 0.019876617938280106, |
|
"rewards/rejected": -0.030247583985328674, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.450261780104713e-06, |
|
"logits/chosen": 0.6194897890090942, |
|
"logits/rejected": 0.6363841891288757, |
|
"logps/chosen": -293.1652526855469, |
|
"logps/rejected": -242.565185546875, |
|
"loss": 2349.7029, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.013055374845862389, |
|
"rewards/margins": 0.02146710641682148, |
|
"rewards/rejected": -0.03452248126268387, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.712041884816754e-06, |
|
"logits/chosen": 0.6403040885925293, |
|
"logits/rejected": 0.7126356363296509, |
|
"logps/chosen": -291.5748596191406, |
|
"logps/rejected": -251.09121704101562, |
|
"loss": 2273.3473, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.030034661293029785, |
|
"rewards/margins": 0.02929743006825447, |
|
"rewards/rejected": -0.05933208391070366, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.9738219895287965e-06, |
|
"logits/chosen": 0.7060214281082153, |
|
"logits/rejected": 0.7062759399414062, |
|
"logps/chosen": -251.7637939453125, |
|
"logps/rejected": -224.1781768798828, |
|
"loss": 2288.9881, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.039670929312705994, |
|
"rewards/margins": 0.025101035833358765, |
|
"rewards/rejected": -0.06477196514606476, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999661831436499e-06, |
|
"logits/chosen": 0.6065430045127869, |
|
"logits/rejected": 0.5599089860916138, |
|
"logps/chosen": -303.3594970703125, |
|
"logps/rejected": -281.65301513671875, |
|
"loss": 2323.0852, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.051525335758924484, |
|
"rewards/margins": 0.03378116711974144, |
|
"rewards/rejected": -0.08530650287866592, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.9984929711403395e-06, |
|
"logits/chosen": 0.6741048097610474, |
|
"logits/rejected": 0.7067887187004089, |
|
"logps/chosen": -256.2054138183594, |
|
"logps/rejected": -225.0240478515625, |
|
"loss": 2257.4254, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.05382019281387329, |
|
"rewards/margins": 0.03237619996070862, |
|
"rewards/rejected": -0.08619637787342072, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.996489634487865e-06, |
|
"logits/chosen": 0.6408742070198059, |
|
"logits/rejected": 0.7459608316421509, |
|
"logps/chosen": -280.1336975097656, |
|
"logps/rejected": -261.680908203125, |
|
"loss": 2225.691, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.06642502546310425, |
|
"rewards/margins": 0.03849685937166214, |
|
"rewards/rejected": -0.10492189228534698, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.9936524905772466e-06, |
|
"logits/chosen": 0.511016845703125, |
|
"logits/rejected": 0.6876882910728455, |
|
"logps/chosen": -295.7177429199219, |
|
"logps/rejected": -278.0430908203125, |
|
"loss": 2274.0725, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.0953628420829773, |
|
"rewards/margins": 0.025789355859160423, |
|
"rewards/rejected": -0.12115219980478287, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9899824869915e-06, |
|
"logits/chosen": 0.6104953289031982, |
|
"logits/rejected": 0.6206714510917664, |
|
"logps/chosen": -269.36212158203125, |
|
"logps/rejected": -230.29348754882812, |
|
"loss": 2182.6342, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0956740602850914, |
|
"rewards/margins": 0.05557785555720329, |
|
"rewards/rejected": -0.1512519270181656, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.985480849482012e-06, |
|
"logits/chosen": 0.5841513872146606, |
|
"logits/rejected": 0.7289382815361023, |
|
"logps/chosen": -297.09185791015625, |
|
"logps/rejected": -283.0509338378906, |
|
"loss": 2300.1652, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.12446895986795425, |
|
"rewards/margins": 0.020662058144807816, |
|
"rewards/rejected": -0.14513102173805237, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.980149081559142e-06, |
|
"logits/chosen": 0.594504177570343, |
|
"logits/rejected": 0.659439742565155, |
|
"logps/chosen": -318.9085693359375, |
|
"logps/rejected": -282.30548095703125, |
|
"loss": 2157.8906, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.0681561678647995, |
|
"rewards/margins": 0.05495098978281021, |
|
"rewards/rejected": -0.12310715764760971, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9739889639900655e-06, |
|
"logits/chosen": 0.660437822341919, |
|
"logits/rejected": 0.6371047496795654, |
|
"logps/chosen": -278.40045166015625, |
|
"logps/rejected": -276.32794189453125, |
|
"loss": 2100.2004, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.0825929194688797, |
|
"rewards/margins": 0.06175379827618599, |
|
"rewards/rejected": -0.1443466991186142, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.967002554204009e-06, |
|
"logits/chosen": 0.5346761345863342, |
|
"logits/rejected": 0.653354287147522, |
|
"logps/chosen": -274.6492919921875, |
|
"logps/rejected": -256.0135192871094, |
|
"loss": 2231.843, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.10598695278167725, |
|
"rewards/margins": 0.04181862622499466, |
|
"rewards/rejected": -0.1478056013584137, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.959192185605089e-06, |
|
"logits/chosen": 0.6121161580085754, |
|
"logits/rejected": 0.6786028742790222, |
|
"logps/chosen": -291.69268798828125, |
|
"logps/rejected": -270.1987609863281, |
|
"loss": 2302.0896, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.11571818590164185, |
|
"rewards/margins": 0.04277648404240608, |
|
"rewards/rejected": -0.15849466621875763, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.950560466792969e-06, |
|
"logits/chosen": 0.6503465175628662, |
|
"logits/rejected": 0.6750337481498718, |
|
"logps/chosen": -297.166259765625, |
|
"logps/rejected": -264.58905029296875, |
|
"loss": 2290.2439, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.1382652223110199, |
|
"rewards/margins": 0.050003498792648315, |
|
"rewards/rejected": -0.1882687211036682, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.9411102806916185e-06, |
|
"logits/chosen": 0.6111949682235718, |
|
"logits/rejected": 0.5789340734481812, |
|
"logps/chosen": -335.71209716796875, |
|
"logps/rejected": -271.94281005859375, |
|
"loss": 2030.3432, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.1214783638715744, |
|
"rewards/margins": 0.07279713451862335, |
|
"rewards/rejected": -0.19427552819252014, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.930844783586424e-06, |
|
"logits/chosen": 0.6168066263198853, |
|
"logits/rejected": 0.6404728293418884, |
|
"logps/chosen": -267.7217102050781, |
|
"logps/rejected": -254.76156616210938, |
|
"loss": 2180.659, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.1399281919002533, |
|
"rewards/margins": 0.05871574953198433, |
|
"rewards/rejected": -0.19864396750926971, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.919767404070033e-06, |
|
"logits/chosen": 0.6463326215744019, |
|
"logits/rejected": 0.6055666208267212, |
|
"logps/chosen": -288.9108581542969, |
|
"logps/rejected": -271.7781066894531, |
|
"loss": 2136.0844, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.18461689352989197, |
|
"rewards/margins": 0.058290112763643265, |
|
"rewards/rejected": -0.24290700256824493, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.907881841897216e-06, |
|
"logits/chosen": 0.5913775563240051, |
|
"logits/rejected": 0.6087537407875061, |
|
"logps/chosen": -348.69317626953125, |
|
"logps/rejected": -276.8719787597656, |
|
"loss": 2095.2922, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.21791012585163116, |
|
"rewards/margins": 0.08299825340509415, |
|
"rewards/rejected": -0.3009083867073059, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.89519206674919e-06, |
|
"logits/chosen": 0.5306932926177979, |
|
"logits/rejected": 0.5599890947341919, |
|
"logps/chosen": -277.52801513671875, |
|
"logps/rejected": -282.8912048339844, |
|
"loss": 2027.6721, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.2483474463224411, |
|
"rewards/margins": 0.08130116015672684, |
|
"rewards/rejected": -0.32964861392974854, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.881702316907769e-06, |
|
"logits/chosen": 0.5017831921577454, |
|
"logits/rejected": 0.6145971417427063, |
|
"logps/chosen": -247.94332885742188, |
|
"logps/rejected": -276.70562744140625, |
|
"loss": 2096.8201, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.2660738229751587, |
|
"rewards/margins": 0.07246068120002747, |
|
"rewards/rejected": -0.33853450417518616, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.86741709783982e-06, |
|
"logits/chosen": 0.5044198632240295, |
|
"logits/rejected": 0.610668957233429, |
|
"logps/chosen": -370.6680908203125, |
|
"logps/rejected": -315.87445068359375, |
|
"loss": 2240.1234, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.2816161513328552, |
|
"rewards/margins": 0.0706692636013031, |
|
"rewards/rejected": -0.3522854149341583, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.852341180692471e-06, |
|
"logits/chosen": 0.5787937045097351, |
|
"logits/rejected": 0.6844087839126587, |
|
"logps/chosen": -314.65386962890625, |
|
"logps/rejected": -279.57464599609375, |
|
"loss": 2000.0223, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.28157955408096313, |
|
"rewards/margins": 0.09683749079704285, |
|
"rewards/rejected": -0.3784170150756836, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.836479600699579e-06, |
|
"logits/chosen": 0.5943303108215332, |
|
"logits/rejected": 0.5367528200149536, |
|
"logps/chosen": -326.36322021484375, |
|
"logps/rejected": -322.28851318359375, |
|
"loss": 2038.491, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.31988343596458435, |
|
"rewards/margins": 0.08327943086624146, |
|
"rewards/rejected": -0.4031628668308258, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.819837655500014e-06, |
|
"logits/chosen": 0.4382709562778473, |
|
"logits/rejected": 0.5377568006515503, |
|
"logps/chosen": -268.5506286621094, |
|
"logps/rejected": -256.25457763671875, |
|
"loss": 2150.4953, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.3451124131679535, |
|
"rewards/margins": 0.06852660328149796, |
|
"rewards/rejected": -0.41363900899887085, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.802420903368286e-06, |
|
"logits/chosen": 0.5234228372573853, |
|
"logits/rejected": 0.5555657744407654, |
|
"logps/chosen": -306.6415100097656, |
|
"logps/rejected": -284.44061279296875, |
|
"loss": 2237.9328, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.3614524304866791, |
|
"rewards/margins": 0.05758042261004448, |
|
"rewards/rejected": -0.41903290152549744, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.784235161358124e-06, |
|
"logits/chosen": 0.477716863155365, |
|
"logits/rejected": 0.5133184194564819, |
|
"logps/chosen": -334.1138916015625, |
|
"logps/rejected": -301.9080505371094, |
|
"loss": 2092.0188, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.3627592921257019, |
|
"rewards/margins": 0.10072964429855347, |
|
"rewards/rejected": -0.463488906621933, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.765286503359632e-06, |
|
"logits/chosen": 0.5422715544700623, |
|
"logits/rejected": 0.6406744122505188, |
|
"logps/chosen": -305.5355529785156, |
|
"logps/rejected": -282.97210693359375, |
|
"loss": 2014.3465, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.29455187916755676, |
|
"rewards/margins": 0.07915346324443817, |
|
"rewards/rejected": -0.3737053871154785, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.745581258070654e-06, |
|
"logits/chosen": 0.43309369683265686, |
|
"logits/rejected": 0.5271438360214233, |
|
"logps/chosen": -287.7671813964844, |
|
"logps/rejected": -281.0647888183594, |
|
"loss": 2185.2459, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.34788942337036133, |
|
"rewards/margins": 0.05820406228303909, |
|
"rewards/rejected": -0.4060935080051422, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.725126006883047e-06, |
|
"logits/chosen": 0.45388108491897583, |
|
"logits/rejected": 0.5083224177360535, |
|
"logps/chosen": -268.25958251953125, |
|
"logps/rejected": -266.2535400390625, |
|
"loss": 2137.6896, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.3282170295715332, |
|
"rewards/margins": 0.05696592479944229, |
|
"rewards/rejected": -0.3851829469203949, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.70392758168454e-06, |
|
"logits/chosen": 0.5513511896133423, |
|
"logits/rejected": 0.510746955871582, |
|
"logps/chosen": -370.2562255859375, |
|
"logps/rejected": -305.41265869140625, |
|
"loss": 2088.2188, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.31895995140075684, |
|
"rewards/margins": 0.08234255015850067, |
|
"rewards/rejected": -0.4013025164604187, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.68199306257695e-06, |
|
"logits/chosen": 0.5287891626358032, |
|
"logits/rejected": 0.5082263350486755, |
|
"logps/chosen": -355.8836364746094, |
|
"logps/rejected": -318.6692810058594, |
|
"loss": 2073.2719, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.327432781457901, |
|
"rewards/margins": 0.10078072547912598, |
|
"rewards/rejected": -0.42821353673934937, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.659329775511478e-06, |
|
"logits/chosen": 0.553450345993042, |
|
"logits/rejected": 0.5118339657783508, |
|
"logps/chosen": -307.2048645019531, |
|
"logps/rejected": -274.0884094238281, |
|
"loss": 2053.993, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.3153889775276184, |
|
"rewards/margins": 0.08515409380197525, |
|
"rewards/rejected": -0.40054306387901306, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.635945289841902e-06, |
|
"logits/chosen": 0.49377956986427307, |
|
"logits/rejected": 0.4585692286491394, |
|
"logps/chosen": -312.8728942871094, |
|
"logps/rejected": -320.9603271484375, |
|
"loss": 2090.7937, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.3384076952934265, |
|
"rewards/margins": 0.10788736492395401, |
|
"rewards/rejected": -0.4462950825691223, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.611847415796476e-06, |
|
"logits/chosen": 0.4848670959472656, |
|
"logits/rejected": 0.42902207374572754, |
|
"logps/chosen": -343.72515869140625, |
|
"logps/rejected": -293.16815185546875, |
|
"loss": 2090.8078, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.37585896253585815, |
|
"rewards/margins": 0.0655459612607956, |
|
"rewards/rejected": -0.44140490889549255, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.587044201869378e-06, |
|
"logits/chosen": 0.4169999957084656, |
|
"logits/rejected": 0.4936888813972473, |
|
"logps/chosen": -311.93963623046875, |
|
"logps/rejected": -320.00408935546875, |
|
"loss": 2092.8064, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.42008256912231445, |
|
"rewards/margins": 0.07488216459751129, |
|
"rewards/rejected": -0.49496474862098694, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.561543932132574e-06, |
|
"logits/chosen": 0.46682390570640564, |
|
"logits/rejected": 0.4480462074279785, |
|
"logps/chosen": -331.2510070800781, |
|
"logps/rejected": -289.58355712890625, |
|
"loss": 2058.4975, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.4290853440761566, |
|
"rewards/margins": 0.08971880376338959, |
|
"rewards/rejected": -0.5188041925430298, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.535355123469009e-06, |
|
"logits/chosen": 0.4296157956123352, |
|
"logits/rejected": 0.4146192967891693, |
|
"logps/chosen": -331.16595458984375, |
|
"logps/rejected": -309.8785705566406, |
|
"loss": 2106.7164, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.5114426612854004, |
|
"rewards/margins": 0.09367333352565765, |
|
"rewards/rejected": -0.6051160097122192, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.508486522728037e-06, |
|
"logits/chosen": 0.5072197318077087, |
|
"logits/rejected": 0.5970960855484009, |
|
"logps/chosen": -322.4172668457031, |
|
"logps/rejected": -315.7408752441406, |
|
"loss": 2065.5746, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.49308618903160095, |
|
"rewards/margins": 0.08288516104221344, |
|
"rewards/rejected": -0.5759714245796204, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.480947103804044e-06, |
|
"logits/chosen": 0.5679532289505005, |
|
"logits/rejected": 0.6028575897216797, |
|
"logps/chosen": -318.32794189453125, |
|
"logps/rejected": -275.52716064453125, |
|
"loss": 2036.5631, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5034439563751221, |
|
"rewards/margins": 0.1076350212097168, |
|
"rewards/rejected": -0.6110790371894836, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.452746064639239e-06, |
|
"logits/chosen": 0.3957621455192566, |
|
"logits/rejected": 0.5071176290512085, |
|
"logps/chosen": -377.244873046875, |
|
"logps/rejected": -318.47821044921875, |
|
"loss": 2035.9066, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.4797874093055725, |
|
"rewards/margins": 0.0891679972410202, |
|
"rewards/rejected": -0.5689553022384644, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.423892824151617e-06, |
|
"logits/chosen": 0.5428576469421387, |
|
"logits/rejected": 0.5582025051116943, |
|
"logps/chosen": -325.265869140625, |
|
"logps/rejected": -279.44622802734375, |
|
"loss": 2099.3277, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4889600872993469, |
|
"rewards/margins": 0.08092018216848373, |
|
"rewards/rejected": -0.5698802471160889, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.3943970190891164e-06, |
|
"logits/chosen": 0.47033435106277466, |
|
"logits/rejected": 0.4538179039955139, |
|
"logps/chosen": -338.0190124511719, |
|
"logps/rejected": -284.2043151855469, |
|
"loss": 1990.1314, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.5050845742225647, |
|
"rewards/margins": 0.07665327191352844, |
|
"rewards/rejected": -0.5817378163337708, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.364268500811025e-06, |
|
"logits/chosen": 0.43608421087265015, |
|
"logits/rejected": 0.588505208492279, |
|
"logps/chosen": -355.6614685058594, |
|
"logps/rejected": -311.62603759765625, |
|
"loss": 1957.0082, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.5208545327186584, |
|
"rewards/margins": 0.10252735763788223, |
|
"rewards/rejected": -0.6233818531036377, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.333517331997704e-06, |
|
"logits/chosen": 0.5493937730789185, |
|
"logits/rejected": 0.5916265249252319, |
|
"logps/chosen": -346.2372131347656, |
|
"logps/rejected": -297.2783203125, |
|
"loss": 1937.0328, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.4746910631656647, |
|
"rewards/margins": 0.10906670242547989, |
|
"rewards/rejected": -0.583757758140564, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.302153783289737e-06, |
|
"logits/chosen": 0.5612093210220337, |
|
"logits/rejected": 0.5712814331054688, |
|
"logps/chosen": -299.03192138671875, |
|
"logps/rejected": -295.932373046875, |
|
"loss": 1899.6357, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.45807942748069763, |
|
"rewards/margins": 0.11209867149591446, |
|
"rewards/rejected": -0.5701780915260315, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.270188329857613e-06, |
|
"logits/chosen": 0.4900333285331726, |
|
"logits/rejected": 0.6181058287620544, |
|
"logps/chosen": -345.901123046875, |
|
"logps/rejected": -315.23150634765625, |
|
"loss": 1829.7244, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.44543027877807617, |
|
"rewards/margins": 0.11854176223278046, |
|
"rewards/rejected": -0.563971996307373, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.237631647903115e-06, |
|
"logits/chosen": 0.5171164274215698, |
|
"logits/rejected": 0.6000061631202698, |
|
"logps/chosen": -305.6175842285156, |
|
"logps/rejected": -288.61529541015625, |
|
"loss": 1928.8063, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.41954368352890015, |
|
"rewards/margins": 0.10498888790607452, |
|
"rewards/rejected": -0.5245326161384583, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.204494611093548e-06, |
|
"logits/chosen": 0.5002217292785645, |
|
"logits/rejected": 0.4931250512599945, |
|
"logps/chosen": -374.89447021484375, |
|
"logps/rejected": -323.4224853515625, |
|
"loss": 2052.2016, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.4047914445400238, |
|
"rewards/margins": 0.10931004583835602, |
|
"rewards/rejected": -0.5141014456748962, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.170788286930024e-06, |
|
"logits/chosen": 0.5889243483543396, |
|
"logits/rejected": 0.49473047256469727, |
|
"logps/chosen": -311.41204833984375, |
|
"logps/rejected": -297.2741394042969, |
|
"loss": 2161.5602, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.4144001007080078, |
|
"rewards/margins": 0.09458984434604645, |
|
"rewards/rejected": -0.5089899301528931, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.136523933051005e-06, |
|
"logits/chosen": 0.46171918511390686, |
|
"logits/rejected": 0.5521343350410461, |
|
"logps/chosen": -260.4045715332031, |
|
"logps/rejected": -253.0673370361328, |
|
"loss": 2058.6592, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.35439711809158325, |
|
"rewards/margins": 0.0739353746175766, |
|
"rewards/rejected": -0.42833250761032104, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.101712993472348e-06, |
|
"logits/chosen": 0.5095491409301758, |
|
"logits/rejected": 0.5851413607597351, |
|
"logps/chosen": -333.0138244628906, |
|
"logps/rejected": -299.2201232910156, |
|
"loss": 2114.274, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.3759954273700714, |
|
"rewards/margins": 0.07854921370744705, |
|
"rewards/rejected": -0.45454463362693787, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.066367094765091e-06, |
|
"logits/chosen": 0.5400400161743164, |
|
"logits/rejected": 0.6057177782058716, |
|
"logps/chosen": -314.9399108886719, |
|
"logps/rejected": -299.5291442871094, |
|
"loss": 2165.3699, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.3974139094352722, |
|
"rewards/margins": 0.0817367285490036, |
|
"rewards/rejected": -0.4791506826877594, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.030498042172277e-06, |
|
"logits/chosen": 0.46717318892478943, |
|
"logits/rejected": 0.5683552622795105, |
|
"logps/chosen": -311.09478759765625, |
|
"logps/rejected": -286.71856689453125, |
|
"loss": 1970.309, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.36804935336112976, |
|
"rewards/margins": 0.09274602681398392, |
|
"rewards/rejected": -0.46079540252685547, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.994117815666095e-06, |
|
"logits/chosen": 0.5168116688728333, |
|
"logits/rejected": 0.5277969241142273, |
|
"logps/chosen": -331.3283996582031, |
|
"logps/rejected": -316.9101257324219, |
|
"loss": 2094.1586, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.3705582618713379, |
|
"rewards/margins": 0.07725582271814346, |
|
"rewards/rejected": -0.44781407713890076, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.957238565946672e-06, |
|
"logits/chosen": 0.4891355633735657, |
|
"logits/rejected": 0.5829272270202637, |
|
"logps/chosen": -341.33270263671875, |
|
"logps/rejected": -326.12237548828125, |
|
"loss": 2015.8363, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.3461948037147522, |
|
"rewards/margins": 0.1047135442495346, |
|
"rewards/rejected": -0.450908362865448, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.919872610383831e-06, |
|
"logits/chosen": 0.5361688733100891, |
|
"logits/rejected": 0.4950820505619049, |
|
"logps/chosen": -301.51470947265625, |
|
"logps/rejected": -264.515625, |
|
"loss": 2067.884, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.31028127670288086, |
|
"rewards/margins": 0.07867135107517242, |
|
"rewards/rejected": -0.38895267248153687, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.882032428903195e-06, |
|
"logits/chosen": 0.5467667579650879, |
|
"logits/rejected": 0.6214498281478882, |
|
"logps/chosen": -310.25006103515625, |
|
"logps/rejected": -304.9784240722656, |
|
"loss": 1854.108, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.31836968660354614, |
|
"rewards/margins": 0.10814164578914642, |
|
"rewards/rejected": -0.42651137709617615, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.84373065981799e-06, |
|
"logits/chosen": 0.5906549692153931, |
|
"logits/rejected": 0.5477146506309509, |
|
"logps/chosen": -336.2082214355469, |
|
"logps/rejected": -320.98419189453125, |
|
"loss": 1870.9252, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.4078896641731262, |
|
"rewards/margins": 0.12890958786010742, |
|
"rewards/rejected": -0.5367991924285889, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.8049800956079552e-06, |
|
"logits/chosen": 0.5637394189834595, |
|
"logits/rejected": 0.5951513648033142, |
|
"logps/chosen": -312.58380126953125, |
|
"logps/rejected": -294.388671875, |
|
"loss": 2077.7766, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.4279128909111023, |
|
"rewards/margins": 0.0825895294547081, |
|
"rewards/rejected": -0.5105024576187134, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.765793678646753e-06, |
|
"logits/chosen": 0.49519261717796326, |
|
"logits/rejected": 0.6189637184143066, |
|
"logps/chosen": -306.1346435546875, |
|
"logps/rejected": -292.5857238769531, |
|
"loss": 1969.0184, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.36586111783981323, |
|
"rewards/margins": 0.12026441097259521, |
|
"rewards/rejected": -0.48612555861473083, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.726184496879323e-06, |
|
"logits/chosen": 0.4921692907810211, |
|
"logits/rejected": 0.501875102519989, |
|
"logps/chosen": -299.2643127441406, |
|
"logps/rejected": -286.10308837890625, |
|
"loss": 2031.8379, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.4021153450012207, |
|
"rewards/margins": 0.09204810857772827, |
|
"rewards/rejected": -0.494163453578949, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.686165779450619e-06, |
|
"logits/chosen": 0.4793570935726166, |
|
"logits/rejected": 0.529563307762146, |
|
"logps/chosen": -292.5688171386719, |
|
"logps/rejected": -271.9722595214844, |
|
"loss": 2042.7758, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.35684093832969666, |
|
"rewards/margins": 0.09431228041648865, |
|
"rewards/rejected": -0.4511532187461853, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.645750892287178e-06, |
|
"logits/chosen": 0.5109771490097046, |
|
"logits/rejected": 0.5633312463760376, |
|
"logps/chosen": -340.49176025390625, |
|
"logps/rejected": -291.06756591796875, |
|
"loss": 1950.8371, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.33940690755844116, |
|
"rewards/margins": 0.10259196907281876, |
|
"rewards/rejected": -0.4419988691806793, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.604953333633009e-06, |
|
"logits/chosen": 0.5391252636909485, |
|
"logits/rejected": 0.5337890982627869, |
|
"logps/chosen": -289.8169860839844, |
|
"logps/rejected": -260.87677001953125, |
|
"loss": 1936.325, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.32662123441696167, |
|
"rewards/margins": 0.08759995549917221, |
|
"rewards/rejected": -0.4142211973667145, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.56378672954129e-06, |
|
"logits/chosen": 0.5750656127929688, |
|
"logits/rejected": 0.6396702527999878, |
|
"logps/chosen": -315.6180114746094, |
|
"logps/rejected": -275.87847900390625, |
|
"loss": 1988.6426, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.3871499300003052, |
|
"rewards/margins": 0.11068111658096313, |
|
"rewards/rejected": -0.4978310167789459, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5222648293233806e-06, |
|
"logits/chosen": 0.5217655897140503, |
|
"logits/rejected": 0.5655398368835449, |
|
"logps/chosen": -312.867919921875, |
|
"logps/rejected": -288.189697265625, |
|
"loss": 2051.176, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.36912721395492554, |
|
"rewards/margins": 0.11765513569116592, |
|
"rewards/rejected": -0.48678237199783325, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.4804015009566573e-06, |
|
"logits/chosen": 0.5041700005531311, |
|
"logits/rejected": 0.5614610314369202, |
|
"logps/chosen": -299.4610595703125, |
|
"logps/rejected": -294.58990478515625, |
|
"loss": 1867.1797, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.3925167918205261, |
|
"rewards/margins": 0.11273722350597382, |
|
"rewards/rejected": -0.5052539706230164, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4382107264527244e-06, |
|
"logits/chosen": 0.5158332586288452, |
|
"logits/rejected": 0.5967845916748047, |
|
"logps/chosen": -292.78387451171875, |
|
"logps/rejected": -286.22259521484375, |
|
"loss": 2125.177, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.38172584772109985, |
|
"rewards/margins": 0.08153903484344482, |
|
"rewards/rejected": -0.4632648825645447, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.3957065971875387e-06, |
|
"logits/chosen": 0.5155011415481567, |
|
"logits/rejected": 0.6147416234016418, |
|
"logps/chosen": -309.46826171875, |
|
"logps/rejected": -292.97857666015625, |
|
"loss": 2040.8613, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.3627835512161255, |
|
"rewards/margins": 0.08301069587469101, |
|
"rewards/rejected": -0.4457942843437195, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.352903309194999e-06, |
|
"logits/chosen": 0.49975937604904175, |
|
"logits/rejected": 0.5919264554977417, |
|
"logps/chosen": -314.7627868652344, |
|
"logps/rejected": -304.3371276855469, |
|
"loss": 2078.6633, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.3559049963951111, |
|
"rewards/margins": 0.08720938861370087, |
|
"rewards/rejected": -0.44311434030532837, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.309815158425591e-06, |
|
"logits/chosen": 0.5549314618110657, |
|
"logits/rejected": 0.45847368240356445, |
|
"logps/chosen": -302.06573486328125, |
|
"logps/rejected": -272.84136962890625, |
|
"loss": 2036.8318, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.35362687706947327, |
|
"rewards/margins": 0.08619710803031921, |
|
"rewards/rejected": -0.4398239552974701, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.266456535971654e-06, |
|
"logits/chosen": 0.5452480912208557, |
|
"logits/rejected": 0.6346302628517151, |
|
"logps/chosen": -334.15423583984375, |
|
"logps/rejected": -286.4046630859375, |
|
"loss": 2093.8855, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.37248533964157104, |
|
"rewards/margins": 0.08576939254999161, |
|
"rewards/rejected": -0.45825472474098206, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.2228419232608692e-06, |
|
"logits/chosen": 0.535306990146637, |
|
"logits/rejected": 0.4712342321872711, |
|
"logps/chosen": -282.85968017578125, |
|
"logps/rejected": -274.203125, |
|
"loss": 2098.0096, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.34499186277389526, |
|
"rewards/margins": 0.08891385793685913, |
|
"rewards/rejected": -0.433905690908432, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1789858872195888e-06, |
|
"logits/chosen": 0.6342380046844482, |
|
"logits/rejected": 0.629281759262085, |
|
"logps/chosen": -286.6862487792969, |
|
"logps/rejected": -271.05792236328125, |
|
"loss": 1953.5867, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.3626910150051117, |
|
"rewards/margins": 0.0837903618812561, |
|
"rewards/rejected": -0.4464813768863678, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.1349030754075945e-06, |
|
"logits/chosen": 0.5246872901916504, |
|
"logits/rejected": 0.5165098905563354, |
|
"logps/chosen": -342.14508056640625, |
|
"logps/rejected": -293.86541748046875, |
|
"loss": 1883.041, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.3446907103061676, |
|
"rewards/margins": 0.09030432999134064, |
|
"rewards/rejected": -0.43499502539634705, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.0906082111259313e-06, |
|
"logits/chosen": 0.5085369944572449, |
|
"logits/rejected": 0.6210469603538513, |
|
"logps/chosen": -324.135498046875, |
|
"logps/rejected": -288.7994079589844, |
|
"loss": 2054.7223, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.4166959822177887, |
|
"rewards/margins": 0.06678648293018341, |
|
"rewards/rejected": -0.4834825098514557, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.046116088499449e-06, |
|
"logits/chosen": 0.4991762638092041, |
|
"logits/rejected": 0.5716468691825867, |
|
"logps/chosen": -322.47100830078125, |
|
"logps/rejected": -305.85247802734375, |
|
"loss": 1875.9535, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.4109939634799957, |
|
"rewards/margins": 0.1062377318739891, |
|
"rewards/rejected": -0.5172317028045654, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0014415675356813e-06, |
|
"logits/chosen": 0.5478680729866028, |
|
"logits/rejected": 0.5703476667404175, |
|
"logps/chosen": -349.9249572753906, |
|
"logps/rejected": -293.72625732421875, |
|
"loss": 2017.5168, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.42143529653549194, |
|
"rewards/margins": 0.10333251953125, |
|
"rewards/rejected": -0.5247678160667419, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9565995691617242e-06, |
|
"logits/chosen": 0.5334219932556152, |
|
"logits/rejected": 0.5596605539321899, |
|
"logps/chosen": -266.18426513671875, |
|
"logps/rejected": -274.7750244140625, |
|
"loss": 2175.4988, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.4017234444618225, |
|
"rewards/margins": 0.06810633838176727, |
|
"rewards/rejected": -0.469829797744751, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9116050702407706e-06, |
|
"logits/chosen": 0.602304995059967, |
|
"logits/rejected": 0.5311247110366821, |
|
"logps/chosen": -277.86669921875, |
|
"logps/rejected": -269.7265319824219, |
|
"loss": 2177.3363, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.4107086658477783, |
|
"rewards/margins": 0.04552067071199417, |
|
"rewards/rejected": -0.4562292993068695, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8664730985699537e-06, |
|
"logits/chosen": 0.503399670124054, |
|
"logits/rejected": 0.639790415763855, |
|
"logps/chosen": -273.44622802734375, |
|
"logps/rejected": -260.8460693359375, |
|
"loss": 2045.4781, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.35943537950515747, |
|
"rewards/margins": 0.06276627629995346, |
|
"rewards/rejected": -0.42220163345336914, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8212187278611907e-06, |
|
"logits/chosen": 0.5885658860206604, |
|
"logits/rejected": 0.5756844282150269, |
|
"logps/chosen": -325.32952880859375, |
|
"logps/rejected": -282.21453857421875, |
|
"loss": 1891.5383, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.33539459109306335, |
|
"rewards/margins": 0.12442357838153839, |
|
"rewards/rejected": -0.45981818437576294, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7758570727066843e-06, |
|
"logits/chosen": 0.6374679803848267, |
|
"logits/rejected": 0.6624254584312439, |
|
"logps/chosen": -295.37738037109375, |
|
"logps/rejected": -270.3449401855469, |
|
"loss": 1947.8027, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.31836217641830444, |
|
"rewards/margins": 0.11628556251525879, |
|
"rewards/rejected": -0.43464773893356323, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.730403283530767e-06, |
|
"logits/chosen": 0.5361552238464355, |
|
"logits/rejected": 0.5510913133621216, |
|
"logps/chosen": -301.1198425292969, |
|
"logps/rejected": -294.21234130859375, |
|
"loss": 2041.7438, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.34723925590515137, |
|
"rewards/margins": 0.09882111847400665, |
|
"rewards/rejected": -0.44606032967567444, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6848725415297888e-06, |
|
"logits/chosen": 0.4825092852115631, |
|
"logits/rejected": 0.5265048742294312, |
|
"logps/chosen": -304.63140869140625, |
|
"logps/rejected": -276.94464111328125, |
|
"loss": 1806.9965, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.360734760761261, |
|
"rewards/margins": 0.12996909022331238, |
|
"rewards/rejected": -0.49070388078689575, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.639280053601719e-06, |
|
"logits/chosen": 0.5663384199142456, |
|
"logits/rejected": 0.6073741912841797, |
|
"logps/chosen": -316.7065124511719, |
|
"logps/rejected": -322.63494873046875, |
|
"loss": 2131.3072, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.40306559205055237, |
|
"rewards/margins": 0.07923634350299835, |
|
"rewards/rejected": -0.48230189085006714, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.59364104726716e-06, |
|
"logits/chosen": 0.5961092710494995, |
|
"logits/rejected": 0.6114310622215271, |
|
"logps/chosen": -321.1643981933594, |
|
"logps/rejected": -285.0345153808594, |
|
"loss": 1963.3941, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3763844072818756, |
|
"rewards/margins": 0.11694605648517609, |
|
"rewards/rejected": -0.4933304190635681, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.547970765583491e-06, |
|
"logits/chosen": 0.5694259405136108, |
|
"logits/rejected": 0.5002479553222656, |
|
"logps/chosen": -326.5146484375, |
|
"logps/rejected": -302.0265808105469, |
|
"loss": 2105.7691, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.40365272760391235, |
|
"rewards/margins": 0.09866581857204437, |
|
"rewards/rejected": -0.5023185014724731, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.502284462053799e-06, |
|
"logits/chosen": 0.48704004287719727, |
|
"logits/rejected": 0.5804616808891296, |
|
"logps/chosen": -332.91162109375, |
|
"logps/rejected": -303.14617919921875, |
|
"loss": 1834.8215, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3842596411705017, |
|
"rewards/margins": 0.12224143743515015, |
|
"rewards/rejected": -0.5065010786056519, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.456597395532338e-06, |
|
"logits/chosen": 0.4348418116569519, |
|
"logits/rejected": 0.5593420267105103, |
|
"logps/chosen": -335.1719055175781, |
|
"logps/rejected": -308.69830322265625, |
|
"loss": 1984.7055, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.3781774044036865, |
|
"rewards/margins": 0.09801622480154037, |
|
"rewards/rejected": -0.4761936068534851, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4109248251281953e-06, |
|
"logits/chosen": 0.5774113535881042, |
|
"logits/rejected": 0.6447092294692993, |
|
"logps/chosen": -314.6346740722656, |
|
"logps/rejected": -309.253173828125, |
|
"loss": 2091.5516, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3814038932323456, |
|
"rewards/margins": 0.10535021126270294, |
|
"rewards/rejected": -0.48675408959388733, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.365282005108875e-06, |
|
"logits/chosen": 0.5727014541625977, |
|
"logits/rejected": 0.5744349360466003, |
|
"logps/chosen": -324.27154541015625, |
|
"logps/rejected": -283.59686279296875, |
|
"loss": 1978.2016, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.3670370876789093, |
|
"rewards/margins": 0.101667121052742, |
|
"rewards/rejected": -0.46870413422584534, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.319684179805491e-06, |
|
"logits/chosen": 0.51801598072052, |
|
"logits/rejected": 0.5624555349349976, |
|
"logps/chosen": -300.46734619140625, |
|
"logps/rejected": -287.84307861328125, |
|
"loss": 1968.3572, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.34267181158065796, |
|
"rewards/margins": 0.11022396385669708, |
|
"rewards/rejected": -0.45289579033851624, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2741465785212905e-06, |
|
"logits/chosen": 0.5614932775497437, |
|
"logits/rejected": 0.5673717856407166, |
|
"logps/chosen": -306.4109191894531, |
|
"logps/rejected": -280.79205322265625, |
|
"loss": 2043.6598, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.3364596962928772, |
|
"rewards/margins": 0.0902407318353653, |
|
"rewards/rejected": -0.42670050263404846, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2286844104451848e-06, |
|
"logits/chosen": 0.5359445810317993, |
|
"logits/rejected": 0.6252576112747192, |
|
"logps/chosen": -322.9229736328125, |
|
"logps/rejected": -268.3149108886719, |
|
"loss": 2021.5035, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.3492088317871094, |
|
"rewards/margins": 0.09384563565254211, |
|
"rewards/rejected": -0.4430544972419739, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.183312859572008e-06, |
|
"logits/chosen": 0.4629904627799988, |
|
"logits/rejected": 0.6062875390052795, |
|
"logps/chosen": -305.40814208984375, |
|
"logps/rejected": -251.60107421875, |
|
"loss": 2015.4504, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.33251887559890747, |
|
"rewards/margins": 0.09136182069778442, |
|
"rewards/rejected": -0.4238806664943695, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1380470796311843e-06, |
|
"logits/chosen": 0.5688912868499756, |
|
"logits/rejected": 0.5831522941589355, |
|
"logps/chosen": -300.88079833984375, |
|
"logps/rejected": -278.1544494628906, |
|
"loss": 2065.1121, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.35972604155540466, |
|
"rewards/margins": 0.08174722641706467, |
|
"rewards/rejected": -0.4414733052253723, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.092902189025507e-06, |
|
"logits/chosen": 0.5642494559288025, |
|
"logits/rejected": 0.5858667492866516, |
|
"logps/chosen": -290.29217529296875, |
|
"logps/rejected": -271.4120788574219, |
|
"loss": 1997.6766, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.33833402395248413, |
|
"rewards/margins": 0.11596833169460297, |
|
"rewards/rejected": -0.4543024003505707, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0478932657817105e-06, |
|
"logits/chosen": 0.6026065349578857, |
|
"logits/rejected": 0.637354850769043, |
|
"logps/chosen": -318.32611083984375, |
|
"logps/rejected": -278.863037109375, |
|
"loss": 2020.9238, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3429223895072937, |
|
"rewards/margins": 0.10237185657024384, |
|
"rewards/rejected": -0.4452942907810211, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.0030353425145376e-06, |
|
"logits/chosen": 0.5387909412384033, |
|
"logits/rejected": 0.5283801555633545, |
|
"logps/chosen": -281.98944091796875, |
|
"logps/rejected": -282.2474060058594, |
|
"loss": 2121.0545, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.40112051367759705, |
|
"rewards/margins": 0.060013122856616974, |
|
"rewards/rejected": -0.4611336290836334, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.958343401405964e-06, |
|
"logits/chosen": 0.5407668352127075, |
|
"logits/rejected": 0.5168458223342896, |
|
"logps/chosen": -271.4942321777344, |
|
"logps/rejected": -262.18310546875, |
|
"loss": 1999.9113, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.3525828719139099, |
|
"rewards/margins": 0.07946565002202988, |
|
"rewards/rejected": -0.4320485591888428, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9138323692012734e-06, |
|
"logits/chosen": 0.5828490257263184, |
|
"logits/rejected": 0.6618238091468811, |
|
"logps/chosen": -355.33355712890625, |
|
"logps/rejected": -293.50323486328125, |
|
"loss": 2052.1723, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.3654106557369232, |
|
"rewards/margins": 0.09124873578548431, |
|
"rewards/rejected": -0.45665937662124634, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8695171122236443e-06, |
|
"logits/chosen": 0.5557527542114258, |
|
"logits/rejected": 0.6298555135726929, |
|
"logps/chosen": -312.16094970703125, |
|
"logps/rejected": -288.3011779785156, |
|
"loss": 1976.1777, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.3318810760974884, |
|
"rewards/margins": 0.12243027985095978, |
|
"rewards/rejected": -0.4543113708496094, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8254124314089225e-06, |
|
"logits/chosen": 0.49655452370643616, |
|
"logits/rejected": 0.6068973541259766, |
|
"logps/chosen": -294.2212829589844, |
|
"logps/rejected": -318.2776794433594, |
|
"loss": 2004.0912, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.33194708824157715, |
|
"rewards/margins": 0.09898443520069122, |
|
"rewards/rejected": -0.43093156814575195, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.781533057362221e-06, |
|
"logits/chosen": 0.5596984028816223, |
|
"logits/rejected": 0.5853079557418823, |
|
"logps/chosen": -322.6697082519531, |
|
"logps/rejected": -272.7930603027344, |
|
"loss": 1955.9035, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.369145929813385, |
|
"rewards/margins": 0.12208880484104156, |
|
"rewards/rejected": -0.491234689950943, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7378936454380277e-06, |
|
"logits/chosen": 0.5217611193656921, |
|
"logits/rejected": 0.5789054036140442, |
|
"logps/chosen": -336.248779296875, |
|
"logps/rejected": -306.63067626953125, |
|
"loss": 1938.6953, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.3923150599002838, |
|
"rewards/margins": 0.09614621102809906, |
|
"rewards/rejected": -0.48846131563186646, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.6945087708454273e-06, |
|
"logits/chosen": 0.5122044682502747, |
|
"logits/rejected": 0.54069584608078, |
|
"logps/chosen": -292.97711181640625, |
|
"logps/rejected": -275.40069580078125, |
|
"loss": 2029.4551, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.4053107798099518, |
|
"rewards/margins": 0.08846473693847656, |
|
"rewards/rejected": -0.49377545714378357, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.651392923780105e-06, |
|
"logits/chosen": 0.5897082090377808, |
|
"logits/rejected": 0.5889968276023865, |
|
"logps/chosen": -332.0108947753906, |
|
"logps/rejected": -292.29119873046875, |
|
"loss": 1970.8734, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.38728633522987366, |
|
"rewards/margins": 0.11461566388607025, |
|
"rewards/rejected": -0.5019019842147827, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.608560504584737e-06, |
|
"logits/chosen": 0.5301613807678223, |
|
"logits/rejected": 0.6047118902206421, |
|
"logps/chosen": -335.94598388671875, |
|
"logps/rejected": -308.5770263671875, |
|
"loss": 1999.6207, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.41463202238082886, |
|
"rewards/margins": 0.1080860048532486, |
|
"rewards/rejected": -0.522718071937561, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5660258189393945e-06, |
|
"logits/chosen": 0.46664172410964966, |
|
"logits/rejected": 0.6063970923423767, |
|
"logps/chosen": -292.8914489746094, |
|
"logps/rejected": -287.7743835449219, |
|
"loss": 1998.873, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.41233953833580017, |
|
"rewards/margins": 0.0787600427865982, |
|
"rewards/rejected": -0.49109959602355957, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5238030730835578e-06, |
|
"logits/chosen": 0.5474873185157776, |
|
"logits/rejected": 0.6035341024398804, |
|
"logps/chosen": -316.2177734375, |
|
"logps/rejected": -293.90936279296875, |
|
"loss": 2059.1195, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.4276704788208008, |
|
"rewards/margins": 0.09831003844738007, |
|
"rewards/rejected": -0.525980532169342, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4819063690713565e-06, |
|
"logits/chosen": 0.631860077381134, |
|
"logits/rejected": 0.604947566986084, |
|
"logps/chosen": -280.0582275390625, |
|
"logps/rejected": -259.1899108886719, |
|
"loss": 1954.3059, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.42631417512893677, |
|
"rewards/margins": 0.09757888317108154, |
|
"rewards/rejected": -0.5238930583000183, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4403497000615885e-06, |
|
"logits/chosen": 0.5674291253089905, |
|
"logits/rejected": 0.5523722767829895, |
|
"logps/chosen": -313.6990661621094, |
|
"logps/rejected": -303.3889465332031, |
|
"loss": 1995.9789, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.4305340647697449, |
|
"rewards/margins": 0.10536874830722809, |
|
"rewards/rejected": -0.5359027981758118, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.3991469456441273e-06, |
|
"logits/chosen": 0.5443872809410095, |
|
"logits/rejected": 0.629615843296051, |
|
"logps/chosen": -343.49072265625, |
|
"logps/rejected": -309.73480224609375, |
|
"loss": 1837.8883, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.43309682607650757, |
|
"rewards/margins": 0.1318664252758026, |
|
"rewards/rejected": -0.5649632811546326, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3583118672042441e-06, |
|
"logits/chosen": 0.43076056241989136, |
|
"logits/rejected": 0.4598851799964905, |
|
"logps/chosen": -333.4667053222656, |
|
"logps/rejected": -277.95086669921875, |
|
"loss": 1996.2734, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.44571250677108765, |
|
"rewards/margins": 0.10792438685894012, |
|
"rewards/rejected": -0.553636908531189, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3178581033264218e-06, |
|
"logits/chosen": 0.5987478494644165, |
|
"logits/rejected": 0.571110725402832, |
|
"logps/chosen": -277.62542724609375, |
|
"logps/rejected": -271.3750305175781, |
|
"loss": 2000.3379, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.4611617624759674, |
|
"rewards/margins": 0.09958922863006592, |
|
"rewards/rejected": -0.5607509613037109, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2777991652391757e-06, |
|
"logits/chosen": 0.5397824048995972, |
|
"logits/rejected": 0.5152544379234314, |
|
"logps/chosen": -285.2364501953125, |
|
"logps/rejected": -273.76214599609375, |
|
"loss": 1866.8488, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.44938772916793823, |
|
"rewards/margins": 0.10914802551269531, |
|
"rewards/rejected": -0.5585357546806335, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2381484323024178e-06, |
|
"logits/chosen": 0.4960288107395172, |
|
"logits/rejected": 0.5434283018112183, |
|
"logps/chosen": -351.0420837402344, |
|
"logps/rejected": -310.51483154296875, |
|
"loss": 2042.0777, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.45654526352882385, |
|
"rewards/margins": 0.11480891704559326, |
|
"rewards/rejected": -0.5713542103767395, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1989191475388518e-06, |
|
"logits/chosen": 0.5680415034294128, |
|
"logits/rejected": 0.5574635863304138, |
|
"logps/chosen": -313.9222106933594, |
|
"logps/rejected": -292.32843017578125, |
|
"loss": 2134.3227, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.43487685918807983, |
|
"rewards/margins": 0.0749758630990982, |
|
"rewards/rejected": -0.5098527669906616, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.160124413210918e-06, |
|
"logits/chosen": 0.49776411056518555, |
|
"logits/rejected": 0.5919879078865051, |
|
"logps/chosen": -329.8394775390625, |
|
"logps/rejected": -288.6294860839844, |
|
"loss": 1978.9926, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.38458770513534546, |
|
"rewards/margins": 0.08599748462438583, |
|
"rewards/rejected": -0.4705851972103119, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1217771864447396e-06, |
|
"logits/chosen": 0.4789052903652191, |
|
"logits/rejected": 0.5946076512336731, |
|
"logps/chosen": -343.82440185546875, |
|
"logps/rejected": -312.8480224609375, |
|
"loss": 1838.2496, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.41200247406959534, |
|
"rewards/margins": 0.13190023601055145, |
|
"rewards/rejected": -0.5439027547836304, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.08389027490255e-06, |
|
"logits/chosen": 0.518727719783783, |
|
"logits/rejected": 0.5448898673057556, |
|
"logps/chosen": -308.6346130371094, |
|
"logps/rejected": -262.6923828125, |
|
"loss": 2192.4686, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.45256251096725464, |
|
"rewards/margins": 0.08636941015720367, |
|
"rewards/rejected": -0.5389319062232971, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.046476332505036e-06, |
|
"logits/chosen": 0.5441064834594727, |
|
"logits/rejected": 0.6444369554519653, |
|
"logps/chosen": -301.1471862792969, |
|
"logps/rejected": -272.04296875, |
|
"loss": 2016.5557, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.3905426263809204, |
|
"rewards/margins": 0.11363613605499268, |
|
"rewards/rejected": -0.5041787028312683, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0095478552050348e-06, |
|
"logits/chosen": 0.5234050750732422, |
|
"logits/rejected": 0.6232806444168091, |
|
"logps/chosen": -278.05462646484375, |
|
"logps/rejected": -237.6455078125, |
|
"loss": 1943.2496, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.4108065664768219, |
|
"rewards/margins": 0.09128499031066895, |
|
"rewards/rejected": -0.5020915269851685, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.731171768139808e-07, |
|
"logits/chosen": 0.5350190997123718, |
|
"logits/rejected": 0.5485085248947144, |
|
"logps/chosen": -287.7547607421875, |
|
"logps/rejected": -263.4801025390625, |
|
"loss": 2019.2094, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3987637162208557, |
|
"rewards/margins": 0.07709892094135284, |
|
"rewards/rejected": -0.47586268186569214, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.371964648825221e-07, |
|
"logits/chosen": 0.5190201997756958, |
|
"logits/rejected": 0.5698527693748474, |
|
"logps/chosen": -345.8193664550781, |
|
"logps/rejected": -291.22821044921875, |
|
"loss": 1915.1037, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.39381805062294006, |
|
"rewards/margins": 0.11871011555194855, |
|
"rewards/rejected": -0.512528121471405, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.017977166366445e-07, |
|
"logits/chosen": 0.5621356964111328, |
|
"logits/rejected": 0.574070155620575, |
|
"logps/chosen": -301.27276611328125, |
|
"logps/rejected": -268.485595703125, |
|
"loss": 2027.6629, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.4134562611579895, |
|
"rewards/margins": 0.08553650230169296, |
|
"rewards/rejected": -0.49899277091026306, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.669327549707096e-07, |
|
"logits/chosen": 0.5577922463417053, |
|
"logits/rejected": 0.6254245042800903, |
|
"logps/chosen": -307.9657897949219, |
|
"logps/rejected": -278.01068115234375, |
|
"loss": 1957.0965, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.38403937220573425, |
|
"rewards/margins": 0.10890159755945206, |
|
"rewards/rejected": -0.4929409921169281, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.326132244986932e-07, |
|
"logits/chosen": 0.5165051817893982, |
|
"logits/rejected": 0.463472455739975, |
|
"logps/chosen": -325.6259460449219, |
|
"logps/rejected": -282.59246826171875, |
|
"loss": 1931.7, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.3894692063331604, |
|
"rewards/margins": 0.11373750865459442, |
|
"rewards/rejected": -0.5032067894935608, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.988505876649863e-07, |
|
"logits/chosen": 0.5601966977119446, |
|
"logits/rejected": 0.5657812356948853, |
|
"logps/chosen": -329.68304443359375, |
|
"logps/rejected": -279.58612060546875, |
|
"loss": 2044.8211, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.4129094183444977, |
|
"rewards/margins": 0.08905430138111115, |
|
"rewards/rejected": -0.5019636750221252, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.656561209160248e-07, |
|
"logits/chosen": 0.5333635210990906, |
|
"logits/rejected": 0.6559829711914062, |
|
"logps/chosen": -334.6247863769531, |
|
"logps/rejected": -293.94403076171875, |
|
"loss": 1924.4891, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.3982022702693939, |
|
"rewards/margins": 0.12732462584972382, |
|
"rewards/rejected": -0.5255268812179565, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.330409109340563e-07, |
|
"logits/chosen": 0.44074922800064087, |
|
"logits/rejected": 0.5341771245002747, |
|
"logps/chosen": -325.88763427734375, |
|
"logps/rejected": -301.38427734375, |
|
"loss": 2064.7137, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.39723503589630127, |
|
"rewards/margins": 0.09469970315694809, |
|
"rewards/rejected": -0.49193471670150757, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.010158509342682e-07, |
|
"logits/chosen": 0.6108728647232056, |
|
"logits/rejected": 0.5816964507102966, |
|
"logps/chosen": -280.2751159667969, |
|
"logps/rejected": -272.6864929199219, |
|
"loss": 2012.7195, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.39345765113830566, |
|
"rewards/margins": 0.10830279439687729, |
|
"rewards/rejected": -0.5017604827880859, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.695916370265529e-07, |
|
"logits/chosen": 0.5402048826217651, |
|
"logits/rejected": 0.5511065721511841, |
|
"logps/chosen": -293.0211486816406, |
|
"logps/rejected": -299.3897399902344, |
|
"loss": 2051.2686, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.38481405377388, |
|
"rewards/margins": 0.09063725918531418, |
|
"rewards/rejected": -0.4754512906074524, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.387787646430854e-07, |
|
"logits/chosen": 0.4866926670074463, |
|
"logits/rejected": 0.553752064704895, |
|
"logps/chosen": -323.8518371582031, |
|
"logps/rejected": -315.7661437988281, |
|
"loss": 1892.2789, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.40690216422080994, |
|
"rewards/margins": 0.1288827359676361, |
|
"rewards/rejected": -0.535784900188446, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.085875250329401e-07, |
|
"logits/chosen": 0.5334519743919373, |
|
"logits/rejected": 0.629740834236145, |
|
"logps/chosen": -342.43341064453125, |
|
"logps/rejected": -304.27178955078125, |
|
"loss": 1996.1777, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.4247370660305023, |
|
"rewards/margins": 0.07812504470348358, |
|
"rewards/rejected": -0.5028620958328247, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.79028001824894e-07, |
|
"logits/chosen": 0.536971926689148, |
|
"logits/rejected": 0.6389668583869934, |
|
"logps/chosen": -344.0328063964844, |
|
"logps/rejected": -307.81195068359375, |
|
"loss": 2099.0133, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.4052516520023346, |
|
"rewards/margins": 0.1204022616147995, |
|
"rewards/rejected": -0.5256539583206177, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.501100676595761e-07, |
|
"logits/chosen": 0.501252293586731, |
|
"logits/rejected": 0.582123875617981, |
|
"logps/chosen": -350.5921936035156, |
|
"logps/rejected": -305.71551513671875, |
|
"loss": 2080.3439, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.386319637298584, |
|
"rewards/margins": 0.1132698804140091, |
|
"rewards/rejected": -0.4995895326137543, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.218433808920884e-07, |
|
"logits/chosen": 0.5797770619392395, |
|
"logits/rejected": 0.5843828916549683, |
|
"logps/chosen": -311.27313232421875, |
|
"logps/rejected": -272.329833984375, |
|
"loss": 1840.9139, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.4069671630859375, |
|
"rewards/margins": 0.10061927884817123, |
|
"rewards/rejected": -0.5075864195823669, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.942373823661928e-07, |
|
"logits/chosen": 0.4943598806858063, |
|
"logits/rejected": 0.5981740355491638, |
|
"logps/chosen": -339.5876770019531, |
|
"logps/rejected": -323.6890563964844, |
|
"loss": 2091.2941, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.42342454195022583, |
|
"rewards/margins": 0.09093138575553894, |
|
"rewards/rejected": -0.5143559575080872, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.6730129226114363e-07, |
|
"logits/chosen": 0.5075241327285767, |
|
"logits/rejected": 0.5773884654045105, |
|
"logps/chosen": -301.22906494140625, |
|
"logps/rejected": -278.94293212890625, |
|
"loss": 2206.433, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.4186634421348572, |
|
"rewards/margins": 0.08012911677360535, |
|
"rewards/rejected": -0.4987925887107849, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.4104410701222703e-07, |
|
"logits/chosen": 0.47776398062705994, |
|
"logits/rejected": 0.6287192106246948, |
|
"logps/chosen": -321.4864196777344, |
|
"logps/rejected": -324.2401428222656, |
|
"loss": 2054.8516, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.4205222725868225, |
|
"rewards/margins": 0.08562308549880981, |
|
"rewards/rejected": -0.5061453580856323, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.154745963060197e-07, |
|
"logits/chosen": 0.5759707093238831, |
|
"logits/rejected": 0.6366346478462219, |
|
"logps/chosen": -310.05487060546875, |
|
"logps/rejected": -309.42303466796875, |
|
"loss": 1960.8133, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.40062469244003296, |
|
"rewards/margins": 0.12129988521337509, |
|
"rewards/rejected": -0.521924614906311, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.9060130015138863e-07, |
|
"logits/chosen": 0.5741788148880005, |
|
"logits/rejected": 0.6168212294578552, |
|
"logps/chosen": -313.01531982421875, |
|
"logps/rejected": -290.6816101074219, |
|
"loss": 1803.5051, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.39632636308670044, |
|
"rewards/margins": 0.0985620766878128, |
|
"rewards/rejected": -0.49488845467567444, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.664325260271953e-07, |
|
"logits/chosen": 0.5977301001548767, |
|
"logits/rejected": 0.5980079770088196, |
|
"logps/chosen": -305.8802185058594, |
|
"logps/rejected": -245.7880859375, |
|
"loss": 1887.7617, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.38649195432662964, |
|
"rewards/margins": 0.1288624256849289, |
|
"rewards/rejected": -0.5153544545173645, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.429763461076677e-07, |
|
"logits/chosen": 0.4545535147190094, |
|
"logits/rejected": 0.656732439994812, |
|
"logps/chosen": -348.40594482421875, |
|
"logps/rejected": -304.6084899902344, |
|
"loss": 2087.083, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.39987581968307495, |
|
"rewards/margins": 0.09592723846435547, |
|
"rewards/rejected": -0.4958030581474304, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.202405945663556e-07, |
|
"logits/chosen": 0.5257354974746704, |
|
"logits/rejected": 0.5875495076179504, |
|
"logps/chosen": -321.76397705078125, |
|
"logps/rejected": -286.9283142089844, |
|
"loss": 2075.5064, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.39552539587020874, |
|
"rewards/margins": 0.10027774423360825, |
|
"rewards/rejected": -0.4958031177520752, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.982328649595856e-07, |
|
"logits/chosen": 0.4653220772743225, |
|
"logits/rejected": 0.5480079650878906, |
|
"logps/chosen": -309.8276672363281, |
|
"logps/rejected": -274.49029541015625, |
|
"loss": 1948.8629, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.3729521930217743, |
|
"rewards/margins": 0.1282852441072464, |
|
"rewards/rejected": -0.5012374520301819, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.7696050769026954e-07, |
|
"logits/chosen": 0.5885381698608398, |
|
"logits/rejected": 0.5182594060897827, |
|
"logps/chosen": -282.14544677734375, |
|
"logps/rejected": -316.86639404296875, |
|
"loss": 1897.5818, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.4102315902709961, |
|
"rewards/margins": 0.10423590242862701, |
|
"rewards/rejected": -0.5144674777984619, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.564306275529341e-07, |
|
"logits/chosen": 0.5538032650947571, |
|
"logits/rejected": 0.6108819246292114, |
|
"logps/chosen": -317.8446960449219, |
|
"logps/rejected": -322.9876403808594, |
|
"loss": 1870.0818, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.40971916913986206, |
|
"rewards/margins": 0.10465312004089355, |
|
"rewards/rejected": -0.5143723487854004, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.3665008136077332e-07, |
|
"logits/chosen": 0.5508009791374207, |
|
"logits/rejected": 0.6037713885307312, |
|
"logps/chosen": -357.86907958984375, |
|
"logps/rejected": -307.35723876953125, |
|
"loss": 2033.6316, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4212369918823242, |
|
"rewards/margins": 0.09935277700424194, |
|
"rewards/rejected": -0.5205897688865662, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1762547565553293e-07, |
|
"logits/chosen": 0.5105335712432861, |
|
"logits/rejected": 0.5201815366744995, |
|
"logps/chosen": -263.2312316894531, |
|
"logps/rejected": -266.52801513671875, |
|
"loss": 2212.5818, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.4164590835571289, |
|
"rewards/margins": 0.04953977093100548, |
|
"rewards/rejected": -0.4659988284111023, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.993631645009747e-07, |
|
"logits/chosen": 0.5417348742485046, |
|
"logits/rejected": 0.5702573657035828, |
|
"logps/chosen": -342.66839599609375, |
|
"logps/rejected": -324.40557861328125, |
|
"loss": 2001.3066, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.42196089029312134, |
|
"rewards/margins": 0.09920088946819305, |
|
"rewards/rejected": -0.5211617350578308, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.818692473606748e-07, |
|
"logits/chosen": 0.5118446350097656, |
|
"logits/rejected": 0.6164907217025757, |
|
"logps/chosen": -285.0245056152344, |
|
"logps/rejected": -270.7235412597656, |
|
"loss": 1936.7527, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.39391833543777466, |
|
"rewards/margins": 0.10235867649316788, |
|
"rewards/rejected": -0.49627700448036194, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.6514956706084885e-07, |
|
"logits/chosen": 0.5716298818588257, |
|
"logits/rejected": 0.533431351184845, |
|
"logps/chosen": -298.3558654785156, |
|
"logps/rejected": -315.1064453125, |
|
"loss": 2030.7699, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.444709837436676, |
|
"rewards/margins": 0.07402367889881134, |
|
"rewards/rejected": -0.5187335014343262, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.4920970783889737e-07, |
|
"logits/chosen": 0.41161495447158813, |
|
"logits/rejected": 0.5431499481201172, |
|
"logps/chosen": -335.1160583496094, |
|
"logps/rejected": -285.2781677246094, |
|
"loss": 2117.8803, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.382477343082428, |
|
"rewards/margins": 0.10167716443538666, |
|
"rewards/rejected": -0.48415452241897583, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.340549934783164e-07, |
|
"logits/chosen": 0.5718288421630859, |
|
"logits/rejected": 0.5717177987098694, |
|
"logps/chosen": -286.6661682128906, |
|
"logps/rejected": -286.0840759277344, |
|
"loss": 2009.7559, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.40986448526382446, |
|
"rewards/margins": 0.08955219388008118, |
|
"rewards/rejected": -0.49941664934158325, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.196904855305961e-07, |
|
"logits/chosen": 0.5395318865776062, |
|
"logits/rejected": 0.6408040523529053, |
|
"logps/chosen": -318.8950500488281, |
|
"logps/rejected": -298.7578125, |
|
"loss": 2034.2352, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.4017771780490875, |
|
"rewards/margins": 0.11808891594409943, |
|
"rewards/rejected": -0.5198661088943481, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0612098162470302e-07, |
|
"logits/chosen": 0.4985600411891937, |
|
"logits/rejected": 0.5432295799255371, |
|
"logps/chosen": -286.3262634277344, |
|
"logps/rejected": -289.0979919433594, |
|
"loss": 1954.6828, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.39057815074920654, |
|
"rewards/margins": 0.0826260969042778, |
|
"rewards/rejected": -0.47320422530174255, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.335101386471285e-08, |
|
"logits/chosen": 0.5725753307342529, |
|
"logits/rejected": 0.6116470098495483, |
|
"logps/chosen": -335.08843994140625, |
|
"logps/rejected": -312.4408264160156, |
|
"loss": 2050.2887, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.40838685631752014, |
|
"rewards/margins": 0.0921243280172348, |
|
"rewards/rejected": -0.5005111694335938, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 8.138484731612273e-08, |
|
"logits/chosen": 0.6018707156181335, |
|
"logits/rejected": 0.5222383141517639, |
|
"logps/chosen": -253.42379760742188, |
|
"logps/rejected": -274.9010925292969, |
|
"loss": 2175.2457, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.3636482059955597, |
|
"rewards/margins": 0.072720468044281, |
|
"rewards/rejected": -0.4363686442375183, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.022647858135501e-08, |
|
"logits/chosen": 0.5884579420089722, |
|
"logits/rejected": 0.5674210786819458, |
|
"logps/chosen": -338.01837158203125, |
|
"logps/rejected": -324.35980224609375, |
|
"loss": 2028.5854, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.42924776673316956, |
|
"rewards/margins": 0.09629428386688232, |
|
"rewards/rejected": -0.5255420804023743, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.987963446492384e-08, |
|
"logits/chosen": 0.5171129703521729, |
|
"logits/rejected": 0.6137627363204956, |
|
"logps/chosen": -315.68609619140625, |
|
"logps/rejected": -290.91619873046875, |
|
"loss": 2091.5113, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.41310811042785645, |
|
"rewards/margins": 0.09795816987752914, |
|
"rewards/rejected": -0.5110663175582886, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.034777072871394e-08, |
|
"logits/chosen": 0.5506697297096252, |
|
"logits/rejected": 0.5700551867485046, |
|
"logps/chosen": -306.2592468261719, |
|
"logps/rejected": -284.780029296875, |
|
"loss": 2043.7551, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.3679637312889099, |
|
"rewards/margins": 0.10389117151498795, |
|
"rewards/rejected": -0.4718549847602844, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.163407093778243e-08, |
|
"logits/chosen": 0.4897570013999939, |
|
"logits/rejected": 0.565123438835144, |
|
"logps/chosen": -326.5579833984375, |
|
"logps/rejected": -310.0860595703125, |
|
"loss": 1972.2996, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.4172098636627197, |
|
"rewards/margins": 0.09725239127874374, |
|
"rewards/rejected": -0.5144622921943665, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.37414453970758e-08, |
|
"logits/chosen": 0.4947708249092102, |
|
"logits/rejected": 0.6116417050361633, |
|
"logps/chosen": -369.77789306640625, |
|
"logps/rejected": -310.3260192871094, |
|
"loss": 1986.4135, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.3952573835849762, |
|
"rewards/margins": 0.10770855844020844, |
|
"rewards/rejected": -0.5029659271240234, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.6672530179410183e-08, |
|
"logits/chosen": 0.49206972122192383, |
|
"logits/rejected": 0.6127623915672302, |
|
"logps/chosen": -326.89813232421875, |
|
"logps/rejected": -278.120849609375, |
|
"loss": 1947.9119, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4299922585487366, |
|
"rewards/margins": 0.10199449956417084, |
|
"rewards/rejected": -0.5319867730140686, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.04296862450451e-08, |
|
"logits/chosen": 0.5761113166809082, |
|
"logits/rejected": 0.5587304830551147, |
|
"logps/chosen": -328.021240234375, |
|
"logps/rejected": -305.7353210449219, |
|
"loss": 1986.5195, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.4139803349971771, |
|
"rewards/margins": 0.09623098373413086, |
|
"rewards/rejected": -0.5102113485336304, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.501499865314171e-08, |
|
"logits/chosen": 0.533464252948761, |
|
"logits/rejected": 0.5722233057022095, |
|
"logps/chosen": -323.434814453125, |
|
"logps/rejected": -295.4156188964844, |
|
"loss": 1949.7422, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.3916718363761902, |
|
"rewards/margins": 0.10514561086893082, |
|
"rewards/rejected": -0.4968174397945404, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.0430275865371265e-08, |
|
"logits/chosen": 0.5557342171669006, |
|
"logits/rejected": 0.5775563716888428, |
|
"logps/chosen": -320.87158203125, |
|
"logps/rejected": -306.94561767578125, |
|
"loss": 1832.5422, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.39497238397598267, |
|
"rewards/margins": 0.13699549436569214, |
|
"rewards/rejected": -0.53196781873703, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.677049141901315e-09, |
|
"logits/chosen": 0.5044248104095459, |
|
"logits/rejected": 0.5328904986381531, |
|
"logps/chosen": -296.86865234375, |
|
"logps/rejected": -287.9560546875, |
|
"loss": 1983.6154, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.39695119857788086, |
|
"rewards/margins": 0.09827554225921631, |
|
"rewards/rejected": -0.49522677063941956, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.756572029968708e-09, |
|
"logits/chosen": 0.481318861246109, |
|
"logits/rejected": 0.6070166230201721, |
|
"logps/chosen": -329.2176818847656, |
|
"logps/rejected": -315.81561279296875, |
|
"loss": 1846.1697, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.404033899307251, |
|
"rewards/margins": 0.11680416762828827, |
|
"rewards/rejected": -0.5208381414413452, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.6698199452053199e-09, |
|
"logits/chosen": 0.5581511855125427, |
|
"logits/rejected": 0.5964113473892212, |
|
"logps/chosen": -296.1249694824219, |
|
"logps/rejected": -285.54205322265625, |
|
"loss": 1886.4951, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.41859906911849976, |
|
"rewards/margins": 0.10544377565383911, |
|
"rewards/rejected": -0.5240427851676941, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.1748984585560094e-10, |
|
"logits/chosen": 0.5473756194114685, |
|
"logits/rejected": 0.5344858765602112, |
|
"logps/chosen": -310.27728271484375, |
|
"logps/rejected": -314.9624328613281, |
|
"loss": 2101.4789, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.41071709990501404, |
|
"rewards/margins": 0.09045806527137756, |
|
"rewards/rejected": -0.5011752247810364, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0, |
|
"logits/chosen": 0.6098369359970093, |
|
"logits/rejected": 0.6253079771995544, |
|
"logps/chosen": -299.2195739746094, |
|
"logps/rejected": -283.7442932128906, |
|
"loss": 2031.7352, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.4044032096862793, |
|
"rewards/margins": 0.08278089016675949, |
|
"rewards/rejected": -0.4871840476989746, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1910, |
|
"total_flos": 0.0, |
|
"train_loss": 2071.8518089414265, |
|
"train_runtime": 14310.5789, |
|
"train_samples_per_second": 4.272, |
|
"train_steps_per_second": 0.133 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1910, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|