|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.994601079784043, |
|
"eval_steps": 500, |
|
"global_step": 1248, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02399520095980804, |
|
"grad_norm": 24.58741331565172, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"logits/chosen": -0.5075146555900574, |
|
"logits/rejected": -0.31934085488319397, |
|
"logps/chosen": -1.394007921218872, |
|
"logps/rejected": -1.3630257844924927, |
|
"loss": 1.3501, |
|
"odds_ratio_loss": 0.8239962458610535, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.06970040500164032, |
|
"rewards/margins": -0.0015491036465391517, |
|
"rewards/rejected": -0.06815129518508911, |
|
"sft_loss": 1.394007921218872, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04799040191961608, |
|
"grad_norm": 4.281683015852783, |
|
"learning_rate": 3.5e-06, |
|
"logits/chosen": 0.08614908158779144, |
|
"logits/rejected": 0.3013238310813904, |
|
"logps/chosen": -1.3080074787139893, |
|
"logps/rejected": -1.334457278251648, |
|
"loss": 1.2858, |
|
"odds_ratio_loss": 0.7804475426673889, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.0654003769159317, |
|
"rewards/margins": 0.0013224859721958637, |
|
"rewards/rejected": -0.06672286242246628, |
|
"sft_loss": 1.3080074787139893, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.07198560287942411, |
|
"grad_norm": 3.830958349381369, |
|
"learning_rate": 4.99986910314335e-06, |
|
"logits/chosen": 0.3485943675041199, |
|
"logits/rejected": 0.6042150855064392, |
|
"logps/chosen": -0.9540683627128601, |
|
"logps/rejected": -1.1750730276107788, |
|
"loss": 0.9904, |
|
"odds_ratio_loss": 0.6533687710762024, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.047703422605991364, |
|
"rewards/margins": 0.011050237342715263, |
|
"rewards/rejected": -0.05875365808606148, |
|
"sft_loss": 0.9540683627128601, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.09598080383923216, |
|
"grad_norm": 3.6776666943951675, |
|
"learning_rate": 4.998396670920005e-06, |
|
"logits/chosen": 0.17601105570793152, |
|
"logits/rejected": 0.5272272229194641, |
|
"logps/chosen": -0.898045539855957, |
|
"logps/rejected": -1.0136868953704834, |
|
"loss": 0.9614, |
|
"odds_ratio_loss": 0.6860688328742981, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.04490227997303009, |
|
"rewards/margins": 0.005782057531177998, |
|
"rewards/rejected": -0.05068434029817581, |
|
"sft_loss": 0.898045539855957, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.11997600479904019, |
|
"grad_norm": 2.636908991979515, |
|
"learning_rate": 4.995289152254744e-06, |
|
"logits/chosen": 0.2309066355228424, |
|
"logits/rejected": 0.22152824699878693, |
|
"logps/chosen": -0.9074997901916504, |
|
"logps/rejected": -1.0551084280014038, |
|
"loss": 0.9374, |
|
"odds_ratio_loss": 0.663613498210907, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.04537498578429222, |
|
"rewards/margins": 0.007380434311926365, |
|
"rewards/rejected": -0.05275542289018631, |
|
"sft_loss": 0.9074997901916504, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.14397120575884823, |
|
"grad_norm": 1.8300107701302537, |
|
"learning_rate": 4.990548580876516e-06, |
|
"logits/chosen": 0.307407021522522, |
|
"logits/rejected": 0.37507694959640503, |
|
"logps/chosen": -0.9279610514640808, |
|
"logps/rejected": -0.986476719379425, |
|
"loss": 0.9464, |
|
"odds_ratio_loss": 0.7063499093055725, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.04639805108308792, |
|
"rewards/margins": 0.00292578199878335, |
|
"rewards/rejected": -0.04932383447885513, |
|
"sft_loss": 0.9279610514640808, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.16796640671865626, |
|
"grad_norm": 3.8157191209486507, |
|
"learning_rate": 4.9841780592726385e-06, |
|
"logits/chosen": 0.19509825110435486, |
|
"logits/rejected": 0.2650177776813507, |
|
"logps/chosen": -0.9848098754882812, |
|
"logps/rejected": -1.0149097442626953, |
|
"loss": 0.9578, |
|
"odds_ratio_loss": 0.726799488067627, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.04924049228429794, |
|
"rewards/margins": 0.0015049913199618459, |
|
"rewards/rejected": -0.050745487213134766, |
|
"sft_loss": 0.9848098754882812, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.19196160767846432, |
|
"grad_norm": 4.078587531391316, |
|
"learning_rate": 4.976181756658363e-06, |
|
"logits/chosen": 0.061622969806194305, |
|
"logits/rejected": 0.2444450408220291, |
|
"logps/chosen": -0.8894473910331726, |
|
"logps/rejected": -1.0614734888076782, |
|
"loss": 0.9675, |
|
"odds_ratio_loss": 0.6382969617843628, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.04447237029671669, |
|
"rewards/margins": 0.008601305074989796, |
|
"rewards/rejected": -0.05307367444038391, |
|
"sft_loss": 0.8894473910331726, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.21595680863827235, |
|
"grad_norm": 2.9874023740770363, |
|
"learning_rate": 4.9665649062483115e-06, |
|
"logits/chosen": 0.6337467432022095, |
|
"logits/rejected": 0.7902036905288696, |
|
"logps/chosen": -0.9439412951469421, |
|
"logps/rejected": -0.9588793516159058, |
|
"loss": 0.9635, |
|
"odds_ratio_loss": 0.7716476917266846, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.047197069972753525, |
|
"rewards/margins": 0.0007468975381925702, |
|
"rewards/rejected": -0.047943972051143646, |
|
"sft_loss": 0.9439412951469421, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.23995200959808038, |
|
"grad_norm": 2.3029148332001745, |
|
"learning_rate": 4.955333801831578e-06, |
|
"logits/chosen": 0.49920982122421265, |
|
"logits/rejected": 0.6337569355964661, |
|
"logps/chosen": -0.8333128094673157, |
|
"logps/rejected": -1.059599757194519, |
|
"loss": 0.9453, |
|
"odds_ratio_loss": 0.6517213582992554, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.041665639728307724, |
|
"rewards/margins": 0.011314347386360168, |
|
"rewards/rejected": -0.05297998711466789, |
|
"sft_loss": 0.8333128094673157, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26394721055788845, |
|
"grad_norm": 2.8766587489414395, |
|
"learning_rate": 4.9424957936527295e-06, |
|
"logits/chosen": -0.28645992279052734, |
|
"logits/rejected": 0.04107431694865227, |
|
"logps/chosen": -0.9429195523262024, |
|
"logps/rejected": -0.9936224222183228, |
|
"loss": 0.9526, |
|
"odds_ratio_loss": 0.705885112285614, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.04714598134160042, |
|
"rewards/margins": 0.002535139676183462, |
|
"rewards/rejected": -0.04968111589550972, |
|
"sft_loss": 0.9429195523262024, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.28794241151769645, |
|
"grad_norm": 2.1411106644617703, |
|
"learning_rate": 4.92805928360141e-06, |
|
"logits/chosen": -0.29608479142189026, |
|
"logits/rejected": -0.21111997961997986, |
|
"logps/chosen": -0.888851523399353, |
|
"logps/rejected": -1.0842912197113037, |
|
"loss": 0.8904, |
|
"odds_ratio_loss": 0.5968859195709229, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.04444257169961929, |
|
"rewards/margins": 0.009771987795829773, |
|
"rewards/rejected": -0.054214559495449066, |
|
"sft_loss": 0.888851523399353, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.3119376124775045, |
|
"grad_norm": 2.1891227152981347, |
|
"learning_rate": 4.912033719713687e-06, |
|
"logits/chosen": 0.49228960275650024, |
|
"logits/rejected": 0.5680336952209473, |
|
"logps/chosen": -0.9152839779853821, |
|
"logps/rejected": -1.0058788061141968, |
|
"loss": 0.9427, |
|
"odds_ratio_loss": 0.6943625807762146, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.04576420038938522, |
|
"rewards/margins": 0.004529745317995548, |
|
"rewards/rejected": -0.0502939410507679, |
|
"sft_loss": 0.9152839779853821, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.3359328134373125, |
|
"grad_norm": 2.5131225459939, |
|
"learning_rate": 4.894429589988739e-06, |
|
"logits/chosen": -1.2468726634979248, |
|
"logits/rejected": -1.0485397577285767, |
|
"logps/chosen": -1.0104249715805054, |
|
"logps/rejected": -1.0477244853973389, |
|
"loss": 0.949, |
|
"odds_ratio_loss": 0.7160865068435669, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.05052124708890915, |
|
"rewards/margins": 0.0018649749690666795, |
|
"rewards/rejected": -0.05238622426986694, |
|
"sft_loss": 1.0104249715805054, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3599280143971206, |
|
"grad_norm": 2.696319834123575, |
|
"learning_rate": 4.875258415524945e-06, |
|
"logits/chosen": 0.039508234709501266, |
|
"logits/rejected": 0.23594827950000763, |
|
"logps/chosen": -0.904223620891571, |
|
"logps/rejected": -1.032157063484192, |
|
"loss": 0.9533, |
|
"odds_ratio_loss": 0.6739581823348999, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.04521118476986885, |
|
"rewards/margins": 0.0063966671004891396, |
|
"rewards/rejected": -0.051607854664325714, |
|
"sft_loss": 0.904223620891571, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.38392321535692864, |
|
"grad_norm": 2.241170193835809, |
|
"learning_rate": 4.85453274297985e-06, |
|
"logits/chosen": 0.4507044851779938, |
|
"logits/rejected": 0.7088828682899475, |
|
"logps/chosen": -0.9252007603645325, |
|
"logps/rejected": -1.0105345249176025, |
|
"loss": 0.9187, |
|
"odds_ratio_loss": 0.6664329171180725, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.0462600402534008, |
|
"rewards/margins": 0.004266692791134119, |
|
"rewards/rejected": -0.050526730716228485, |
|
"sft_loss": 0.9252007603645325, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.40791841631673664, |
|
"grad_norm": 1.759854296483571, |
|
"learning_rate": 4.832266136358951e-06, |
|
"logits/chosen": -0.12876208126544952, |
|
"logits/rejected": 0.014335835352540016, |
|
"logps/chosen": -0.8540490865707397, |
|
"logps/rejected": -0.9863293766975403, |
|
"loss": 0.926, |
|
"odds_ratio_loss": 0.6714656352996826, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.04270245134830475, |
|
"rewards/margins": 0.006614011712372303, |
|
"rewards/rejected": -0.04931646212935448, |
|
"sft_loss": 0.8540490865707397, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.4319136172765447, |
|
"grad_norm": 2.793191882203603, |
|
"learning_rate": 4.808473168138675e-06, |
|
"logits/chosen": 0.3617595136165619, |
|
"logits/rejected": 0.3396950364112854, |
|
"logps/chosen": -0.8613064885139465, |
|
"logps/rejected": -1.0067331790924072, |
|
"loss": 0.9162, |
|
"odds_ratio_loss": 0.6582903861999512, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.04306532442569733, |
|
"rewards/margins": 0.007271329872310162, |
|
"rewards/rejected": -0.050336651504039764, |
|
"sft_loss": 0.8613064885139465, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4559088182363527, |
|
"grad_norm": 1.7774141067161418, |
|
"learning_rate": 4.783169409729363e-06, |
|
"logits/chosen": 0.9685203433036804, |
|
"logits/rejected": 1.1009634733200073, |
|
"logps/chosen": -0.8521540760993958, |
|
"logps/rejected": -0.9150575399398804, |
|
"loss": 0.9004, |
|
"odds_ratio_loss": 0.7224193811416626, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.04260770231485367, |
|
"rewards/margins": 0.0031451724935323, |
|
"rewards/rejected": -0.0457528755068779, |
|
"sft_loss": 0.8521540760993958, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.47990401919616077, |
|
"grad_norm": 2.052107783396207, |
|
"learning_rate": 4.756371421284482e-06, |
|
"logits/chosen": 0.33597105741500854, |
|
"logits/rejected": 0.44187426567077637, |
|
"logps/chosen": -0.8725342750549316, |
|
"logps/rejected": -0.9003400802612305, |
|
"loss": 0.919, |
|
"odds_ratio_loss": 0.7135496735572815, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.04362671449780464, |
|
"rewards/margins": 0.0013902939390391111, |
|
"rewards/rejected": -0.04501700773835182, |
|
"sft_loss": 0.8725342750549316, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5038992201559688, |
|
"grad_norm": 2.3000145040966973, |
|
"learning_rate": 4.728096740862778e-06, |
|
"logits/chosen": 0.16287042200565338, |
|
"logits/rejected": 0.35098087787628174, |
|
"logps/chosen": -0.8514264822006226, |
|
"logps/rejected": -0.9913795590400696, |
|
"loss": 0.9096, |
|
"odds_ratio_loss": 0.6634506583213806, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.042571328580379486, |
|
"rewards/margins": 0.006997650023549795, |
|
"rewards/rejected": -0.04956897348165512, |
|
"sft_loss": 0.8514264822006226, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.5278944211157769, |
|
"grad_norm": 1.581079267248328, |
|
"learning_rate": 4.698363872950406e-06, |
|
"logits/chosen": 0.298981636762619, |
|
"logits/rejected": 0.49268895387649536, |
|
"logps/chosen": -0.8895601034164429, |
|
"logps/rejected": -1.026539921760559, |
|
"loss": 0.8744, |
|
"odds_ratio_loss": 0.6685082316398621, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.04447800666093826, |
|
"rewards/margins": 0.0068489923141896725, |
|
"rewards/rejected": -0.051326997578144073, |
|
"sft_loss": 0.8895601034164429, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.5518896220755849, |
|
"grad_norm": 1.7094822098553022, |
|
"learning_rate": 4.6671922763505915e-06, |
|
"logits/chosen": 0.34609514474868774, |
|
"logits/rejected": 0.5052930116653442, |
|
"logps/chosen": -0.863084614276886, |
|
"logps/rejected": -0.9836879968643188, |
|
"loss": 0.8905, |
|
"odds_ratio_loss": 0.6813028454780579, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.043154239654541016, |
|
"rewards/margins": 0.006030158139765263, |
|
"rewards/rejected": -0.049184400588274, |
|
"sft_loss": 0.863084614276886, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5758848230353929, |
|
"grad_norm": 1.9367159826113498, |
|
"learning_rate": 4.634602351448738e-06, |
|
"logits/chosen": 0.286350816488266, |
|
"logits/rejected": 0.3788919448852539, |
|
"logps/chosen": -0.8919585943222046, |
|
"logps/rejected": -0.9452742338180542, |
|
"loss": 0.9133, |
|
"odds_ratio_loss": 0.6905114650726318, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.04459793120622635, |
|
"rewards/margins": 0.0026657807175070047, |
|
"rewards/rejected": -0.04726371169090271, |
|
"sft_loss": 0.8919585943222046, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5998800239952009, |
|
"grad_norm": 2.0772847936555636, |
|
"learning_rate": 4.6006154268613015e-06, |
|
"logits/chosen": 0.4635019898414612, |
|
"logits/rejected": 0.5444530248641968, |
|
"logps/chosen": -0.8181222081184387, |
|
"logps/rejected": -0.9908831715583801, |
|
"loss": 0.8927, |
|
"odds_ratio_loss": 0.6295598149299622, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.04090610891580582, |
|
"rewards/margins": 0.008638045750558376, |
|
"rewards/rejected": -0.04954415559768677, |
|
"sft_loss": 0.8181222081184387, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.623875224955009, |
|
"grad_norm": 2.084215689408855, |
|
"learning_rate": 4.565253745477187e-06, |
|
"logits/chosen": 0.40253886580467224, |
|
"logits/rejected": 0.4625183045864105, |
|
"logps/chosen": -0.9301355481147766, |
|
"logps/rejected": -1.0306508541107178, |
|
"loss": 0.9162, |
|
"odds_ratio_loss": 0.6872043609619141, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.04650677740573883, |
|
"rewards/margins": 0.005025765858590603, |
|
"rewards/rejected": -0.05153254419565201, |
|
"sft_loss": 0.9301355481147766, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.647870425914817, |
|
"grad_norm": 1.9031984888179019, |
|
"learning_rate": 4.528540449900799e-06, |
|
"logits/chosen": 0.4078219532966614, |
|
"logits/rejected": 0.6789823174476624, |
|
"logps/chosen": -0.8785255551338196, |
|
"logps/rejected": -0.9139087796211243, |
|
"loss": 0.9176, |
|
"odds_ratio_loss": 0.7333613038063049, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.04392627626657486, |
|
"rewards/margins": 0.0017691642278805375, |
|
"rewards/rejected": -0.04569543898105621, |
|
"sft_loss": 0.8785255551338196, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.671865626874625, |
|
"grad_norm": 2.3067419173621113, |
|
"learning_rate": 4.490499567306256e-06, |
|
"logits/chosen": 0.304252564907074, |
|
"logits/rejected": 0.5160123109817505, |
|
"logps/chosen": -0.8951358795166016, |
|
"logps/rejected": -0.9636558294296265, |
|
"loss": 0.8917, |
|
"odds_ratio_loss": 0.69621342420578, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.04475679248571396, |
|
"rewards/margins": 0.0034259993117302656, |
|
"rewards/rejected": -0.04818279296159744, |
|
"sft_loss": 0.8951358795166016, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6958608278344331, |
|
"grad_norm": 3.1297290877323003, |
|
"learning_rate": 4.451155993712711e-06, |
|
"logits/chosen": 0.25184166431427, |
|
"logits/rejected": 0.43299436569213867, |
|
"logps/chosen": -0.808620810508728, |
|
"logps/rejected": -0.9780584573745728, |
|
"loss": 0.9379, |
|
"odds_ratio_loss": 0.6151310205459595, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.04043104499578476, |
|
"rewards/margins": 0.008471880108118057, |
|
"rewards/rejected": -0.048902928829193115, |
|
"sft_loss": 0.808620810508728, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.7198560287942412, |
|
"grad_norm": 2.001570442654457, |
|
"learning_rate": 4.410535477691041e-06, |
|
"logits/chosen": 0.6736063957214355, |
|
"logits/rejected": 0.8922637104988098, |
|
"logps/chosen": -0.8743098974227905, |
|
"logps/rejected": -1.0198915004730225, |
|
"loss": 0.8962, |
|
"odds_ratio_loss": 0.6545746326446533, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.043715499341487885, |
|
"rewards/margins": 0.0072790831327438354, |
|
"rewards/rejected": -0.05099458247423172, |
|
"sft_loss": 0.8743098974227905, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7438512297540492, |
|
"grad_norm": 3.088640251108737, |
|
"learning_rate": 4.368664603512586e-06, |
|
"logits/chosen": -0.10074709355831146, |
|
"logits/rejected": 0.08682968467473984, |
|
"logps/chosen": -0.7929955720901489, |
|
"logps/rejected": -0.9449365735054016, |
|
"loss": 0.8789, |
|
"odds_ratio_loss": 0.6474851369857788, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.03964977711439133, |
|
"rewards/margins": 0.007597046438604593, |
|
"rewards/rejected": -0.047246821224689484, |
|
"sft_loss": 0.7929955720901489, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.7678464307138573, |
|
"grad_norm": 2.278875813822025, |
|
"learning_rate": 4.325570773750952e-06, |
|
"logits/chosen": -0.22130906581878662, |
|
"logits/rejected": -0.028980206698179245, |
|
"logps/chosen": -0.8826779127120972, |
|
"logps/rejected": -1.0213041305541992, |
|
"loss": 0.9204, |
|
"odds_ratio_loss": 0.6443883180618286, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.04413389414548874, |
|
"rewards/margins": 0.006931307725608349, |
|
"rewards/rejected": -0.05106520652770996, |
|
"sft_loss": 0.8826779127120972, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.7918416316736653, |
|
"grad_norm": 1.6952516043840655, |
|
"learning_rate": 4.281282191348289e-06, |
|
"logits/chosen": 0.45927032828330994, |
|
"logits/rejected": 0.6593443751335144, |
|
"logps/chosen": -0.8378440141677856, |
|
"logps/rejected": -0.9682254791259766, |
|
"loss": 0.8995, |
|
"odds_ratio_loss": 0.6620376110076904, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.04189220070838928, |
|
"rewards/margins": 0.006519075483083725, |
|
"rewards/rejected": -0.04841126874089241, |
|
"sft_loss": 0.8378440141677856, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.8158368326334733, |
|
"grad_norm": 2.4806806819218794, |
|
"learning_rate": 4.235827841157748e-06, |
|
"logits/chosen": 0.01970214769244194, |
|
"logits/rejected": 0.11670324951410294, |
|
"logps/chosen": -0.8856766819953918, |
|
"logps/rejected": -1.0817759037017822, |
|
"loss": 0.8834, |
|
"odds_ratio_loss": 0.6194185018539429, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.04428383335471153, |
|
"rewards/margins": 0.009804959408938885, |
|
"rewards/rejected": -0.054088789969682693, |
|
"sft_loss": 0.8856766819953918, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.8398320335932813, |
|
"grad_norm": 1.5265892877639438, |
|
"learning_rate": 4.1892374709742186e-06, |
|
"logits/chosen": -0.7483745813369751, |
|
"logits/rejected": -0.42045336961746216, |
|
"logps/chosen": -0.7948485016822815, |
|
"logps/rejected": -0.9918915033340454, |
|
"loss": 0.9474, |
|
"odds_ratio_loss": 0.5842909812927246, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.03974242880940437, |
|
"rewards/margins": 0.009852146729826927, |
|
"rewards/rejected": -0.04959457367658615, |
|
"sft_loss": 0.7948485016822815, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8638272345530894, |
|
"grad_norm": 2.1051154185205543, |
|
"learning_rate": 4.141541572065762e-06, |
|
"logits/chosen": 0.41192498803138733, |
|
"logits/rejected": 0.5341157913208008, |
|
"logps/chosen": -0.7971394658088684, |
|
"logps/rejected": -0.9216561317443848, |
|
"loss": 0.8881, |
|
"odds_ratio_loss": 0.69920814037323, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.03985697776079178, |
|
"rewards/margins": 0.0062258280813694, |
|
"rewards/rejected": -0.04608280584216118, |
|
"sft_loss": 0.7971394658088684, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8878224355128974, |
|
"grad_norm": 2.049071087536336, |
|
"learning_rate": 4.092771359218462e-06, |
|
"logits/chosen": 0.2649831771850586, |
|
"logits/rejected": 0.45568495988845825, |
|
"logps/chosen": -0.8466150164604187, |
|
"logps/rejected": -1.0025365352630615, |
|
"loss": 0.9065, |
|
"odds_ratio_loss": 0.629971444606781, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.042330749332904816, |
|
"rewards/margins": 0.007796071469783783, |
|
"rewards/rejected": -0.0501268208026886, |
|
"sft_loss": 0.8466150164604187, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.9118176364727054, |
|
"grad_norm": 3.597524104140319, |
|
"learning_rate": 4.04295875030778e-06, |
|
"logits/chosen": -0.18752217292785645, |
|
"logits/rejected": 0.15378537774085999, |
|
"logps/chosen": -0.8704308271408081, |
|
"logps/rejected": -0.9513336420059204, |
|
"loss": 0.9014, |
|
"odds_ratio_loss": 0.6948253512382507, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.043521542102098465, |
|
"rewards/margins": 0.004045139066874981, |
|
"rewards/rejected": -0.04756668210029602, |
|
"sft_loss": 0.8704308271408081, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.9358128374325135, |
|
"grad_norm": 3.1405630532603395, |
|
"learning_rate": 3.992136345409765e-06, |
|
"logits/chosen": -0.1735876053571701, |
|
"logits/rejected": -0.20124337077140808, |
|
"logps/chosen": -0.9253339767456055, |
|
"logps/rejected": -1.0305973291397095, |
|
"loss": 0.9111, |
|
"odds_ratio_loss": 0.6636070013046265, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.04626670479774475, |
|
"rewards/margins": 0.005263164173811674, |
|
"rewards/rejected": -0.051529865711927414, |
|
"sft_loss": 0.9253339767456055, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.9598080383923215, |
|
"grad_norm": 2.4716790122788983, |
|
"learning_rate": 3.940337405465786e-06, |
|
"logits/chosen": 0.26361703872680664, |
|
"logits/rejected": 0.44345617294311523, |
|
"logps/chosen": -0.8355854153633118, |
|
"logps/rejected": -1.0225704908370972, |
|
"loss": 0.9062, |
|
"odds_ratio_loss": 0.6545855402946472, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.04177927225828171, |
|
"rewards/margins": 0.009349259547889233, |
|
"rewards/rejected": -0.05112852901220322, |
|
"sft_loss": 0.8355854153633118, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9838032393521295, |
|
"grad_norm": 2.3985102639359406, |
|
"learning_rate": 3.887595830514775e-06, |
|
"logits/chosen": 0.21671700477600098, |
|
"logits/rejected": 0.29912179708480835, |
|
"logps/chosen": -0.809670090675354, |
|
"logps/rejected": -1.0107569694519043, |
|
"loss": 0.9029, |
|
"odds_ratio_loss": 0.6326887011528015, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.0404835119843483, |
|
"rewards/margins": 0.010054344311356544, |
|
"rewards/rejected": -0.05053785443305969, |
|
"sft_loss": 0.809670090675354, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.0077984403119375, |
|
"grad_norm": 1.6971594247197401, |
|
"learning_rate": 3.833946137507195e-06, |
|
"logits/chosen": 0.4990086555480957, |
|
"logits/rejected": 0.616361141204834, |
|
"logps/chosen": -0.8005359768867493, |
|
"logps/rejected": -0.9603840708732605, |
|
"loss": 0.8398, |
|
"odds_ratio_loss": 0.6354148387908936, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.040026795119047165, |
|
"rewards/margins": 0.007992411032319069, |
|
"rewards/rejected": -0.04801920801401138, |
|
"sft_loss": 0.8005359768867493, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.0317936412717457, |
|
"grad_norm": 2.2002987962167904, |
|
"learning_rate": 3.779423437715274e-06, |
|
"logits/chosen": 0.7601526975631714, |
|
"logits/rejected": 0.8180352449417114, |
|
"logps/chosen": -0.6671024560928345, |
|
"logps/rejected": -0.9577730298042297, |
|
"loss": 0.7742, |
|
"odds_ratio_loss": 0.5807942152023315, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.03335512429475784, |
|
"rewards/margins": 0.014533529989421368, |
|
"rewards/rejected": -0.047888655215501785, |
|
"sft_loss": 0.6671024560928345, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.0557888422315538, |
|
"grad_norm": 1.5148819350515028, |
|
"learning_rate": 3.7240634137542864e-06, |
|
"logits/chosen": 0.19566980004310608, |
|
"logits/rejected": 0.3528198003768921, |
|
"logps/chosen": -0.6874720454216003, |
|
"logps/rejected": -1.0558958053588867, |
|
"loss": 0.7663, |
|
"odds_ratio_loss": 0.48211669921875, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.034373603761196136, |
|
"rewards/margins": 0.01842118799686432, |
|
"rewards/rejected": -0.052794791758060455, |
|
"sft_loss": 0.6874720454216003, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.0797840431913617, |
|
"grad_norm": 1.6130353172110996, |
|
"learning_rate": 3.6679022962299054e-06, |
|
"logits/chosen": 0.8750432133674622, |
|
"logits/rejected": 0.8553866147994995, |
|
"logps/chosen": -0.7515122890472412, |
|
"logps/rejected": -0.9563247561454773, |
|
"loss": 0.7745, |
|
"odds_ratio_loss": 0.5920617580413818, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.037575613707304, |
|
"rewards/margins": 0.010240620002150536, |
|
"rewards/rejected": -0.047816235572099686, |
|
"sft_loss": 0.7515122890472412, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.1037792441511698, |
|
"grad_norm": 1.8444047185661667, |
|
"learning_rate": 3.6109768400269336e-06, |
|
"logits/chosen": 0.21664266288280487, |
|
"logits/rejected": 0.3455556333065033, |
|
"logps/chosen": -0.7820109128952026, |
|
"logps/rejected": -1.1722263097763062, |
|
"loss": 0.7949, |
|
"odds_ratio_loss": 0.5249099731445312, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.03910055011510849, |
|
"rewards/margins": 0.019510772079229355, |
|
"rewards/rejected": -0.05861131474375725, |
|
"sft_loss": 0.7820109128952026, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.127774445110978, |
|
"grad_norm": 1.923809039800638, |
|
"learning_rate": 3.5533243002549044e-06, |
|
"logits/chosen": -0.051299355924129486, |
|
"logits/rejected": 0.12599964439868927, |
|
"logps/chosen": -0.6766480803489685, |
|
"logps/rejected": -0.9556339979171753, |
|
"loss": 0.769, |
|
"odds_ratio_loss": 0.5771059989929199, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.03383240848779678, |
|
"rewards/margins": 0.013949294574558735, |
|
"rewards/rejected": -0.047781698405742645, |
|
"sft_loss": 0.6766480803489685, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.1517696460707858, |
|
"grad_norm": 2.0416324249302593, |
|
"learning_rate": 3.4949824078663214e-06, |
|
"logits/chosen": 0.3260158598423004, |
|
"logits/rejected": 0.4627075791358948, |
|
"logps/chosen": -0.6955934762954712, |
|
"logps/rejected": -1.0405316352844238, |
|
"loss": 0.7744, |
|
"odds_ratio_loss": 0.5207543969154358, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.03477967530488968, |
|
"rewards/margins": 0.017246905714273453, |
|
"rewards/rejected": -0.05202658101916313, |
|
"sft_loss": 0.6955934762954712, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.175764847030594, |
|
"grad_norm": 2.159701142475688, |
|
"learning_rate": 3.4359893449634713e-06, |
|
"logits/chosen": 0.10285909473896027, |
|
"logits/rejected": 0.18586108088493347, |
|
"logps/chosen": -0.7835036516189575, |
|
"logps/rejected": -0.9662873148918152, |
|
"loss": 0.7699, |
|
"odds_ratio_loss": 0.6257883310317993, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.03917517885565758, |
|
"rewards/margins": 0.009139184840023518, |
|
"rewards/rejected": -0.04831436648964882, |
|
"sft_loss": 0.7835036516189575, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.1997600479904018, |
|
"grad_norm": 1.905386181833648, |
|
"learning_rate": 3.3763837198099807e-06, |
|
"logits/chosen": 0.2618166208267212, |
|
"logits/rejected": 0.403994083404541, |
|
"logps/chosen": -0.7472913861274719, |
|
"logps/rejected": -0.9723391532897949, |
|
"loss": 0.8034, |
|
"odds_ratio_loss": 0.5758217573165894, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.03736456483602524, |
|
"rewards/margins": 0.011252395808696747, |
|
"rewards/rejected": -0.048616960644721985, |
|
"sft_loss": 0.7472913861274719, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.22375524895021, |
|
"grad_norm": 1.8483335773730425, |
|
"learning_rate": 3.3162045415634793e-06, |
|
"logits/chosen": -0.06936601549386978, |
|
"logits/rejected": 0.15932008624076843, |
|
"logps/chosen": -0.7298214435577393, |
|
"logps/rejected": -0.989848792552948, |
|
"loss": 0.764, |
|
"odds_ratio_loss": 0.5586143136024475, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.036491066217422485, |
|
"rewards/margins": 0.013001373037695885, |
|
"rewards/rejected": -0.04949244111776352, |
|
"sft_loss": 0.7298214435577393, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.247750449910018, |
|
"grad_norm": 1.4105189905656275, |
|
"learning_rate": 3.255491194745878e-06, |
|
"logits/chosen": -0.0699717178940773, |
|
"logits/rejected": 0.11926586925983429, |
|
"logps/chosen": -0.7712666988372803, |
|
"logps/rejected": -1.0007984638214111, |
|
"loss": 0.7514, |
|
"odds_ratio_loss": 0.576269805431366, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.03856333717703819, |
|
"rewards/margins": 0.011476586572825909, |
|
"rewards/rejected": -0.050039924681186676, |
|
"sft_loss": 0.7712666988372803, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.2717456508698262, |
|
"grad_norm": 1.5086406745902339, |
|
"learning_rate": 3.1942834134680123e-06, |
|
"logits/chosen": -0.4110763669013977, |
|
"logits/rejected": -0.197097510099411, |
|
"logps/chosen": -0.7337836027145386, |
|
"logps/rejected": -1.0581499338150024, |
|
"loss": 0.747, |
|
"odds_ratio_loss": 0.5731949806213379, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.03668918460607529, |
|
"rewards/margins": 0.016218315809965134, |
|
"rewards/rejected": -0.05290750414133072, |
|
"sft_loss": 0.7337836027145386, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.295740851829634, |
|
"grad_norm": 2.007767969966132, |
|
"learning_rate": 3.13262125542547e-06, |
|
"logits/chosen": 0.24464428424835205, |
|
"logits/rejected": 0.42607539892196655, |
|
"logps/chosen": -0.8008230328559875, |
|
"logps/rejected": -1.019913911819458, |
|
"loss": 0.7839, |
|
"odds_ratio_loss": 0.5772299766540527, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.04004114866256714, |
|
"rewards/margins": 0.010954543016850948, |
|
"rewards/rejected": -0.05099569633603096, |
|
"sft_loss": 0.8008230328559875, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.3197360527894422, |
|
"grad_norm": 2.031522996603775, |
|
"learning_rate": 3.0705450756826707e-06, |
|
"logits/chosen": -0.6761570572853088, |
|
"logits/rejected": -0.5336428880691528, |
|
"logps/chosen": -0.7791737914085388, |
|
"logps/rejected": -0.9758432507514954, |
|
"loss": 0.7734, |
|
"odds_ratio_loss": 0.5955380201339722, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.03895869478583336, |
|
"rewards/margins": 0.009833470918238163, |
|
"rewards/rejected": -0.04879216477274895, |
|
"sft_loss": 0.7791737914085388, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.34373125374925, |
|
"grad_norm": 1.8127230145286217, |
|
"learning_rate": 3.00809550026231e-06, |
|
"logits/chosen": 0.7122937440872192, |
|
"logits/rejected": 0.8374090194702148, |
|
"logps/chosen": -0.7448546290397644, |
|
"logps/rejected": -1.0183660984039307, |
|
"loss": 0.7313, |
|
"odds_ratio_loss": 0.5605376362800598, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.03724273294210434, |
|
"rewards/margins": 0.01367556769400835, |
|
"rewards/rejected": -0.050918303430080414, |
|
"sft_loss": 0.7448546290397644, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.3677264547090582, |
|
"grad_norm": 1.6102410365866324, |
|
"learning_rate": 2.9453133995574955e-06, |
|
"logits/chosen": 0.1695878505706787, |
|
"logits/rejected": 0.34987810254096985, |
|
"logps/chosen": -0.7041548490524292, |
|
"logps/rejected": -1.1295292377471924, |
|
"loss": 0.7529, |
|
"odds_ratio_loss": 0.5541011095046997, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.03520774096250534, |
|
"rewards/margins": 0.02126871421933174, |
|
"rewards/rejected": -0.05647646263241768, |
|
"sft_loss": 0.7041548490524292, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.3917216556688663, |
|
"grad_norm": 2.0516481147792964, |
|
"learning_rate": 2.8822398615839337e-06, |
|
"logits/chosen": -0.15236589312553406, |
|
"logits/rejected": 0.005555987358093262, |
|
"logps/chosen": -0.7019264698028564, |
|
"logps/rejected": -0.9463084936141968, |
|
"loss": 0.7377, |
|
"odds_ratio_loss": 0.5546727180480957, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.03509632498025894, |
|
"rewards/margins": 0.012219103053212166, |
|
"rewards/rejected": -0.04731542617082596, |
|
"sft_loss": 0.7019264698028564, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.4157168566286742, |
|
"grad_norm": 2.5703275268486463, |
|
"learning_rate": 2.8189161650897045e-06, |
|
"logits/chosen": 0.09915417432785034, |
|
"logits/rejected": 0.2876579761505127, |
|
"logps/chosen": -0.7416352033615112, |
|
"logps/rejected": -0.9542354345321655, |
|
"loss": 0.7748, |
|
"odds_ratio_loss": 0.5765627026557922, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0370817631483078, |
|
"rewards/margins": 0.010630009695887566, |
|
"rewards/rejected": -0.04771176725625992, |
|
"sft_loss": 0.7416352033615112, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.4397120575884823, |
|
"grad_norm": 1.6574957139548097, |
|
"learning_rate": 2.7553837525402095e-06, |
|
"logits/chosen": 0.14950448274612427, |
|
"logits/rejected": 0.14670611917972565, |
|
"logps/chosen": -0.7459922432899475, |
|
"logps/rejected": -0.9438718557357788, |
|
"loss": 0.764, |
|
"odds_ratio_loss": 0.6029990911483765, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.037299610674381256, |
|
"rewards/margins": 0.009893985465168953, |
|
"rewards/rejected": -0.04719359427690506, |
|
"sft_loss": 0.7459922432899475, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.4637072585482904, |
|
"grad_norm": 1.5955732799355493, |
|
"learning_rate": 2.691684202995966e-06, |
|
"logits/chosen": 0.43530672788619995, |
|
"logits/rejected": 0.4994083344936371, |
|
"logps/chosen": -0.8142836689949036, |
|
"logps/rejected": -0.9706009030342102, |
|
"loss": 0.7559, |
|
"odds_ratio_loss": 0.7006958723068237, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.04071418568491936, |
|
"rewards/margins": 0.007815859280526638, |
|
"rewards/rejected": -0.04853004962205887, |
|
"sft_loss": 0.8142836689949036, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.4877024595080983, |
|
"grad_norm": 1.9589861397245603, |
|
"learning_rate": 2.6278592049010204e-06, |
|
"logits/chosen": -0.19675548374652863, |
|
"logits/rejected": -0.004504656884819269, |
|
"logps/chosen": -0.7537368535995483, |
|
"logps/rejected": -1.0135046243667603, |
|
"loss": 0.7741, |
|
"odds_ratio_loss": 0.5691729187965393, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.03768684342503548, |
|
"rewards/margins": 0.012988388538360596, |
|
"rewards/rejected": -0.050675224512815475, |
|
"sft_loss": 0.7537368535995483, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.5116976604679064, |
|
"grad_norm": 1.7255875955000524, |
|
"learning_rate": 2.5639505287997584e-06, |
|
"logits/chosen": 0.3145737051963806, |
|
"logits/rejected": 0.47394928336143494, |
|
"logps/chosen": -0.7314926385879517, |
|
"logps/rejected": -1.001952886581421, |
|
"loss": 0.7829, |
|
"odds_ratio_loss": 0.5629433393478394, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.03657463565468788, |
|
"rewards/margins": 0.013523015193641186, |
|
"rewards/rejected": -0.050097644329071045, |
|
"sft_loss": 0.7314926385879517, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.5356928614277146, |
|
"grad_norm": 2.504847023988975, |
|
"learning_rate": 2.5e-06, |
|
"logits/chosen": 0.2320265769958496, |
|
"logits/rejected": 0.3284027874469757, |
|
"logps/chosen": -0.7656562924385071, |
|
"logps/rejected": -1.076923131942749, |
|
"loss": 0.7503, |
|
"odds_ratio_loss": 0.584337592124939, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.038282815366983414, |
|
"rewards/margins": 0.015563338994979858, |
|
"rewards/rejected": -0.053846150636672974, |
|
"sft_loss": 0.7656562924385071, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.5596880623875224, |
|
"grad_norm": 1.4394266237384084, |
|
"learning_rate": 2.436049471200242e-06, |
|
"logits/chosen": -0.5206400156021118, |
|
"logits/rejected": -0.38631540536880493, |
|
"logps/chosen": -0.8094362020492554, |
|
"logps/rejected": -0.9923938512802124, |
|
"loss": 0.7752, |
|
"odds_ratio_loss": 0.5967071056365967, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.04047181457281113, |
|
"rewards/margins": 0.00914788618683815, |
|
"rewards/rejected": -0.04961969703435898, |
|
"sft_loss": 0.8094362020492554, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.5836832633473306, |
|
"grad_norm": 1.7625452374002906, |
|
"learning_rate": 2.3721407950989804e-06, |
|
"logits/chosen": -0.24351301789283752, |
|
"logits/rejected": -0.07003232091665268, |
|
"logps/chosen": -0.6876959800720215, |
|
"logps/rejected": -0.9035342335700989, |
|
"loss": 0.7734, |
|
"odds_ratio_loss": 0.5917103290557861, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.034384798258543015, |
|
"rewards/margins": 0.010791914537549019, |
|
"rewards/rejected": -0.045176707208156586, |
|
"sft_loss": 0.6876959800720215, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.6076784643071385, |
|
"grad_norm": 1.6046093499190943, |
|
"learning_rate": 2.3083157970040344e-06, |
|
"logits/chosen": 0.5633162260055542, |
|
"logits/rejected": 0.6462755799293518, |
|
"logps/chosen": -0.7524802684783936, |
|
"logps/rejected": -1.0558850765228271, |
|
"loss": 0.7563, |
|
"odds_ratio_loss": 0.552274227142334, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.03762401267886162, |
|
"rewards/margins": 0.015170246362686157, |
|
"rewards/rejected": -0.05279426649212837, |
|
"sft_loss": 0.7524802684783936, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.6316736652669466, |
|
"grad_norm": 2.117352018263469, |
|
"learning_rate": 2.2446162474597913e-06, |
|
"logits/chosen": 0.43944865465164185, |
|
"logits/rejected": 0.5002392530441284, |
|
"logps/chosen": -0.7501770257949829, |
|
"logps/rejected": -0.9691005945205688, |
|
"loss": 0.7699, |
|
"odds_ratio_loss": 0.5791727304458618, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.037508852779865265, |
|
"rewards/margins": 0.010946177877485752, |
|
"rewards/rejected": -0.04845503345131874, |
|
"sft_loss": 0.7501770257949829, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.6556688662267547, |
|
"grad_norm": 1.6685249776962552, |
|
"learning_rate": 2.1810838349102963e-06, |
|
"logits/chosen": 0.16153453290462494, |
|
"logits/rejected": 0.20878514647483826, |
|
"logps/chosen": -0.7516240477561951, |
|
"logps/rejected": -1.0250643491744995, |
|
"loss": 0.7666, |
|
"odds_ratio_loss": 0.5872852206230164, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.03758120536804199, |
|
"rewards/margins": 0.013672016561031342, |
|
"rewards/rejected": -0.051253218203783035, |
|
"sft_loss": 0.7516240477561951, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.6796640671865628, |
|
"grad_norm": 2.782782057649718, |
|
"learning_rate": 2.117760138416067e-06, |
|
"logits/chosen": 0.24376201629638672, |
|
"logits/rejected": 0.44258540868759155, |
|
"logps/chosen": -0.6985687017440796, |
|
"logps/rejected": -1.0050299167633057, |
|
"loss": 0.7614, |
|
"odds_ratio_loss": 0.543103814125061, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.03492843732237816, |
|
"rewards/margins": 0.015323063358664513, |
|
"rewards/rejected": -0.05025150254368782, |
|
"sft_loss": 0.6985687017440796, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.7036592681463707, |
|
"grad_norm": 1.5369658154698735, |
|
"learning_rate": 2.0546866004425053e-06, |
|
"logits/chosen": 0.3964254558086395, |
|
"logits/rejected": 0.4900701642036438, |
|
"logps/chosen": -0.7590494155883789, |
|
"logps/rejected": -1.2440413236618042, |
|
"loss": 0.7652, |
|
"odds_ratio_loss": 0.5372438430786133, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.037952471524477005, |
|
"rewards/margins": 0.024249596521258354, |
|
"rewards/rejected": -0.06220207363367081, |
|
"sft_loss": 0.7590494155883789, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.7276544691061788, |
|
"grad_norm": 1.9970193945029362, |
|
"learning_rate": 1.9919044997376906e-06, |
|
"logits/chosen": 0.6031176447868347, |
|
"logits/rejected": 0.7783833742141724, |
|
"logps/chosen": -0.7290822267532349, |
|
"logps/rejected": -1.021554946899414, |
|
"loss": 0.7176, |
|
"odds_ratio_loss": 0.557815432548523, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.03645411133766174, |
|
"rewards/margins": 0.014623639173805714, |
|
"rewards/rejected": -0.051077745854854584, |
|
"sft_loss": 0.7290822267532349, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.7516496700659867, |
|
"grad_norm": 2.558147455560064, |
|
"learning_rate": 1.9294549243173306e-06, |
|
"logits/chosen": -0.027294237166643143, |
|
"logits/rejected": 0.11035363376140594, |
|
"logps/chosen": -0.7765438556671143, |
|
"logps/rejected": -1.0300321578979492, |
|
"loss": 0.7771, |
|
"odds_ratio_loss": 0.5954040884971619, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.03882719203829765, |
|
"rewards/margins": 0.012674416415393353, |
|
"rewards/rejected": -0.05150160938501358, |
|
"sft_loss": 0.7765438556671143, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.7756448710257948, |
|
"grad_norm": 2.346615273317464, |
|
"learning_rate": 1.8673787445745298e-06, |
|
"logits/chosen": -0.449845552444458, |
|
"logits/rejected": -0.3746832311153412, |
|
"logps/chosen": -0.7114017605781555, |
|
"logps/rejected": -0.928491473197937, |
|
"loss": 0.7699, |
|
"odds_ratio_loss": 0.5795110464096069, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.035570088773965836, |
|
"rewards/margins": 0.010854486376047134, |
|
"rewards/rejected": -0.04642457515001297, |
|
"sft_loss": 0.7114017605781555, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.799640071985603, |
|
"grad_norm": 1.995371230537378, |
|
"learning_rate": 1.805716586531988e-06, |
|
"logits/chosen": -0.13443303108215332, |
|
"logits/rejected": 0.014731263741850853, |
|
"logps/chosen": -0.8079891204833984, |
|
"logps/rejected": -1.0810317993164062, |
|
"loss": 0.7825, |
|
"odds_ratio_loss": 0.6112096309661865, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.0403994545340538, |
|
"rewards/margins": 0.013652140274643898, |
|
"rewards/rejected": -0.05405158922076225, |
|
"sft_loss": 0.8079891204833984, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.823635272945411, |
|
"grad_norm": 1.8742057389590454, |
|
"learning_rate": 1.7445088052541218e-06, |
|
"logits/chosen": 0.046121031045913696, |
|
"logits/rejected": 0.1955467015504837, |
|
"logps/chosen": -0.7093559503555298, |
|
"logps/rejected": -1.0484099388122559, |
|
"loss": 0.7617, |
|
"odds_ratio_loss": 0.5657014846801758, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.03546779602766037, |
|
"rewards/margins": 0.016952697187662125, |
|
"rewards/rejected": -0.05242049694061279, |
|
"sft_loss": 0.7093559503555298, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.847630473905219, |
|
"grad_norm": 1.2680203881504901, |
|
"learning_rate": 1.6837954584365217e-06, |
|
"logits/chosen": 0.4459083080291748, |
|
"logits/rejected": 0.5636454224586487, |
|
"logps/chosen": -0.7526987195014954, |
|
"logps/rejected": -1.009804606437683, |
|
"loss": 0.7871, |
|
"odds_ratio_loss": 0.5556772947311401, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.03763493150472641, |
|
"rewards/margins": 0.012855296023190022, |
|
"rewards/rejected": -0.050490230321884155, |
|
"sft_loss": 0.7526987195014954, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.8716256748650268, |
|
"grad_norm": 1.9254646582677224, |
|
"learning_rate": 1.6236162801900191e-06, |
|
"logits/chosen": -0.10451897233724594, |
|
"logits/rejected": 0.3060254156589508, |
|
"logps/chosen": -0.6585639715194702, |
|
"logps/rejected": -0.9869001507759094, |
|
"loss": 0.71, |
|
"odds_ratio_loss": 0.4942260682582855, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.03292820230126381, |
|
"rewards/margins": 0.016416804865002632, |
|
"rewards/rejected": -0.04934500530362129, |
|
"sft_loss": 0.6585639715194702, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.895620875824835, |
|
"grad_norm": 1.9904836511656812, |
|
"learning_rate": 1.5640106550365298e-06, |
|
"logits/chosen": 0.11656351387500763, |
|
"logits/rejected": 0.29824742674827576, |
|
"logps/chosen": -0.7831540703773499, |
|
"logps/rejected": -1.0284688472747803, |
|
"loss": 0.7758, |
|
"odds_ratio_loss": 0.5839165449142456, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.03915770351886749, |
|
"rewards/margins": 0.01226573996245861, |
|
"rewards/rejected": -0.051423441618680954, |
|
"sft_loss": 0.7831540703773499, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.919616076784643, |
|
"grad_norm": 1.7061927534288226, |
|
"learning_rate": 1.5050175921336797e-06, |
|
"logits/chosen": 0.14354857802391052, |
|
"logits/rejected": 0.27334246039390564, |
|
"logps/chosen": -0.7474446892738342, |
|
"logps/rejected": -0.9480558633804321, |
|
"loss": 0.7575, |
|
"odds_ratio_loss": 0.6441240310668945, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.03737223893404007, |
|
"rewards/margins": 0.010030550882220268, |
|
"rewards/rejected": -0.04740279167890549, |
|
"sft_loss": 0.7474446892738342, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.9436112777444512, |
|
"grad_norm": 2.251879648695612, |
|
"learning_rate": 1.446675699745097e-06, |
|
"logits/chosen": 0.25183239579200745, |
|
"logits/rejected": 0.38326969742774963, |
|
"logps/chosen": -0.7823570966720581, |
|
"logps/rejected": -0.9946805238723755, |
|
"loss": 0.8037, |
|
"odds_ratio_loss": 0.6080455183982849, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.03911786153912544, |
|
"rewards/margins": 0.010616169311106205, |
|
"rewards/rejected": -0.049734026193618774, |
|
"sft_loss": 0.7823570966720581, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.9676064787042593, |
|
"grad_norm": 1.9391362449031262, |
|
"learning_rate": 1.3890231599730674e-06, |
|
"logits/chosen": 0.31725913286209106, |
|
"logits/rejected": 0.5106421709060669, |
|
"logps/chosen": -0.7221857309341431, |
|
"logps/rejected": -0.9829575419425964, |
|
"loss": 0.7904, |
|
"odds_ratio_loss": 0.5538625121116638, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.03610928729176521, |
|
"rewards/margins": 0.013038587756454945, |
|
"rewards/rejected": -0.049147870391607285, |
|
"sft_loss": 0.7221857309341431, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.9916016796640672, |
|
"grad_norm": 1.5457295502049215, |
|
"learning_rate": 1.3320977037700952e-06, |
|
"logits/chosen": 0.8291665315628052, |
|
"logits/rejected": 1.1122350692749023, |
|
"logps/chosen": -0.6864774227142334, |
|
"logps/rejected": -1.0247427225112915, |
|
"loss": 0.7452, |
|
"odds_ratio_loss": 0.49447354674339294, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.03432386741042137, |
|
"rewards/margins": 0.016913266852498055, |
|
"rewards/rejected": -0.051237136125564575, |
|
"sft_loss": 0.6864774227142334, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.015596880623875, |
|
"grad_norm": 1.5016852289986733, |
|
"learning_rate": 1.2759365862457148e-06, |
|
"logits/chosen": -0.4956502318382263, |
|
"logits/rejected": -0.1621031016111374, |
|
"logps/chosen": -0.7308815717697144, |
|
"logps/rejected": -0.9828909039497375, |
|
"loss": 0.7173, |
|
"odds_ratio_loss": 0.5487710237503052, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.0365440808236599, |
|
"rewards/margins": 0.012600463815033436, |
|
"rewards/rejected": -0.049144547432661057, |
|
"sft_loss": 0.7308815717697144, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.039592081583683, |
|
"grad_norm": 1.622924065562837, |
|
"learning_rate": 1.2205765622847273e-06, |
|
"logits/chosen": -0.12397761642932892, |
|
"logits/rejected": 0.08023932576179504, |
|
"logps/chosen": -0.6277745962142944, |
|
"logps/rejected": -1.0955206155776978, |
|
"loss": 0.6995, |
|
"odds_ratio_loss": 0.4475070536136627, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.03138873726129532, |
|
"rewards/margins": 0.023387301713228226, |
|
"rewards/rejected": -0.054776035249233246, |
|
"sft_loss": 0.6277745962142944, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.0635872825434913, |
|
"grad_norm": 1.4741935497367946, |
|
"learning_rate": 1.1660538624928062e-06, |
|
"logits/chosen": -0.3639386296272278, |
|
"logits/rejected": -0.2011258602142334, |
|
"logps/chosen": -0.6642920970916748, |
|
"logps/rejected": -1.0270217657089233, |
|
"loss": 0.7019, |
|
"odds_ratio_loss": 0.4971997141838074, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.03321460261940956, |
|
"rewards/margins": 0.018136484548449516, |
|
"rewards/rejected": -0.05135108903050423, |
|
"sft_loss": 0.6642920970916748, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.0875824835032994, |
|
"grad_norm": 1.7172174730539993, |
|
"learning_rate": 1.112404169485226e-06, |
|
"logits/chosen": -0.3923923075199127, |
|
"logits/rejected": -0.10327514261007309, |
|
"logps/chosen": -0.5645719766616821, |
|
"logps/rejected": -1.071115255355835, |
|
"loss": 0.6681, |
|
"odds_ratio_loss": 0.42052555084228516, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.028228599578142166, |
|
"rewards/margins": 0.025327179580926895, |
|
"rewards/rejected": -0.053555767983198166, |
|
"sft_loss": 0.5645719766616821, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.1115776844631076, |
|
"grad_norm": 1.1474314844125568, |
|
"learning_rate": 1.0596625945342148e-06, |
|
"logits/chosen": -0.008033117279410362, |
|
"logits/rejected": 0.16419892013072968, |
|
"logps/chosen": -0.7100299000740051, |
|
"logps/rejected": -0.9733055233955383, |
|
"loss": 0.6813, |
|
"odds_ratio_loss": 0.5328400731086731, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.03550150245428085, |
|
"rewards/margins": 0.013163777068257332, |
|
"rewards/rejected": -0.048665277659893036, |
|
"sft_loss": 0.7100299000740051, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.1355728854229152, |
|
"grad_norm": 2.1383619388719515, |
|
"learning_rate": 1.0078636545902363e-06, |
|
"logits/chosen": -0.4247666001319885, |
|
"logits/rejected": -0.17631380259990692, |
|
"logps/chosen": -0.6582883596420288, |
|
"logps/rejected": -1.0547147989273071, |
|
"loss": 0.6895, |
|
"odds_ratio_loss": 0.47398701310157776, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.0329144187271595, |
|
"rewards/margins": 0.019821325317025185, |
|
"rewards/rejected": -0.05273573845624924, |
|
"sft_loss": 0.6582883596420288, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.1595680863827234, |
|
"grad_norm": 1.5320300236939732, |
|
"learning_rate": 9.570412496922198e-07, |
|
"logits/chosen": -0.27953624725341797, |
|
"logits/rejected": -0.08715387433767319, |
|
"logps/chosen": -0.5965186357498169, |
|
"logps/rejected": -1.154284119606018, |
|
"loss": 0.6738, |
|
"odds_ratio_loss": 0.4240815043449402, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.029825935140252113, |
|
"rewards/margins": 0.02788827195763588, |
|
"rewards/rejected": -0.05771421268582344, |
|
"sft_loss": 0.5965186357498169, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.1835632873425315, |
|
"grad_norm": 1.6204787225170885, |
|
"learning_rate": 9.07228640781539e-07, |
|
"logits/chosen": 0.368365079164505, |
|
"logits/rejected": 0.6101259589195251, |
|
"logps/chosen": -0.6893322467803955, |
|
"logps/rejected": -1.0903311967849731, |
|
"loss": 0.6791, |
|
"odds_ratio_loss": 0.4818887710571289, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.03446660935878754, |
|
"rewards/margins": 0.02004995197057724, |
|
"rewards/rejected": -0.054516565054655075, |
|
"sft_loss": 0.6893322467803955, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.2075584883023396, |
|
"grad_norm": 1.290844558254926, |
|
"learning_rate": 8.584584279342392e-07, |
|
"logits/chosen": -0.16083380579948425, |
|
"logits/rejected": -0.10739579051733017, |
|
"logps/chosen": -0.6938862800598145, |
|
"logps/rejected": -0.9513536691665649, |
|
"loss": 0.6888, |
|
"odds_ratio_loss": 0.5428452491760254, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.034694310277700424, |
|
"rewards/margins": 0.012873371131718159, |
|
"rewards/rejected": -0.047567687928676605, |
|
"sft_loss": 0.6938862800598145, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.2315536892621477, |
|
"grad_norm": 1.5229766148545818, |
|
"learning_rate": 8.10762529025782e-07, |
|
"logits/chosen": -0.4659739136695862, |
|
"logits/rejected": -0.4786594808101654, |
|
"logps/chosen": -0.6584521532058716, |
|
"logps/rejected": -0.8917843699455261, |
|
"loss": 0.65, |
|
"odds_ratio_loss": 0.5486137866973877, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.03292260691523552, |
|
"rewards/margins": 0.011666612699627876, |
|
"rewards/rejected": -0.044589221477508545, |
|
"sft_loss": 0.6584521532058716, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 2.255548890221956, |
|
"grad_norm": 1.7015940933867517, |
|
"learning_rate": 7.641721588422526e-07, |
|
"logits/chosen": -0.009342163801193237, |
|
"logits/rejected": 0.1280032843351364, |
|
"logps/chosen": -0.6387184262275696, |
|
"logps/rejected": -1.049140453338623, |
|
"loss": 0.687, |
|
"odds_ratio_loss": 0.4773840010166168, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.0319359228014946, |
|
"rewards/margins": 0.020521100610494614, |
|
"rewards/rejected": -0.05245702341198921, |
|
"sft_loss": 0.6387184262275696, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.2795440911817635, |
|
"grad_norm": 1.4203319350991257, |
|
"learning_rate": 7.187178086517116e-07, |
|
"logits/chosen": 0.14468683302402496, |
|
"logits/rejected": 0.2608656883239746, |
|
"logps/chosen": -0.6514204144477844, |
|
"logps/rejected": -1.2591578960418701, |
|
"loss": 0.6695, |
|
"odds_ratio_loss": 0.455849826335907, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.03257102146744728, |
|
"rewards/margins": 0.03038688376545906, |
|
"rewards/rejected": -0.06295789778232574, |
|
"sft_loss": 0.6514204144477844, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.3035392921415716, |
|
"grad_norm": 1.7783791010197938, |
|
"learning_rate": 6.74429226249049e-07, |
|
"logits/chosen": 0.09898465871810913, |
|
"logits/rejected": 0.21373791992664337, |
|
"logps/chosen": -0.6381307244300842, |
|
"logps/rejected": -0.9742431640625, |
|
"loss": 0.6712, |
|
"odds_ratio_loss": 0.49530988931655884, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.03190653771162033, |
|
"rewards/margins": 0.016805628314614296, |
|
"rewards/rejected": -0.04871216416358948, |
|
"sft_loss": 0.6381307244300842, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.3275344931013797, |
|
"grad_norm": 1.6090454208525553, |
|
"learning_rate": 6.313353964874155e-07, |
|
"logits/chosen": 0.1333683431148529, |
|
"logits/rejected": 0.3417516350746155, |
|
"logps/chosen": -0.6887052655220032, |
|
"logps/rejected": -1.0016798973083496, |
|
"loss": 0.6673, |
|
"odds_ratio_loss": 0.5059822797775269, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.03443526476621628, |
|
"rewards/margins": 0.01564873196184635, |
|
"rewards/rejected": -0.05008399486541748, |
|
"sft_loss": 0.6887052655220032, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 2.351529694061188, |
|
"grad_norm": 1.6382111002720514, |
|
"learning_rate": 5.894645223089584e-07, |
|
"logits/chosen": 0.7236309051513672, |
|
"logits/rejected": 0.8550646901130676, |
|
"logps/chosen": -0.6779772639274597, |
|
"logps/rejected": -1.2183148860931396, |
|
"loss": 0.6958, |
|
"odds_ratio_loss": 0.448292076587677, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.033898863941431046, |
|
"rewards/margins": 0.027016881853342056, |
|
"rewards/rejected": -0.0609157457947731, |
|
"sft_loss": 0.6779772639274597, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.375524895020996, |
|
"grad_norm": 1.680992010239421, |
|
"learning_rate": 5.48844006287289e-07, |
|
"logits/chosen": 0.12925365567207336, |
|
"logits/rejected": 0.3167954981327057, |
|
"logps/chosen": -0.6692675352096558, |
|
"logps/rejected": -1.0140740871429443, |
|
"loss": 0.6691, |
|
"odds_ratio_loss": 0.4763975143432617, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.033463381230831146, |
|
"rewards/margins": 0.01724032498896122, |
|
"rewards/rejected": -0.050703711807727814, |
|
"sft_loss": 0.6692675352096558, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.3995200959808036, |
|
"grad_norm": 1.544720546176764, |
|
"learning_rate": 5.095004326937445e-07, |
|
"logits/chosen": -0.4231066107749939, |
|
"logits/rejected": -0.20230142772197723, |
|
"logps/chosen": -0.6737790107727051, |
|
"logps/rejected": -1.0810075998306274, |
|
"loss": 0.6744, |
|
"odds_ratio_loss": 0.4769432544708252, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.033688947558403015, |
|
"rewards/margins": 0.02036142908036709, |
|
"rewards/rejected": -0.05405038595199585, |
|
"sft_loss": 0.6737790107727051, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.4235152969406117, |
|
"grad_norm": 1.7400382431256138, |
|
"learning_rate": 4.71459550099202e-07, |
|
"logits/chosen": 0.2943962812423706, |
|
"logits/rejected": 0.5343393087387085, |
|
"logps/chosen": -0.6686779856681824, |
|
"logps/rejected": -1.0820672512054443, |
|
"loss": 0.7078, |
|
"odds_ratio_loss": 0.5010559558868408, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.03343390300869942, |
|
"rewards/margins": 0.020669464021921158, |
|
"rewards/rejected": -0.054103363305330276, |
|
"sft_loss": 0.6686779856681824, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.44751049790042, |
|
"grad_norm": 1.548219424075948, |
|
"learning_rate": 4.347462545228134e-07, |
|
"logits/chosen": 0.13567771017551422, |
|
"logits/rejected": 0.31968480348587036, |
|
"logps/chosen": -0.6244124174118042, |
|
"logps/rejected": -1.05476975440979, |
|
"loss": 0.6563, |
|
"odds_ratio_loss": 0.4984089732170105, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.03122062422335148, |
|
"rewards/margins": 0.021517863497138023, |
|
"rewards/rejected": -0.052738480269908905, |
|
"sft_loss": 0.6244124174118042, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.471505698860228, |
|
"grad_norm": 1.4610216249122747, |
|
"learning_rate": 3.9938457313869914e-07, |
|
"logits/chosen": -0.08544759452342987, |
|
"logits/rejected": 0.07162941992282867, |
|
"logps/chosen": -0.7579829096794128, |
|
"logps/rejected": -1.1255767345428467, |
|
"loss": 0.6864, |
|
"odds_ratio_loss": 0.547897458076477, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.03789914771914482, |
|
"rewards/margins": 0.01837969198822975, |
|
"rewards/rejected": -0.05627884343266487, |
|
"sft_loss": 0.7579829096794128, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 2.495500899820036, |
|
"grad_norm": 1.6006797776983446, |
|
"learning_rate": 3.6539764855126224e-07, |
|
"logits/chosen": -0.23340921103954315, |
|
"logits/rejected": -0.1814245879650116, |
|
"logps/chosen": -0.6439553499221802, |
|
"logps/rejected": -1.0276587009429932, |
|
"loss": 0.6617, |
|
"odds_ratio_loss": 0.5049816370010376, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.03219776228070259, |
|
"rewards/margins": 0.019185172393918037, |
|
"rewards/rejected": -0.05138293653726578, |
|
"sft_loss": 0.6439553499221802, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.519496100779844, |
|
"grad_norm": 2.318524117790848, |
|
"learning_rate": 3.328077236494087e-07, |
|
"logits/chosen": -0.12850667536258698, |
|
"logits/rejected": 0.07032374292612076, |
|
"logps/chosen": -0.5922039747238159, |
|
"logps/rejected": -1.0730435848236084, |
|
"loss": 0.6694, |
|
"odds_ratio_loss": 0.43941235542297363, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.029610196128487587, |
|
"rewards/margins": 0.024041980504989624, |
|
"rewards/rejected": -0.05365217477083206, |
|
"sft_loss": 0.5922039747238159, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.5434913017396523, |
|
"grad_norm": 1.8087989245838814, |
|
"learning_rate": 3.0163612704959486e-07, |
|
"logits/chosen": -0.6611061692237854, |
|
"logits/rejected": -0.5293869376182556, |
|
"logps/chosen": -0.6281863451004028, |
|
"logps/rejected": -0.9944284558296204, |
|
"loss": 0.6705, |
|
"odds_ratio_loss": 0.47698038816452026, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.03140931576490402, |
|
"rewards/margins": 0.018312102183699608, |
|
"rewards/rejected": -0.04972142353653908, |
|
"sft_loss": 0.6281863451004028, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 2.56748650269946, |
|
"grad_norm": 1.5444353690364836, |
|
"learning_rate": 2.71903259137222e-07, |
|
"logits/chosen": 0.411745548248291, |
|
"logits/rejected": 0.4236873686313629, |
|
"logps/chosen": -0.611006498336792, |
|
"logps/rejected": -1.0047032833099365, |
|
"loss": 0.672, |
|
"odds_ratio_loss": 0.48614612221717834, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.03055032715201378, |
|
"rewards/margins": 0.019684839993715286, |
|
"rewards/rejected": -0.050235163420438766, |
|
"sft_loss": 0.611006498336792, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 2.591481703659268, |
|
"grad_norm": 2.593043127599419, |
|
"learning_rate": 2.436285787155185e-07, |
|
"logits/chosen": 0.316955029964447, |
|
"logits/rejected": 0.47285112738609314, |
|
"logps/chosen": -0.6786519885063171, |
|
"logps/rejected": -1.2019875049591064, |
|
"loss": 0.6881, |
|
"odds_ratio_loss": 0.4908427298069, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.03393259644508362, |
|
"rewards/margins": 0.026166772469878197, |
|
"rewards/rejected": -0.060099370777606964, |
|
"sft_loss": 0.6786519885063171, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 2.6154769046190762, |
|
"grad_norm": 2.2050381193088207, |
|
"learning_rate": 2.168305902706383e-07, |
|
"logits/chosen": -0.4541945457458496, |
|
"logits/rejected": -0.18702273070812225, |
|
"logps/chosen": -0.7026795148849487, |
|
"logps/rejected": -0.962356448173523, |
|
"loss": 0.6583, |
|
"odds_ratio_loss": 0.5365189909934998, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.035133976489305496, |
|
"rewards/margins": 0.012983846478164196, |
|
"rewards/rejected": -0.04811782017350197, |
|
"sft_loss": 0.7026795148849487, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 2.6394721055788843, |
|
"grad_norm": 1.6921175899136245, |
|
"learning_rate": 1.9152683186132476e-07, |
|
"logits/chosen": -0.4067768156528473, |
|
"logits/rejected": -0.3039708137512207, |
|
"logps/chosen": -0.6328436136245728, |
|
"logps/rejected": -1.12655770778656, |
|
"loss": 0.6919, |
|
"odds_ratio_loss": 0.4709090292453766, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.031642183661460876, |
|
"rewards/margins": 0.024685706943273544, |
|
"rewards/rejected": -0.05632789060473442, |
|
"sft_loss": 0.6328436136245728, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.663467306538692, |
|
"grad_norm": 1.5594348597838832, |
|
"learning_rate": 1.6773386364104972e-07, |
|
"logits/chosen": -0.1575368195772171, |
|
"logits/rejected": -0.003553843591362238, |
|
"logps/chosen": -0.6768941879272461, |
|
"logps/rejected": -1.032041072845459, |
|
"loss": 0.6913, |
|
"odds_ratio_loss": 0.50171959400177, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.033844709396362305, |
|
"rewards/margins": 0.017757344990968704, |
|
"rewards/rejected": -0.05160205811262131, |
|
"sft_loss": 0.6768941879272461, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 2.6874625074985, |
|
"grad_norm": 1.2735811398241894, |
|
"learning_rate": 1.4546725702015096e-07, |
|
"logits/chosen": 0.004650235176086426, |
|
"logits/rejected": 0.1661575585603714, |
|
"logps/chosen": -0.6541981101036072, |
|
"logps/rejected": -1.1094247102737427, |
|
"loss": 0.6669, |
|
"odds_ratio_loss": 0.4492813050746918, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.03270990774035454, |
|
"rewards/margins": 0.022761326283216476, |
|
"rewards/rejected": -0.055471230298280716, |
|
"sft_loss": 0.6541981101036072, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 2.7114577084583082, |
|
"grad_norm": 2.2135398834819715, |
|
"learning_rate": 1.24741584475056e-07, |
|
"logits/chosen": -0.07907108962535858, |
|
"logits/rejected": 0.08474680036306381, |
|
"logps/chosen": -0.6154497861862183, |
|
"logps/rejected": -1.0710924863815308, |
|
"loss": 0.6491, |
|
"odds_ratio_loss": 0.4509805142879486, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.030772492289543152, |
|
"rewards/margins": 0.022782133892178535, |
|
"rewards/rejected": -0.05355461686849594, |
|
"sft_loss": 0.6154497861862183, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 2.7354529094181164, |
|
"grad_norm": 1.5137426741255027, |
|
"learning_rate": 1.0557041001126145e-07, |
|
"logits/chosen": 0.3702402710914612, |
|
"logits/rejected": 0.6300150156021118, |
|
"logps/chosen": -0.5984182357788086, |
|
"logps/rejected": -1.115179419517517, |
|
"loss": 0.6191, |
|
"odds_ratio_loss": 0.41762223839759827, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.0299209114164114, |
|
"rewards/margins": 0.025838062167167664, |
|
"rewards/rejected": -0.05575897544622421, |
|
"sft_loss": 0.5984182357788086, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 2.7594481103779245, |
|
"grad_norm": 1.565522436867544, |
|
"learning_rate": 8.796628028631321e-08, |
|
"logits/chosen": 0.17880654335021973, |
|
"logits/rejected": 0.1116660013794899, |
|
"logps/chosen": -0.6091745495796204, |
|
"logps/rejected": -1.0210378170013428, |
|
"loss": 0.6583, |
|
"odds_ratio_loss": 0.4544963836669922, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.030458729714155197, |
|
"rewards/margins": 0.02059316076338291, |
|
"rewards/rejected": -0.05105189234018326, |
|
"sft_loss": 0.6091745495796204, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.7834433113377326, |
|
"grad_norm": 1.604017358081912, |
|
"learning_rate": 7.19407163985894e-08, |
|
"logits/chosen": -0.04378344863653183, |
|
"logits/rejected": 0.18321049213409424, |
|
"logps/chosen": -0.6626521348953247, |
|
"logps/rejected": -1.1215763092041016, |
|
"loss": 0.666, |
|
"odds_ratio_loss": 0.4741577208042145, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.033132605254650116, |
|
"rewards/margins": 0.022946210578083992, |
|
"rewards/rejected": -0.05607881397008896, |
|
"sft_loss": 0.6626521348953247, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 2.8074385122975407, |
|
"grad_norm": 1.4084206676302562, |
|
"learning_rate": 5.750420634727083e-08, |
|
"logits/chosen": -0.45710262656211853, |
|
"logits/rejected": -0.3050076961517334, |
|
"logps/chosen": -0.671418309211731, |
|
"logps/rejected": -1.1854102611541748, |
|
"loss": 0.6842, |
|
"odds_ratio_loss": 0.4368383288383484, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.03357091173529625, |
|
"rewards/margins": 0.02569960430264473, |
|
"rewards/rejected": -0.05927051231265068, |
|
"sft_loss": 0.671418309211731, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 2.8314337132573484, |
|
"grad_norm": 1.3507137389822068, |
|
"learning_rate": 4.4666198168422656e-08, |
|
"logits/chosen": 0.33376216888427734, |
|
"logits/rejected": 0.41172194480895996, |
|
"logps/chosen": -0.6510582566261292, |
|
"logps/rejected": -1.0800405740737915, |
|
"loss": 0.6747, |
|
"odds_ratio_loss": 0.5277644395828247, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.032552916556596756, |
|
"rewards/margins": 0.021449116989970207, |
|
"rewards/rejected": -0.054002027958631516, |
|
"sft_loss": 0.6510582566261292, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 2.8554289142171565, |
|
"grad_norm": 1.6874037821147798, |
|
"learning_rate": 3.343509375168863e-08, |
|
"logits/chosen": 0.20301933586597443, |
|
"logits/rejected": 0.32382094860076904, |
|
"logps/chosen": -0.6405006647109985, |
|
"logps/rejected": -1.0241023302078247, |
|
"loss": 0.6718, |
|
"odds_ratio_loss": 0.48166948556900024, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.03202503174543381, |
|
"rewards/margins": 0.019180091097950935, |
|
"rewards/rejected": -0.051205117255449295, |
|
"sft_loss": 0.6405006647109985, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 2.8794241151769646, |
|
"grad_norm": 1.6417139708130921, |
|
"learning_rate": 2.3818243341637293e-08, |
|
"logits/chosen": -0.3619822859764099, |
|
"logits/rejected": -0.15361133217811584, |
|
"logps/chosen": -0.6599988341331482, |
|
"logps/rejected": -1.098881483078003, |
|
"loss": 0.6565, |
|
"odds_ratio_loss": 0.456063449382782, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.03299994021654129, |
|
"rewards/margins": 0.021944135427474976, |
|
"rewards/rejected": -0.054944075644016266, |
|
"sft_loss": 0.6599988341331482, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.9034193161367727, |
|
"grad_norm": 1.648932215503252, |
|
"learning_rate": 1.5821940727361874e-08, |
|
"logits/chosen": -0.7362561821937561, |
|
"logits/rejected": -0.4996170997619629, |
|
"logps/chosen": -0.6824958920478821, |
|
"logps/rejected": -0.9969790577888489, |
|
"loss": 0.7067, |
|
"odds_ratio_loss": 0.5307115316390991, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.034124795347452164, |
|
"rewards/margins": 0.01572415977716446, |
|
"rewards/rejected": -0.049848951399326324, |
|
"sft_loss": 0.6824958920478821, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 2.927414517096581, |
|
"grad_norm": 1.7678674281978446, |
|
"learning_rate": 9.451419123484573e-09, |
|
"logits/chosen": -0.15318191051483154, |
|
"logits/rejected": 0.047946538776159286, |
|
"logps/chosen": -0.6560810804367065, |
|
"logps/rejected": -1.0658347606658936, |
|
"loss": 0.6692, |
|
"odds_ratio_loss": 0.5046226382255554, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.032804060727357864, |
|
"rewards/margins": 0.02048768661916256, |
|
"rewards/rejected": -0.053291745483875275, |
|
"sft_loss": 0.6560810804367065, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 2.9514097180563885, |
|
"grad_norm": 1.4413325593301094, |
|
"learning_rate": 4.710847745256209e-09, |
|
"logits/chosen": 0.12647075951099396, |
|
"logits/rejected": 0.2795228958129883, |
|
"logps/chosen": -0.6180914640426636, |
|
"logps/rejected": -1.0847346782684326, |
|
"loss": 0.6722, |
|
"odds_ratio_loss": 0.41623228788375854, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.030904576182365417, |
|
"rewards/margins": 0.02333216182887554, |
|
"rewards/rejected": -0.05423673242330551, |
|
"sft_loss": 0.6180914640426636, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 2.9754049190161966, |
|
"grad_norm": 1.5296676400661524, |
|
"learning_rate": 1.603329079994942e-09, |
|
"logits/chosen": -0.3425149619579315, |
|
"logits/rejected": -0.06856220215559006, |
|
"logps/chosen": -0.6569226980209351, |
|
"logps/rejected": -1.1020539999008179, |
|
"loss": 0.6649, |
|
"odds_ratio_loss": 0.4642546772956848, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.03284613788127899, |
|
"rewards/margins": 0.02225656434893608, |
|
"rewards/rejected": -0.055102698504924774, |
|
"sft_loss": 0.6569226980209351, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.994601079784043, |
|
"step": 1248, |
|
"total_flos": 132590267662336.0, |
|
"train_loss": 0.7937506708579186, |
|
"train_runtime": 49781.9259, |
|
"train_samples_per_second": 1.205, |
|
"train_steps_per_second": 0.025 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1248, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100.0, |
|
"total_flos": 132590267662336.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|