zephyr-7b / trainer_state.json
jikaixuan's picture
Model save
881e79b verified
raw
history blame
62.3 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9997382884061764,
"eval_steps": 100,
"global_step": 955,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 0.59375,
"learning_rate": 5.208333333333333e-08,
"logits/chosen": -2.1666858196258545,
"logits/rejected": -2.182244300842285,
"logps/chosen": -12.368609428405762,
"logps/rejected": -24.687644958496094,
"loss": 0.6931,
"pred_label": 0.0,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1,
"use_label": 10.0
},
{
"epoch": 0.01,
"grad_norm": 0.60546875,
"learning_rate": 5.208333333333334e-07,
"logits/chosen": -2.2113068103790283,
"logits/rejected": -2.2719719409942627,
"logps/chosen": -57.57659149169922,
"logps/rejected": -65.19544219970703,
"loss": 0.693,
"pred_label": 0.0,
"rewards/accuracies": 0.2152777761220932,
"rewards/chosen": 0.001057142741046846,
"rewards/margins": 3.17241829179693e-05,
"rewards/rejected": 0.001025418401695788,
"step": 10,
"use_label": 90.0
},
{
"epoch": 0.02,
"grad_norm": 0.6796875,
"learning_rate": 1.0416666666666667e-06,
"logits/chosen": -2.243159770965576,
"logits/rejected": -2.2802278995513916,
"logps/chosen": -56.544715881347656,
"logps/rejected": -68.35901641845703,
"loss": 0.6924,
"pred_label": 0.0,
"rewards/accuracies": 0.22499999403953552,
"rewards/chosen": 0.006556531880050898,
"rewards/margins": 0.001379690133035183,
"rewards/rejected": 0.005176841747015715,
"step": 20,
"use_label": 242.0
},
{
"epoch": 0.03,
"grad_norm": 0.55078125,
"learning_rate": 1.5625e-06,
"logits/chosen": -2.2634024620056152,
"logits/rejected": -2.2475943565368652,
"logps/chosen": -53.98667526245117,
"logps/rejected": -67.89213562011719,
"loss": 0.692,
"pred_label": 0.0,
"rewards/accuracies": 0.2750000059604645,
"rewards/chosen": 0.01648966409265995,
"rewards/margins": 0.002599921775981784,
"rewards/rejected": 0.013889740221202374,
"step": 30,
"use_label": 402.0
},
{
"epoch": 0.04,
"grad_norm": 0.6328125,
"learning_rate": 2.0833333333333334e-06,
"logits/chosen": -2.2825467586517334,
"logits/rejected": -2.2754693031311035,
"logps/chosen": -55.582061767578125,
"logps/rejected": -66.59407043457031,
"loss": 0.6909,
"pred_label": 0.0,
"rewards/accuracies": 0.21250000596046448,
"rewards/chosen": 0.018406417220830917,
"rewards/margins": 0.0006764450808987021,
"rewards/rejected": 0.017729971557855606,
"step": 40,
"use_label": 562.0
},
{
"epoch": 0.05,
"grad_norm": 0.6015625,
"learning_rate": 2.604166666666667e-06,
"logits/chosen": -2.3444912433624268,
"logits/rejected": -2.3341281414031982,
"logps/chosen": -69.13630676269531,
"logps/rejected": -84.64376831054688,
"loss": 0.6889,
"pred_label": 0.0,
"rewards/accuracies": 0.2874999940395355,
"rewards/chosen": 0.02657836303114891,
"rewards/margins": 0.005359734408557415,
"rewards/rejected": 0.021218623965978622,
"step": 50,
"use_label": 722.0
},
{
"epoch": 0.06,
"grad_norm": 0.72265625,
"learning_rate": 3.125e-06,
"logits/chosen": -2.3026936054229736,
"logits/rejected": -2.309264659881592,
"logps/chosen": -82.00704193115234,
"logps/rejected": -90.7305908203125,
"loss": 0.6874,
"pred_label": 0.0,
"rewards/accuracies": 0.34375,
"rewards/chosen": 0.03688042238354683,
"rewards/margins": 0.014220851473510265,
"rewards/rejected": 0.02265957184135914,
"step": 60,
"use_label": 882.0
},
{
"epoch": 0.07,
"grad_norm": 0.79296875,
"learning_rate": 3.6458333333333333e-06,
"logits/chosen": -2.344853401184082,
"logits/rejected": -2.3261306285858154,
"logps/chosen": -77.20336151123047,
"logps/rejected": -77.6347885131836,
"loss": 0.6851,
"pred_label": 0.0,
"rewards/accuracies": 0.30000001192092896,
"rewards/chosen": 0.02531364932656288,
"rewards/margins": 0.01608472317457199,
"rewards/rejected": 0.009228924289345741,
"step": 70,
"use_label": 1042.0
},
{
"epoch": 0.08,
"grad_norm": 0.80078125,
"learning_rate": 4.166666666666667e-06,
"logits/chosen": -2.241945743560791,
"logits/rejected": -2.195178985595703,
"logps/chosen": -81.6376953125,
"logps/rejected": -89.05104064941406,
"loss": 0.6814,
"pred_label": 0.9750000238418579,
"rewards/accuracies": 0.32499998807907104,
"rewards/chosen": 0.004142354242503643,
"rewards/margins": 0.025017932057380676,
"rewards/rejected": -0.02087557688355446,
"step": 80,
"use_label": 1201.0250244140625
},
{
"epoch": 0.09,
"grad_norm": 1.578125,
"learning_rate": 4.6875000000000004e-06,
"logits/chosen": -2.1907405853271484,
"logits/rejected": -2.232959270477295,
"logps/chosen": -62.31688690185547,
"logps/rejected": -80.38573455810547,
"loss": 0.6812,
"pred_label": 3.0999999046325684,
"rewards/accuracies": 0.33125001192092896,
"rewards/chosen": -0.012271342799067497,
"rewards/margins": 0.04507603123784065,
"rewards/rejected": -0.0573473684489727,
"step": 90,
"use_label": 1358.9000244140625
},
{
"epoch": 0.1,
"grad_norm": 0.796875,
"learning_rate": 4.9997324926814375e-06,
"logits/chosen": -2.132638454437256,
"logits/rejected": -2.0995519161224365,
"logps/chosen": -76.97563171386719,
"logps/rejected": -79.27615356445312,
"loss": 0.6818,
"pred_label": 7.150000095367432,
"rewards/accuracies": 0.3125,
"rewards/chosen": -0.02400936186313629,
"rewards/margins": 0.05036945268511772,
"rewards/rejected": -0.07437881827354431,
"step": 100,
"use_label": 1514.8499755859375
},
{
"epoch": 0.1,
"eval_logits/chosen": -2.097480297088623,
"eval_logits/rejected": -2.0663790702819824,
"eval_logps/chosen": -69.46318054199219,
"eval_logps/rejected": -80.35824584960938,
"eval_loss": 0.6813791394233704,
"eval_pred_label": 22.539682388305664,
"eval_rewards/accuracies": 0.3392857015132904,
"eval_rewards/chosen": -0.005626226309686899,
"eval_rewards/margins": 0.04397555813193321,
"eval_rewards/rejected": -0.04960178583860397,
"eval_runtime": 245.3242,
"eval_samples_per_second": 8.152,
"eval_steps_per_second": 0.257,
"eval_use_label": 1833.4603271484375,
"step": 100
},
{
"epoch": 0.12,
"grad_norm": 1.1171875,
"learning_rate": 4.996723692767927e-06,
"logits/chosen": -2.114673137664795,
"logits/rejected": -2.094468355178833,
"logps/chosen": -63.9236946105957,
"logps/rejected": -79.44518280029297,
"loss": 0.6827,
"pred_label": 34.0,
"rewards/accuracies": 0.30000001192092896,
"rewards/chosen": -0.02154584601521492,
"rewards/margins": 0.04528125748038292,
"rewards/rejected": -0.06682710349559784,
"step": 110,
"use_label": 2152.0
},
{
"epoch": 0.13,
"grad_norm": 1.0390625,
"learning_rate": 4.9903757462135984e-06,
"logits/chosen": -2.2926628589630127,
"logits/rejected": -2.177788257598877,
"logps/chosen": -83.48246002197266,
"logps/rejected": -97.60291290283203,
"loss": 0.683,
"pred_label": 44.67499923706055,
"rewards/accuracies": 0.3062500059604645,
"rewards/chosen": -0.0941522866487503,
"rewards/margins": 0.06425690650939941,
"rewards/rejected": -0.15840919315814972,
"step": 120,
"use_label": 2301.324951171875
},
{
"epoch": 0.14,
"grad_norm": 0.546875,
"learning_rate": 4.980697142834315e-06,
"logits/chosen": -2.0968613624572754,
"logits/rejected": -2.1124091148376465,
"logps/chosen": -66.370849609375,
"logps/rejected": -77.3319320678711,
"loss": 0.6845,
"pred_label": 57.57500076293945,
"rewards/accuracies": 0.2750000059604645,
"rewards/chosen": -0.07896758615970612,
"rewards/margins": 0.04609644412994385,
"rewards/rejected": -0.12506404519081116,
"step": 130,
"use_label": 2448.425048828125
},
{
"epoch": 0.15,
"grad_norm": 0.78515625,
"learning_rate": 4.967700826904229e-06,
"logits/chosen": -2.1041221618652344,
"logits/rejected": -2.138929843902588,
"logps/chosen": -68.11909484863281,
"logps/rejected": -90.16340637207031,
"loss": 0.6868,
"pred_label": 73.75,
"rewards/accuracies": 0.30000001192092896,
"rewards/chosen": -0.08846104890108109,
"rewards/margins": 0.0647779330611229,
"rewards/rejected": -0.15323898196220398,
"step": 140,
"use_label": 2592.25
},
{
"epoch": 0.16,
"grad_norm": 1.1015625,
"learning_rate": 4.951404179843963e-06,
"logits/chosen": -2.1765952110290527,
"logits/rejected": -2.125175714492798,
"logps/chosen": -54.37804412841797,
"logps/rejected": -58.982269287109375,
"loss": 0.6809,
"pred_label": 91.3499984741211,
"rewards/accuracies": 0.30000001192092896,
"rewards/chosen": -0.06883221119642258,
"rewards/margins": 0.06803621351718903,
"rewards/rejected": -0.136868417263031,
"step": 150,
"use_label": 2734.64990234375
},
{
"epoch": 0.17,
"grad_norm": 1.03125,
"learning_rate": 4.931828996974498e-06,
"logits/chosen": -2.2455694675445557,
"logits/rejected": -2.213240623474121,
"logps/chosen": -94.4081802368164,
"logps/rejected": -107.48802185058594,
"loss": 0.6857,
"pred_label": 115.55000305175781,
"rewards/accuracies": 0.35624998807907104,
"rewards/chosen": -0.12804970145225525,
"rewards/margins": 0.12874242663383484,
"rewards/rejected": -0.2567921280860901,
"step": 160,
"use_label": 2870.449951171875
},
{
"epoch": 0.18,
"grad_norm": 1.1875,
"learning_rate": 4.909001458367867e-06,
"logits/chosen": -2.1201233863830566,
"logits/rejected": -2.0822367668151855,
"logps/chosen": -75.75311279296875,
"logps/rejected": -87.55944061279297,
"loss": 0.6869,
"pred_label": 141.85000610351562,
"rewards/accuracies": 0.33125001192092896,
"rewards/chosen": -0.1179669052362442,
"rewards/margins": 0.09383226186037064,
"rewards/rejected": -0.21179917454719543,
"step": 170,
"use_label": 3004.14990234375
},
{
"epoch": 0.19,
"grad_norm": 1.4296875,
"learning_rate": 4.882952093833628e-06,
"logits/chosen": -2.1013779640197754,
"logits/rejected": -2.121537685394287,
"logps/chosen": -70.6474838256836,
"logps/rejected": -89.79743957519531,
"loss": 0.685,
"pred_label": 161.3249969482422,
"rewards/accuracies": 0.33125001192092896,
"rewards/chosen": -0.08145526796579361,
"rewards/margins": 0.08172430098056793,
"rewards/rejected": -0.16317956149578094,
"step": 180,
"use_label": 3144.675048828125
},
{
"epoch": 0.2,
"grad_norm": 0.8515625,
"learning_rate": 4.853715742087947e-06,
"logits/chosen": -2.1533255577087402,
"logits/rejected": -2.104222297668457,
"logps/chosen": -87.3572998046875,
"logps/rejected": -91.95249938964844,
"loss": 0.6862,
"pred_label": 181.39999389648438,
"rewards/accuracies": 0.375,
"rewards/chosen": -0.13474301993846893,
"rewards/margins": 0.08988693356513977,
"rewards/rejected": -0.2246299535036087,
"step": 190,
"use_label": 3284.60009765625
},
{
"epoch": 0.21,
"grad_norm": 0.96875,
"learning_rate": 4.821331504159906e-06,
"logits/chosen": -2.137516736984253,
"logits/rejected": -2.13090443611145,
"logps/chosen": -94.10081481933594,
"logps/rejected": -95.15316009521484,
"loss": 0.6818,
"pred_label": 205.875,
"rewards/accuracies": 0.38749998807907104,
"rewards/chosen": -0.14046669006347656,
"rewards/margins": 0.07937734574079514,
"rewards/rejected": -0.2198440283536911,
"step": 200,
"use_label": 3420.125
},
{
"epoch": 0.21,
"eval_logits/chosen": -2.021465301513672,
"eval_logits/rejected": -1.9937611818313599,
"eval_logps/chosen": -82.4782485961914,
"eval_logps/rejected": -99.20675659179688,
"eval_loss": 0.6860649585723877,
"eval_pred_label": 258.79364013671875,
"eval_rewards/accuracies": 0.3373015820980072,
"eval_rewards/chosen": -0.13577698171138763,
"eval_rewards/margins": 0.10230996459722519,
"eval_rewards/rejected": -0.23808695375919342,
"eval_runtime": 245.9338,
"eval_samples_per_second": 8.132,
"eval_steps_per_second": 0.256,
"eval_use_label": 3701.206298828125,
"step": 200
},
{
"epoch": 0.22,
"grad_norm": 1.1484375,
"learning_rate": 4.7858426910973435e-06,
"logits/chosen": -2.1574149131774902,
"logits/rejected": -2.1307334899902344,
"logps/chosen": -77.64894104003906,
"logps/rejected": -89.26710510253906,
"loss": 0.6828,
"pred_label": 313.32501220703125,
"rewards/accuracies": 0.3687500059604645,
"rewards/chosen": -0.09638272225856781,
"rewards/margins": 0.12071452289819717,
"rewards/rejected": -0.2170972377061844,
"step": 210,
"use_label": 3976.675048828125
},
{
"epoch": 0.23,
"grad_norm": 1.40625,
"learning_rate": 4.747296766042161e-06,
"logits/chosen": -2.1187565326690674,
"logits/rejected": -2.102626323699951,
"logps/chosen": -90.67762756347656,
"logps/rejected": -96.60699462890625,
"loss": 0.6884,
"pred_label": 343.875,
"rewards/accuracies": 0.35624998807907104,
"rewards/chosen": -0.1462414264678955,
"rewards/margins": 0.12368818372488022,
"rewards/rejected": -0.2699296176433563,
"step": 220,
"use_label": 4106.125
},
{
"epoch": 0.24,
"grad_norm": 1.1484375,
"learning_rate": 4.705745280752586e-06,
"logits/chosen": -2.1437509059906006,
"logits/rejected": -2.084073781967163,
"logps/chosen": -90.86326599121094,
"logps/rejected": -96.72235870361328,
"loss": 0.6875,
"pred_label": 378.6000061035156,
"rewards/accuracies": 0.35624998807907104,
"rewards/chosen": -0.12124122679233551,
"rewards/margins": 0.11637073755264282,
"rewards/rejected": -0.23761197924613953,
"step": 230,
"use_label": 4231.39990234375
},
{
"epoch": 0.25,
"grad_norm": 0.953125,
"learning_rate": 4.661243806657256e-06,
"logits/chosen": -2.1431565284729004,
"logits/rejected": -2.1365227699279785,
"logps/chosen": -71.16796875,
"logps/rejected": -91.01861572265625,
"loss": 0.6846,
"pred_label": 403.125,
"rewards/accuracies": 0.32499998807907104,
"rewards/chosen": -0.07454425096511841,
"rewards/margins": 0.09627760201692581,
"rewards/rejected": -0.17082183063030243,
"step": 240,
"use_label": 4366.875
},
{
"epoch": 0.26,
"grad_norm": 0.890625,
"learning_rate": 4.613851860533367e-06,
"logits/chosen": -2.1595332622528076,
"logits/rejected": -2.183953285217285,
"logps/chosen": -71.86934661865234,
"logps/rejected": -80.0597152709961,
"loss": 0.6844,
"pred_label": 422.25,
"rewards/accuracies": 0.3125,
"rewards/chosen": -0.06741674989461899,
"rewards/margins": 0.08548234403133392,
"rewards/rejected": -0.1528991013765335,
"step": 250,
"use_label": 4507.75
},
{
"epoch": 0.27,
"grad_norm": 1.0390625,
"learning_rate": 4.563632824908252e-06,
"logits/chosen": -2.1189560890197754,
"logits/rejected": -2.071620464324951,
"logps/chosen": -77.1129150390625,
"logps/rejected": -101.45845031738281,
"loss": 0.6837,
"pred_label": 445.79998779296875,
"rewards/accuracies": 0.3125,
"rewards/chosen": -0.16171860694885254,
"rewards/margins": 0.11343212425708771,
"rewards/rejected": -0.27515071630477905,
"step": 260,
"use_label": 4644.2001953125
},
{
"epoch": 0.28,
"grad_norm": 1.0703125,
"learning_rate": 4.510653863290871e-06,
"logits/chosen": -2.1512458324432373,
"logits/rejected": -2.164412021636963,
"logps/chosen": -91.74055480957031,
"logps/rejected": -95.13731384277344,
"loss": 0.6883,
"pred_label": 470.04998779296875,
"rewards/accuracies": 0.3125,
"rewards/chosen": -0.16311386227607727,
"rewards/margins": 0.0933571308851242,
"rewards/rejected": -0.2564709782600403,
"step": 270,
"use_label": 4779.9501953125
},
{
"epoch": 0.29,
"grad_norm": 0.8828125,
"learning_rate": 4.454985830346574e-06,
"logits/chosen": -2.0734293460845947,
"logits/rejected": -2.1033730506896973,
"logps/chosen": -76.7903823852539,
"logps/rejected": -86.99803161621094,
"loss": 0.6858,
"pred_label": 494.9750061035156,
"rewards/accuracies": 0.29374998807907104,
"rewards/chosen": -0.15558014810085297,
"rewards/margins": 0.050300367176532745,
"rewards/rejected": -0.2058805227279663,
"step": 280,
"use_label": 4915.02490234375
},
{
"epoch": 0.3,
"grad_norm": 1.3125,
"learning_rate": 4.396703177135262e-06,
"logits/chosen": -1.9870249032974243,
"logits/rejected": -1.956434965133667,
"logps/chosen": -89.98160552978516,
"logps/rejected": -99.75212097167969,
"loss": 0.6905,
"pred_label": 527.0499877929688,
"rewards/accuracies": 0.36250001192092896,
"rewards/chosen": -0.13706301152706146,
"rewards/margins": 0.16557420790195465,
"rewards/rejected": -0.3026372492313385,
"step": 290,
"use_label": 5042.9501953125
},
{
"epoch": 0.31,
"grad_norm": 1.6015625,
"learning_rate": 4.335883851539693e-06,
"logits/chosen": -1.9497883319854736,
"logits/rejected": -1.964604377746582,
"logps/chosen": -68.64933013916016,
"logps/rejected": -91.48945617675781,
"loss": 0.6848,
"pred_label": 561.8499755859375,
"rewards/accuracies": 0.33125001192092896,
"rewards/chosen": -0.14721202850341797,
"rewards/margins": 0.14547064900398254,
"rewards/rejected": -0.2926826477050781,
"step": 300,
"use_label": 5168.14990234375
},
{
"epoch": 0.31,
"eval_logits/chosen": -1.9156862497329712,
"eval_logits/rejected": -1.8827954530715942,
"eval_logps/chosen": -89.57630920410156,
"eval_logps/rejected": -109.2765884399414,
"eval_loss": 0.6877307295799255,
"eval_pred_label": 626.1270141601562,
"eval_rewards/accuracies": 0.341269850730896,
"eval_rewards/chosen": -0.20675767958164215,
"eval_rewards/margins": 0.13202756643295288,
"eval_rewards/rejected": -0.33878523111343384,
"eval_runtime": 246.2269,
"eval_samples_per_second": 8.123,
"eval_steps_per_second": 0.256,
"eval_use_label": 5437.873046875,
"step": 300
},
{
"epoch": 0.32,
"grad_norm": 1.5,
"learning_rate": 4.2726091940171055e-06,
"logits/chosen": -2.043640613555908,
"logits/rejected": -2.01674222946167,
"logps/chosen": -72.24534606933594,
"logps/rejected": -89.407470703125,
"loss": 0.6865,
"pred_label": 688.9500122070312,
"rewards/accuracies": 0.29374998807907104,
"rewards/chosen": -0.23255303502082825,
"rewards/margins": 0.06651856750249863,
"rewards/rejected": -0.29907160997390747,
"step": 310,
"use_label": 5705.0498046875
},
{
"epoch": 0.33,
"grad_norm": 1.1796875,
"learning_rate": 4.206963828813555e-06,
"logits/chosen": -1.9597671031951904,
"logits/rejected": -1.9893718957901,
"logps/chosen": -94.37977600097656,
"logps/rejected": -118.25643157958984,
"loss": 0.6871,
"pred_label": 724.375,
"rewards/accuracies": 0.36250001192092896,
"rewards/chosen": -0.20438706874847412,
"rewards/margins": 0.13566336035728455,
"rewards/rejected": -0.34005045890808105,
"step": 320,
"use_label": 5829.625
},
{
"epoch": 0.35,
"grad_norm": 0.95703125,
"learning_rate": 4.139035550786495e-06,
"logits/chosen": -1.989506483078003,
"logits/rejected": -1.9580066204071045,
"logps/chosen": -73.50363159179688,
"logps/rejected": -87.75289154052734,
"loss": 0.683,
"pred_label": 754.4500122070312,
"rewards/accuracies": 0.36250001192092896,
"rewards/chosen": -0.1003209576010704,
"rewards/margins": 0.13466720283031464,
"rewards/rejected": -0.23498816788196564,
"step": 330,
"use_label": 5959.5498046875
},
{
"epoch": 0.36,
"grad_norm": 1.0234375,
"learning_rate": 4.068915207986931e-06,
"logits/chosen": -2.0428695678710938,
"logits/rejected": -2.016120195388794,
"logps/chosen": -74.91081237792969,
"logps/rejected": -93.89201354980469,
"loss": 0.6894,
"pred_label": 786.4749755859375,
"rewards/accuracies": 0.3375000059604645,
"rewards/chosen": -0.11903776973485947,
"rewards/margins": 0.11223740875720978,
"rewards/rejected": -0.23127520084381104,
"step": 340,
"use_label": 6087.52490234375
},
{
"epoch": 0.37,
"grad_norm": 0.984375,
"learning_rate": 3.996696580158211e-06,
"logits/chosen": -2.0441341400146484,
"logits/rejected": -2.0229620933532715,
"logps/chosen": -73.9575424194336,
"logps/rejected": -86.34129333496094,
"loss": 0.6869,
"pred_label": 817.5250244140625,
"rewards/accuracies": 0.3125,
"rewards/chosen": -0.133123978972435,
"rewards/margins": 0.08419892936944962,
"rewards/rejected": -0.2173229157924652,
"step": 350,
"use_label": 6216.47509765625
},
{
"epoch": 0.38,
"grad_norm": 1.546875,
"learning_rate": 3.922476253313921e-06,
"logits/chosen": -2.0575146675109863,
"logits/rejected": -2.054591417312622,
"logps/chosen": -82.88232421875,
"logps/rejected": -90.05668640136719,
"loss": 0.6863,
"pred_label": 848.6500244140625,
"rewards/accuracies": 0.3375000059604645,
"rewards/chosen": -0.13817565143108368,
"rewards/margins": 0.11208128929138184,
"rewards/rejected": -0.2502569556236267,
"step": 360,
"use_label": 6345.35009765625
},
{
"epoch": 0.39,
"grad_norm": 0.75,
"learning_rate": 3.846353490562664e-06,
"logits/chosen": -2.076312780380249,
"logits/rejected": -1.9995708465576172,
"logps/chosen": -85.83981323242188,
"logps/rejected": -95.1656723022461,
"loss": 0.6844,
"pred_label": 880.4249877929688,
"rewards/accuracies": 0.3687500059604645,
"rewards/chosen": -0.11745607852935791,
"rewards/margins": 0.14055705070495605,
"rewards/rejected": -0.2580130994319916,
"step": 370,
"use_label": 6473.5751953125
},
{
"epoch": 0.4,
"grad_norm": 0.96484375,
"learning_rate": 3.768430099352445e-06,
"logits/chosen": -2.0079166889190674,
"logits/rejected": -1.986297845840454,
"logps/chosen": -76.30638122558594,
"logps/rejected": -93.93800354003906,
"loss": 0.6924,
"pred_label": 912.5999755859375,
"rewards/accuracies": 0.3062500059604645,
"rewards/chosen": -0.1675274670124054,
"rewards/margins": 0.08305275440216064,
"rewards/rejected": -0.25058022141456604,
"step": 380,
"use_label": 6601.39990234375
},
{
"epoch": 0.41,
"grad_norm": 0.97265625,
"learning_rate": 3.6888102953122307e-06,
"logits/chosen": -1.9291635751724243,
"logits/rejected": -1.914608359336853,
"logps/chosen": -101.44157409667969,
"logps/rejected": -96.10136413574219,
"loss": 0.6878,
"pred_label": 952.8250122070312,
"rewards/accuracies": 0.3187499940395355,
"rewards/chosen": -0.1657881736755371,
"rewards/margins": 0.12364902347326279,
"rewards/rejected": -0.2894372344017029,
"step": 390,
"use_label": 6721.1748046875
},
{
"epoch": 0.42,
"grad_norm": 1.296875,
"learning_rate": 3.607600562872785e-06,
"logits/chosen": -1.8988447189331055,
"logits/rejected": -1.8926557302474976,
"logps/chosen": -87.97608947753906,
"logps/rejected": -108.15446472167969,
"loss": 0.6857,
"pred_label": 987.5999755859375,
"rewards/accuracies": 0.34375,
"rewards/chosen": -0.16945099830627441,
"rewards/margins": 0.11657001823186874,
"rewards/rejected": -0.28602102398872375,
"step": 400,
"use_label": 6846.39990234375
},
{
"epoch": 0.42,
"eval_logits/chosen": -1.4529144763946533,
"eval_logits/rejected": -1.4031411409378052,
"eval_logps/chosen": -86.92367553710938,
"eval_logps/rejected": -108.39134979248047,
"eval_loss": 0.6884719133377075,
"eval_pred_label": 1055.5555419921875,
"eval_rewards/accuracies": 0.3531745970249176,
"eval_rewards/chosen": -0.18023118376731873,
"eval_rewards/margins": 0.14970164000988007,
"eval_rewards/rejected": -0.32993283867836,
"eval_runtime": 246.35,
"eval_samples_per_second": 8.119,
"eval_steps_per_second": 0.256,
"eval_use_label": 7112.4443359375,
"step": 400
},
{
"epoch": 0.43,
"grad_norm": 1.28125,
"learning_rate": 3.5249095128531863e-06,
"logits/chosen": -1.289879560470581,
"logits/rejected": -1.4085474014282227,
"logps/chosen": -85.75054168701172,
"logps/rejected": -96.24283599853516,
"loss": 0.6874,
"pred_label": 1135.699951171875,
"rewards/accuracies": 0.38749998807907104,
"rewards/chosen": -0.21242520213127136,
"rewards/margins": 0.17107079923152924,
"rewards/rejected": -0.3834960162639618,
"step": 410,
"use_label": 7362.2998046875
},
{
"epoch": 0.44,
"grad_norm": 0.97265625,
"learning_rate": 3.4408477372034743e-06,
"logits/chosen": -1.2336995601654053,
"logits/rejected": -1.1623611450195312,
"logps/chosen": -97.20266723632812,
"logps/rejected": -117.6893081665039,
"loss": 0.6882,
"pred_label": 1171.425048828125,
"rewards/accuracies": 0.3375000059604645,
"rewards/chosen": -0.3355943560600281,
"rewards/margins": 0.16045086085796356,
"rewards/rejected": -0.49604520201683044,
"step": 420,
"use_label": 7486.5751953125
},
{
"epoch": 0.45,
"grad_norm": 1.1484375,
"learning_rate": 3.355527661097728e-06,
"logits/chosen": -1.3129976987838745,
"logits/rejected": -1.2275488376617432,
"logps/chosen": -106.88911437988281,
"logps/rejected": -112.3751449584961,
"loss": 0.6918,
"pred_label": 1207.9749755859375,
"rewards/accuracies": 0.3125,
"rewards/chosen": -0.3042059540748596,
"rewards/margins": 0.13597823679447174,
"rewards/rejected": -0.44018417596817017,
"step": 430,
"use_label": 7610.02490234375
},
{
"epoch": 0.46,
"grad_norm": 1.5625,
"learning_rate": 3.269063392575352e-06,
"logits/chosen": -1.3159044981002808,
"logits/rejected": -1.413769006729126,
"logps/chosen": -90.12797546386719,
"logps/rejected": -101.85379028320312,
"loss": 0.6858,
"pred_label": 1242.5,
"rewards/accuracies": 0.33125001192092896,
"rewards/chosen": -0.22682049870491028,
"rewards/margins": 0.159098818898201,
"rewards/rejected": -0.3859192728996277,
"step": 440,
"use_label": 7735.5
},
{
"epoch": 0.47,
"grad_norm": 1.375,
"learning_rate": 3.181570569931697e-06,
"logits/chosen": -1.4389588832855225,
"logits/rejected": -1.5265202522277832,
"logps/chosen": -96.37947845458984,
"logps/rejected": -113.1718521118164,
"loss": 0.6951,
"pred_label": 1281.3499755859375,
"rewards/accuracies": 0.3375000059604645,
"rewards/chosen": -0.2355901300907135,
"rewards/margins": 0.13590970635414124,
"rewards/rejected": -0.37149983644485474,
"step": 450,
"use_label": 7856.64990234375
},
{
"epoch": 0.48,
"grad_norm": 1.015625,
"learning_rate": 3.09316620706208e-06,
"logits/chosen": -1.2455997467041016,
"logits/rejected": -1.1902601718902588,
"logps/chosen": -72.07853698730469,
"logps/rejected": -84.86478424072266,
"loss": 0.6842,
"pred_label": 1311.824951171875,
"rewards/accuracies": 0.29374998807907104,
"rewards/chosen": -0.1508016437292099,
"rewards/margins": 0.1797787994146347,
"rewards/rejected": -0.330580472946167,
"step": 460,
"use_label": 7986.1748046875
},
{
"epoch": 0.49,
"grad_norm": 1.1015625,
"learning_rate": 3.0039685369660785e-06,
"logits/chosen": -1.175449252128601,
"logits/rejected": -1.0759943723678589,
"logps/chosen": -88.91249084472656,
"logps/rejected": -110.02799987792969,
"loss": 0.6873,
"pred_label": 1345.1500244140625,
"rewards/accuracies": 0.3687500059604645,
"rewards/chosen": -0.22000393271446228,
"rewards/margins": 0.1964809000492096,
"rewards/rejected": -0.4164848327636719,
"step": 470,
"use_label": 8112.85009765625
},
{
"epoch": 0.5,
"grad_norm": 1.0859375,
"learning_rate": 2.91409685362137e-06,
"logits/chosen": -1.0014227628707886,
"logits/rejected": -1.0880533456802368,
"logps/chosen": -99.41879272460938,
"logps/rejected": -120.02769470214844,
"loss": 0.6868,
"pred_label": 1391.25,
"rewards/accuracies": 0.35624998807907104,
"rewards/chosen": -0.24276605248451233,
"rewards/margins": 0.17868337035179138,
"rewards/rejected": -0.4214494228363037,
"step": 480,
"use_label": 8226.75
},
{
"epoch": 0.51,
"grad_norm": 1.4375,
"learning_rate": 2.8236713524386085e-06,
"logits/chosen": -1.0729541778564453,
"logits/rejected": -0.9298813939094543,
"logps/chosen": -88.73147583007812,
"logps/rejected": -94.53245544433594,
"loss": 0.6921,
"pred_label": 1428.9000244140625,
"rewards/accuracies": 0.26875001192092896,
"rewards/chosen": -0.22107498347759247,
"rewards/margins": 0.12524999678134918,
"rewards/rejected": -0.34632498025894165,
"step": 490,
"use_label": 8349.099609375
},
{
"epoch": 0.52,
"grad_norm": 1.421875,
"learning_rate": 2.7328129695107205e-06,
"logits/chosen": -0.8902079463005066,
"logits/rejected": -1.065393090248108,
"logps/chosen": -113.58573150634766,
"logps/rejected": -131.9083709716797,
"loss": 0.6894,
"pred_label": 1462.4000244140625,
"rewards/accuracies": 0.41874998807907104,
"rewards/chosen": -0.37447452545166016,
"rewards/margins": 0.17800332605838776,
"rewards/rejected": -0.5524778962135315,
"step": 500,
"use_label": 8475.599609375
},
{
"epoch": 0.52,
"eval_logits/chosen": -0.6888664960861206,
"eval_logits/rejected": -0.5997034311294556,
"eval_logps/chosen": -97.52025604248047,
"eval_logps/rejected": -120.9921646118164,
"eval_loss": 0.6891720294952393,
"eval_pred_label": 1530.5714111328125,
"eval_rewards/accuracies": 0.3551587164402008,
"eval_rewards/chosen": -0.28619715571403503,
"eval_rewards/margins": 0.1697438359260559,
"eval_rewards/rejected": -0.45594096183776855,
"eval_runtime": 246.2759,
"eval_samples_per_second": 8.121,
"eval_steps_per_second": 0.256,
"eval_use_label": 8741.4287109375,
"step": 500
},
{
"epoch": 0.53,
"grad_norm": 1.0078125,
"learning_rate": 2.641643219871597e-06,
"logits/chosen": -0.7708507776260376,
"logits/rejected": -0.882653534412384,
"logps/chosen": -90.50456237792969,
"logps/rejected": -116.84162902832031,
"loss": 0.686,
"pred_label": 1610.5999755859375,
"rewards/accuracies": 0.36250001192092896,
"rewards/chosen": -0.2625977396965027,
"rewards/margins": 0.20036396384239197,
"rewards/rejected": -0.4629616141319275,
"step": 510,
"use_label": 8991.400390625
},
{
"epoch": 0.54,
"grad_norm": 1.4765625,
"learning_rate": 2.5502840349805074e-06,
"logits/chosen": -0.8800374865531921,
"logits/rejected": -1.038163185119629,
"logps/chosen": -100.99266052246094,
"logps/rejected": -116.75798034667969,
"loss": 0.6895,
"pred_label": 1653.0,
"rewards/accuracies": 0.36250001192092896,
"rewards/chosen": -0.2859944701194763,
"rewards/margins": 0.15662841498851776,
"rewards/rejected": -0.4426229000091553,
"step": 520,
"use_label": 9109.0
},
{
"epoch": 0.55,
"grad_norm": 1.3671875,
"learning_rate": 2.4588575996495797e-06,
"logits/chosen": -0.8304817080497742,
"logits/rejected": -0.7847825288772583,
"logps/chosen": -105.92545318603516,
"logps/rejected": -117.15931701660156,
"loss": 0.6895,
"pred_label": 1692.175048828125,
"rewards/accuracies": 0.36250001192092896,
"rewards/chosen": -0.316447913646698,
"rewards/margins": 0.17969803512096405,
"rewards/rejected": -0.49614596366882324,
"step": 530,
"use_label": 9229.8251953125
},
{
"epoch": 0.57,
"grad_norm": 2.03125,
"learning_rate": 2.367486188632446e-06,
"logits/chosen": -0.67156982421875,
"logits/rejected": -0.8070074319839478,
"logps/chosen": -112.666748046875,
"logps/rejected": -131.92593383789062,
"loss": 0.6896,
"pred_label": 1734.375,
"rewards/accuracies": 0.375,
"rewards/chosen": -0.35928016901016235,
"rewards/margins": 0.22706659138202667,
"rewards/rejected": -0.5863467454910278,
"step": 540,
"use_label": 9347.625
},
{
"epoch": 0.58,
"grad_norm": 1.796875,
"learning_rate": 2.276292003092593e-06,
"logits/chosen": -0.7944391369819641,
"logits/rejected": -0.7596977353096008,
"logps/chosen": -107.38740539550781,
"logps/rejected": -111.28292083740234,
"loss": 0.6887,
"pred_label": 1775.7249755859375,
"rewards/accuracies": 0.3125,
"rewards/chosen": -0.3932684063911438,
"rewards/margins": 0.12325477600097656,
"rewards/rejected": -0.5165232419967651,
"step": 550,
"use_label": 9466.275390625
},
{
"epoch": 0.59,
"grad_norm": 1.3515625,
"learning_rate": 2.1853970071701415e-06,
"logits/chosen": -0.7152852416038513,
"logits/rejected": -0.7174454927444458,
"logps/chosen": -104.6649398803711,
"logps/rejected": -117.61528015136719,
"loss": 0.6901,
"pred_label": 1814.375,
"rewards/accuracies": 0.32499998807907104,
"rewards/chosen": -0.3510952889919281,
"rewards/margins": 0.15508435666561127,
"rewards/rejected": -0.5061796307563782,
"step": 560,
"use_label": 9587.625
},
{
"epoch": 0.6,
"grad_norm": 2.125,
"learning_rate": 2.0949227648656194e-06,
"logits/chosen": -0.925454318523407,
"logits/rejected": -0.849765956401825,
"logps/chosen": -100.53346252441406,
"logps/rejected": -131.70309448242188,
"loss": 0.6872,
"pred_label": 1852.2249755859375,
"rewards/accuracies": 0.375,
"rewards/chosen": -0.3393338620662689,
"rewards/margins": 0.23398590087890625,
"rewards/rejected": -0.5733197927474976,
"step": 570,
"use_label": 9709.775390625
},
{
"epoch": 0.61,
"grad_norm": 1.15625,
"learning_rate": 2.00499027745888e-06,
"logits/chosen": -0.7680953145027161,
"logits/rejected": -0.8566532135009766,
"logps/chosen": -111.98583984375,
"logps/rejected": -131.1743927001953,
"loss": 0.6879,
"pred_label": 1893.7750244140625,
"rewards/accuracies": 0.34375,
"rewards/chosen": -0.37074294686317444,
"rewards/margins": 0.1566895693540573,
"rewards/rejected": -0.5274325013160706,
"step": 580,
"use_label": 9828.224609375
},
{
"epoch": 0.62,
"grad_norm": 1.1171875,
"learning_rate": 1.915719821680624e-06,
"logits/chosen": -0.8080962300300598,
"logits/rejected": -0.7905328869819641,
"logps/chosen": -125.2184066772461,
"logps/rejected": -148.79432678222656,
"loss": 0.6891,
"pred_label": 1939.25,
"rewards/accuracies": 0.40625,
"rewards/chosen": -0.4552985727787018,
"rewards/margins": 0.22290782630443573,
"rewards/rejected": -0.6782063245773315,
"step": 590,
"use_label": 9942.75
},
{
"epoch": 0.63,
"grad_norm": 1.9609375,
"learning_rate": 1.8272307888529276e-06,
"logits/chosen": -0.5244548320770264,
"logits/rejected": -0.7590290904045105,
"logps/chosen": -122.6807632446289,
"logps/rejected": -162.36203002929688,
"loss": 0.6881,
"pred_label": 1992.0,
"rewards/accuracies": 0.42500001192092896,
"rewards/chosen": -0.48354387283325195,
"rewards/margins": 0.23392179608345032,
"rewards/rejected": -0.7174656391143799,
"step": 600,
"use_label": 10050.0
},
{
"epoch": 0.63,
"eval_logits/chosen": -0.35794487595558167,
"eval_logits/rejected": -0.2547617554664612,
"eval_logps/chosen": -107.16178131103516,
"eval_logps/rejected": -135.9844512939453,
"eval_loss": 0.6918326616287231,
"eval_pred_label": 2082.3173828125,
"eval_rewards/accuracies": 0.3531745970249176,
"eval_rewards/chosen": -0.3826123774051666,
"eval_rewards/margins": 0.22325147688388824,
"eval_rewards/rejected": -0.6058638095855713,
"eval_runtime": 248.3104,
"eval_samples_per_second": 8.054,
"eval_steps_per_second": 0.254,
"eval_use_label": 10293.6826171875,
"step": 600
},
{
"epoch": 0.64,
"grad_norm": 1.515625,
"learning_rate": 1.739641525213929e-06,
"logits/chosen": -0.572044312953949,
"logits/rejected": -0.654716432094574,
"logps/chosen": -95.46563720703125,
"logps/rejected": -132.0639190673828,
"loss": 0.6926,
"pred_label": 2185.449951171875,
"rewards/accuracies": 0.33125001192092896,
"rewards/chosen": -0.3655874729156494,
"rewards/margins": 0.21378450095653534,
"rewards/rejected": -0.579371988773346,
"step": 610,
"use_label": 10520.5498046875
},
{
"epoch": 0.65,
"grad_norm": 1.0859375,
"learning_rate": 1.6530691736402317e-06,
"logits/chosen": -0.7425838708877563,
"logits/rejected": -0.7612688541412354,
"logps/chosen": -98.45491790771484,
"logps/rejected": -139.22779846191406,
"loss": 0.6874,
"pred_label": 2228.10009765625,
"rewards/accuracies": 0.39375001192092896,
"rewards/chosen": -0.3674684762954712,
"rewards/margins": 0.22383132576942444,
"rewards/rejected": -0.591299831867218,
"step": 620,
"use_label": 10637.900390625
},
{
"epoch": 0.66,
"grad_norm": 1.34375,
"learning_rate": 1.5676295169786864e-06,
"logits/chosen": -0.5626051425933838,
"logits/rejected": -0.7373117208480835,
"logps/chosen": -109.76419830322266,
"logps/rejected": -132.89573669433594,
"loss": 0.6861,
"pred_label": 2268.074951171875,
"rewards/accuracies": 0.35624998807907104,
"rewards/chosen": -0.3673921525478363,
"rewards/margins": 0.2162620723247528,
"rewards/rejected": -0.5836542844772339,
"step": 630,
"use_label": 10757.9248046875
},
{
"epoch": 0.67,
"grad_norm": 1.2578125,
"learning_rate": 1.4834368231970922e-06,
"logits/chosen": -0.70842045545578,
"logits/rejected": -0.5356844663619995,
"logps/chosen": -115.94453430175781,
"logps/rejected": -132.53977966308594,
"loss": 0.6881,
"pred_label": 2312.199951171875,
"rewards/accuracies": 0.38749998807907104,
"rewards/chosen": -0.4425238072872162,
"rewards/margins": 0.23113970458507538,
"rewards/rejected": -0.6736636161804199,
"step": 640,
"use_label": 10873.7998046875
},
{
"epoch": 0.68,
"grad_norm": 1.5,
"learning_rate": 1.4006036925609245e-06,
"logits/chosen": -0.7530516386032104,
"logits/rejected": -0.39667490124702454,
"logps/chosen": -117.97354888916016,
"logps/rejected": -148.5204620361328,
"loss": 0.6907,
"pred_label": 2364.60009765625,
"rewards/accuracies": 0.35624998807907104,
"rewards/chosen": -0.4478411078453064,
"rewards/margins": 0.25875502824783325,
"rewards/rejected": -0.7065961956977844,
"step": 650,
"use_label": 10981.400390625
},
{
"epoch": 0.69,
"grad_norm": 1.2109375,
"learning_rate": 1.3192409070404582e-06,
"logits/chosen": -0.4164413511753082,
"logits/rejected": -0.5387105345726013,
"logps/chosen": -93.08172607421875,
"logps/rejected": -106.9631576538086,
"loss": 0.6884,
"pred_label": 2410.39990234375,
"rewards/accuracies": 0.3062500059604645,
"rewards/chosen": -0.3495523929595947,
"rewards/margins": 0.1542079746723175,
"rewards/rejected": -0.5037603378295898,
"step": 660,
"use_label": 11095.599609375
},
{
"epoch": 0.7,
"grad_norm": 1.515625,
"learning_rate": 1.2394572821496953e-06,
"logits/chosen": -0.9564473032951355,
"logits/rejected": -1.0122594833374023,
"logps/chosen": -100.20994567871094,
"logps/rejected": -121.32554626464844,
"loss": 0.6935,
"pred_label": 2446.14990234375,
"rewards/accuracies": 0.34375,
"rewards/chosen": -0.3450331687927246,
"rewards/margins": 0.19006122648715973,
"rewards/rejected": -0.5350943803787231,
"step": 670,
"use_label": 11219.849609375
},
{
"epoch": 0.71,
"grad_norm": 1.546875,
"learning_rate": 1.1613595214152713e-06,
"logits/chosen": -0.588452935218811,
"logits/rejected": -0.6323766708374023,
"logps/chosen": -125.20991516113281,
"logps/rejected": -139.94993591308594,
"loss": 0.6902,
"pred_label": 2485.10009765625,
"rewards/accuracies": 0.38749998807907104,
"rewards/chosen": -0.3915707468986511,
"rewards/margins": 0.19166378676891327,
"rewards/rejected": -0.5832345485687256,
"step": 680,
"use_label": 11340.900390625
},
{
"epoch": 0.72,
"grad_norm": 1.578125,
"learning_rate": 1.0850520736699362e-06,
"logits/chosen": -0.6506579518318176,
"logits/rejected": -0.7167869806289673,
"logps/chosen": -144.53038024902344,
"logps/rejected": -167.38192749023438,
"loss": 0.6898,
"pred_label": 2534.75,
"rewards/accuracies": 0.39375001192092896,
"rewards/chosen": -0.42825189232826233,
"rewards/margins": 0.28569427132606506,
"rewards/rejected": -0.7139460444450378,
"step": 690,
"use_label": 11451.25
},
{
"epoch": 0.73,
"grad_norm": 1.59375,
"learning_rate": 1.0106369933615043e-06,
"logits/chosen": -0.8556931614875793,
"logits/rejected": -0.6913198232650757,
"logps/chosen": -105.3968505859375,
"logps/rejected": -124.95710754394531,
"loss": 0.6913,
"pred_label": 2580.824951171875,
"rewards/accuracies": 0.3375000059604645,
"rewards/chosen": -0.39049768447875977,
"rewards/margins": 0.17418017983436584,
"rewards/rejected": -0.564677894115448,
"step": 700,
"use_label": 11565.1748046875
},
{
"epoch": 0.73,
"eval_logits/chosen": -0.3469957709312439,
"eval_logits/rejected": -0.24619349837303162,
"eval_logps/chosen": -104.32471466064453,
"eval_logps/rejected": -133.26370239257812,
"eval_loss": 0.6898515224456787,
"eval_pred_label": 2673.52392578125,
"eval_rewards/accuracies": 0.3670634925365448,
"eval_rewards/chosen": -0.35424166917800903,
"eval_rewards/margins": 0.22441466152668,
"eval_rewards/rejected": -0.5786563754081726,
"eval_runtime": 248.2749,
"eval_samples_per_second": 8.056,
"eval_steps_per_second": 0.254,
"eval_use_label": 11806.4765625,
"step": 700
},
{
"epoch": 0.74,
"grad_norm": 1.03125,
"learning_rate": 9.382138040640714e-07,
"logits/chosen": -0.6519032716751099,
"logits/rejected": -0.637380063533783,
"logps/chosen": -102.23021697998047,
"logps/rejected": -127.60137939453125,
"loss": 0.6903,
"pred_label": 2771.699951171875,
"rewards/accuracies": 0.3499999940395355,
"rewards/chosen": -0.3915974497795105,
"rewards/margins": 0.21561889350414276,
"rewards/rejected": -0.6072162985801697,
"step": 710,
"use_label": 12038.2998046875
},
{
"epoch": 0.75,
"grad_norm": 1.609375,
"learning_rate": 8.678793653740633e-07,
"logits/chosen": -0.6509895324707031,
"logits/rejected": -0.6935362815856934,
"logps/chosen": -87.30061340332031,
"logps/rejected": -114.2796630859375,
"loss": 0.6903,
"pred_label": 2811.47509765625,
"rewards/accuracies": 0.29374998807907104,
"rewards/chosen": -0.30430155992507935,
"rewards/margins": 0.18221500515937805,
"rewards/rejected": -0.486516535282135,
"step": 720,
"use_label": 12158.525390625
},
{
"epoch": 0.76,
"grad_norm": 2.21875,
"learning_rate": 7.997277433690984e-07,
"logits/chosen": -0.6035222411155701,
"logits/rejected": -0.65208500623703,
"logps/chosen": -100.17440032958984,
"logps/rejected": -119.87808990478516,
"loss": 0.6865,
"pred_label": 2850.0,
"rewards/accuracies": 0.35624998807907104,
"rewards/chosen": -0.2982019782066345,
"rewards/margins": 0.2585477828979492,
"rewards/rejected": -0.5567497611045837,
"step": 730,
"use_label": 12280.0
},
{
"epoch": 0.77,
"grad_norm": 0.80859375,
"learning_rate": 7.338500848029603e-07,
"logits/chosen": -0.4770827293395996,
"logits/rejected": -0.5081530213356018,
"logps/chosen": -94.86068725585938,
"logps/rejected": -116.67037200927734,
"loss": 0.6916,
"pred_label": 2886.125,
"rewards/accuracies": 0.28125,
"rewards/chosen": -0.34235304594039917,
"rewards/margins": 0.19017408788204193,
"rewards/rejected": -0.5325270891189575,
"step": 740,
"use_label": 12403.875
},
{
"epoch": 0.79,
"grad_norm": 1.1015625,
"learning_rate": 6.70334495204884e-07,
"logits/chosen": -0.5357509851455688,
"logits/rejected": -0.594279408454895,
"logps/chosen": -119.76139831542969,
"logps/rejected": -145.1709747314453,
"loss": 0.6905,
"pred_label": 2929.22509765625,
"rewards/accuracies": 0.35624998807907104,
"rewards/chosen": -0.4223107397556305,
"rewards/margins": 0.18705633282661438,
"rewards/rejected": -0.6093670725822449,
"step": 750,
"use_label": 12520.775390625
},
{
"epoch": 0.8,
"grad_norm": 1.1640625,
"learning_rate": 6.092659210462232e-07,
"logits/chosen": -0.6737512350082397,
"logits/rejected": -0.6523575186729431,
"logps/chosen": -86.640625,
"logps/rejected": -124.01812744140625,
"loss": 0.6899,
"pred_label": 2976.050048828125,
"rewards/accuracies": 0.3125,
"rewards/chosen": -0.32672789692878723,
"rewards/margins": 0.1930442750453949,
"rewards/rejected": -0.5197721719741821,
"step": 760,
"use_label": 12633.9501953125
},
{
"epoch": 0.81,
"grad_norm": 1.4375,
"learning_rate": 5.507260361320738e-07,
"logits/chosen": -0.6238114833831787,
"logits/rejected": -0.6686199307441711,
"logps/chosen": -127.0525131225586,
"logps/rejected": -142.44747924804688,
"loss": 0.689,
"pred_label": 3021.85009765625,
"rewards/accuracies": 0.39375001192092896,
"rewards/chosen": -0.43505221605300903,
"rewards/margins": 0.25210094451904297,
"rewards/rejected": -0.687153160572052,
"step": 770,
"use_label": 12748.150390625
},
{
"epoch": 0.82,
"grad_norm": 1.7578125,
"learning_rate": 4.947931323697983e-07,
"logits/chosen": -0.6369722485542297,
"logits/rejected": -0.7722553014755249,
"logps/chosen": -112.76126861572266,
"logps/rejected": -133.56796264648438,
"loss": 0.6915,
"pred_label": 3075.72509765625,
"rewards/accuracies": 0.36250001192092896,
"rewards/chosen": -0.3996170461177826,
"rewards/margins": 0.22261002659797668,
"rewards/rejected": -0.6222270727157593,
"step": 780,
"use_label": 12854.275390625
},
{
"epoch": 0.83,
"grad_norm": 1.421875,
"learning_rate": 4.4154201506053985e-07,
"logits/chosen": -0.5256940126419067,
"logits/rejected": -0.467402845621109,
"logps/chosen": -95.73258209228516,
"logps/rejected": -103.3360366821289,
"loss": 0.6917,
"pred_label": 3123.85009765625,
"rewards/accuracies": 0.32499998807907104,
"rewards/chosen": -0.30898317694664,
"rewards/margins": 0.2029590606689453,
"rewards/rejected": -0.5119422674179077,
"step": 790,
"use_label": 12966.150390625
},
{
"epoch": 0.84,
"grad_norm": 1.359375,
"learning_rate": 3.910439028537638e-07,
"logits/chosen": -0.6677756905555725,
"logits/rejected": -0.607046902179718,
"logps/chosen": -92.61612701416016,
"logps/rejected": -115.20296478271484,
"loss": 0.6893,
"pred_label": 3166.449951171875,
"rewards/accuracies": 0.3499999940395355,
"rewards/chosen": -0.3256850242614746,
"rewards/margins": 0.20536477863788605,
"rewards/rejected": -0.5310498476028442,
"step": 800,
"use_label": 13083.5498046875
},
{
"epoch": 0.84,
"eval_logits/chosen": -0.23666124045848846,
"eval_logits/rejected": -0.1293245106935501,
"eval_logps/chosen": -103.33552551269531,
"eval_logps/rejected": -132.24159240722656,
"eval_loss": 0.6903889179229736,
"eval_pred_label": 3252.09521484375,
"eval_rewards/accuracies": 0.363095223903656,
"eval_rewards/chosen": -0.34434974193573,
"eval_rewards/margins": 0.22408555448055267,
"eval_rewards/rejected": -0.5684353113174438,
"eval_runtime": 248.2839,
"eval_samples_per_second": 8.055,
"eval_steps_per_second": 0.254,
"eval_use_label": 13331.904296875,
"step": 800
},
{
"epoch": 0.85,
"grad_norm": 1.3828125,
"learning_rate": 3.4336633249862084e-07,
"logits/chosen": -0.6630854606628418,
"logits/rejected": -0.6445407867431641,
"logps/chosen": -108.18148040771484,
"logps/rejected": -135.99142456054688,
"loss": 0.6901,
"pred_label": 3350.35009765625,
"rewards/accuracies": 0.34375,
"rewards/chosen": -0.3832666873931885,
"rewards/margins": 0.1908622682094574,
"rewards/rejected": -0.5741289258003235,
"step": 810,
"use_label": 13563.650390625
},
{
"epoch": 0.86,
"grad_norm": 1.3359375,
"learning_rate": 2.98573068519539e-07,
"logits/chosen": -0.6042599081993103,
"logits/rejected": -0.6371781826019287,
"logps/chosen": -94.31297302246094,
"logps/rejected": -101.22802734375,
"loss": 0.689,
"pred_label": 3393.47509765625,
"rewards/accuracies": 0.29374998807907104,
"rewards/chosen": -0.3432285487651825,
"rewards/margins": 0.13310988247394562,
"rewards/rejected": -0.4763384461402893,
"step": 820,
"use_label": 13680.525390625
},
{
"epoch": 0.87,
"grad_norm": 1.484375,
"learning_rate": 2.5672401793681854e-07,
"logits/chosen": -0.5476540923118591,
"logits/rejected": -0.43125781416893005,
"logps/chosen": -86.91058349609375,
"logps/rejected": -110.5887222290039,
"loss": 0.6923,
"pred_label": 3435.074951171875,
"rewards/accuracies": 0.36250001192092896,
"rewards/chosen": -0.2886909246444702,
"rewards/margins": 0.25071993470191956,
"rewards/rejected": -0.5394108295440674,
"step": 830,
"use_label": 13798.9248046875
},
{
"epoch": 0.88,
"grad_norm": 1.9296875,
"learning_rate": 2.178751501463036e-07,
"logits/chosen": -0.5565081834793091,
"logits/rejected": -0.6612057685852051,
"logps/chosen": -89.98490142822266,
"logps/rejected": -93.48139953613281,
"loss": 0.6915,
"pred_label": 3471.35009765625,
"rewards/accuracies": 0.24375000596046448,
"rewards/chosen": -0.306854248046875,
"rewards/margins": 0.09164027869701385,
"rewards/rejected": -0.39849454164505005,
"step": 840,
"use_label": 13922.650390625
},
{
"epoch": 0.89,
"grad_norm": 1.359375,
"learning_rate": 1.820784220652766e-07,
"logits/chosen": -0.6778563261032104,
"logits/rejected": -0.73534095287323,
"logps/chosen": -120.2663345336914,
"logps/rejected": -149.02294921875,
"loss": 0.6854,
"pred_label": 3509.0,
"rewards/accuracies": 0.41874998807907104,
"rewards/chosen": -0.36049091815948486,
"rewards/margins": 0.2984590530395508,
"rewards/rejected": -0.6589499711990356,
"step": 850,
"use_label": 14045.0
},
{
"epoch": 0.9,
"grad_norm": 1.796875,
"learning_rate": 1.4938170864468636e-07,
"logits/chosen": -0.5929479002952576,
"logits/rejected": -0.48117414116859436,
"logps/chosen": -115.10990142822266,
"logps/rejected": -133.1912841796875,
"loss": 0.6892,
"pred_label": 3556.324951171875,
"rewards/accuracies": 0.3812499940395355,
"rewards/chosen": -0.33908045291900635,
"rewards/margins": 0.23609444499015808,
"rewards/rejected": -0.5751749277114868,
"step": 860,
"use_label": 14157.6748046875
},
{
"epoch": 0.91,
"grad_norm": 1.7578125,
"learning_rate": 1.1982873884064466e-07,
"logits/chosen": -0.6633087992668152,
"logits/rejected": -0.6678288578987122,
"logps/chosen": -117.92154693603516,
"logps/rejected": -145.3701171875,
"loss": 0.6893,
"pred_label": 3603.75,
"rewards/accuracies": 0.375,
"rewards/chosen": -0.3660942316055298,
"rewards/margins": 0.2644110918045044,
"rewards/rejected": -0.6305053234100342,
"step": 870,
"use_label": 14270.25
},
{
"epoch": 0.92,
"grad_norm": 0.87890625,
"learning_rate": 9.345903713082305e-08,
"logits/chosen": -0.5895944237709045,
"logits/rejected": -0.5510295629501343,
"logps/chosen": -96.94719696044922,
"logps/rejected": -141.16554260253906,
"loss": 0.6891,
"pred_label": 3651.0,
"rewards/accuracies": 0.4312500059604645,
"rewards/chosen": -0.3419613242149353,
"rewards/margins": 0.32287630438804626,
"rewards/rejected": -0.6648377180099487,
"step": 880,
"use_label": 14383.0
},
{
"epoch": 0.93,
"grad_norm": 1.6484375,
"learning_rate": 7.030787065396866e-08,
"logits/chosen": -0.5159703493118286,
"logits/rejected": -0.5519541501998901,
"logps/chosen": -96.9026107788086,
"logps/rejected": -120.7626724243164,
"loss": 0.693,
"pred_label": 3690.675048828125,
"rewards/accuracies": 0.28125,
"rewards/chosen": -0.3307461142539978,
"rewards/margins": 0.1426464170217514,
"rewards/rejected": -0.4733925461769104,
"step": 890,
"use_label": 14503.3251953125
},
{
"epoch": 0.94,
"grad_norm": 1.9609375,
"learning_rate": 5.0406202043228604e-08,
"logits/chosen": -0.2721698582172394,
"logits/rejected": -0.407818466424942,
"logps/chosen": -104.2662582397461,
"logps/rejected": -149.70314025878906,
"loss": 0.689,
"pred_label": 3732.824951171875,
"rewards/accuracies": 0.39375001192092896,
"rewards/chosen": -0.3485477864742279,
"rewards/margins": 0.2633667290210724,
"rewards/rejected": -0.6119145154953003,
"step": 900,
"use_label": 14621.1748046875
},
{
"epoch": 0.94,
"eval_logits/chosen": -0.2437347173690796,
"eval_logits/rejected": -0.13671822845935822,
"eval_logps/chosen": -103.0300521850586,
"eval_logps/rejected": -131.91110229492188,
"eval_loss": 0.6907457709312439,
"eval_pred_label": 3821.52392578125,
"eval_rewards/accuracies": 0.363095223903656,
"eval_rewards/chosen": -0.3412950336933136,
"eval_rewards/margins": 0.22383520007133484,
"eval_rewards/rejected": -0.5651301741600037,
"eval_runtime": 248.2504,
"eval_samples_per_second": 8.056,
"eval_steps_per_second": 0.254,
"eval_use_label": 14866.4765625,
"step": 900
},
{
"epoch": 0.95,
"grad_norm": 1.171875,
"learning_rate": 3.378064801637687e-08,
"logits/chosen": -0.5370496511459351,
"logits/rejected": -0.5028234720230103,
"logps/chosen": -89.67744445800781,
"logps/rejected": -113.96895599365234,
"loss": 0.6882,
"pred_label": 3916.52490234375,
"rewards/accuracies": 0.3187499940395355,
"rewards/chosen": -0.2901899218559265,
"rewards/margins": 0.2133828103542328,
"rewards/rejected": -0.5035727024078369,
"step": 910,
"use_label": 15101.474609375
},
{
"epoch": 0.96,
"grad_norm": 1.3125,
"learning_rate": 2.0453443778310766e-08,
"logits/chosen": -0.43033066391944885,
"logits/rejected": -0.4173038899898529,
"logps/chosen": -80.09765625,
"logps/rejected": -120.93513488769531,
"loss": 0.6934,
"pred_label": 3958.0,
"rewards/accuracies": 0.3125,
"rewards/chosen": -0.26141807436943054,
"rewards/margins": 0.23344416916370392,
"rewards/rejected": -0.49486222863197327,
"step": 920,
"use_label": 15220.0
},
{
"epoch": 0.97,
"grad_norm": 2.109375,
"learning_rate": 1.0442413283435759e-08,
"logits/chosen": -0.4513850212097168,
"logits/rejected": -0.5099025964736938,
"logps/chosen": -92.44239807128906,
"logps/rejected": -119.61177062988281,
"loss": 0.6878,
"pred_label": 3998.60009765625,
"rewards/accuracies": 0.33125001192092896,
"rewards/chosen": -0.29288578033447266,
"rewards/margins": 0.20934204757213593,
"rewards/rejected": -0.502227783203125,
"step": 930,
"use_label": 15339.400390625
},
{
"epoch": 0.98,
"grad_norm": 1.25,
"learning_rate": 3.760945397705828e-09,
"logits/chosen": -0.3625331521034241,
"logits/rejected": -0.5358187556266785,
"logps/chosen": -103.41780090332031,
"logps/rejected": -130.23828125,
"loss": 0.691,
"pred_label": 4038.60009765625,
"rewards/accuracies": 0.3187499940395355,
"rewards/chosen": -0.34467238187789917,
"rewards/margins": 0.18087737262248993,
"rewards/rejected": -0.5255497694015503,
"step": 940,
"use_label": 15459.400390625
},
{
"epoch": 0.99,
"grad_norm": 1.59375,
"learning_rate": 4.1797599220405605e-10,
"logits/chosen": -0.674268901348114,
"logits/rejected": -0.7018919587135315,
"logps/chosen": -114.91938781738281,
"logps/rejected": -133.3175506591797,
"loss": 0.6895,
"pred_label": 4082.625,
"rewards/accuracies": 0.33125001192092896,
"rewards/chosen": -0.3830910325050354,
"rewards/margins": 0.1591145098209381,
"rewards/rejected": -0.5422054529190063,
"step": 950,
"use_label": 15575.375
},
{
"epoch": 1.0,
"step": 955,
"total_flos": 0.0,
"train_loss": 0.6880922077838039,
"train_runtime": 20023.3666,
"train_samples_per_second": 3.053,
"train_steps_per_second": 0.048
}
],
"logging_steps": 10,
"max_steps": 955,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}