zephyr-7b / trainer_state.json
jikaixuan's picture
Model save
f53b622 verified
raw
history blame
62.3 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9997382884061764,
"eval_steps": 100,
"global_step": 955,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 0.59375,
"learning_rate": 5.208333333333333e-08,
"logits/chosen": -2.1666858196258545,
"logits/rejected": -2.182244300842285,
"logps/chosen": -12.368609428405762,
"logps/rejected": -24.687644958496094,
"loss": 0.6931,
"pred_label": 0.0,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1,
"use_label": 10.0
},
{
"epoch": 0.01,
"grad_norm": 0.6015625,
"learning_rate": 5.208333333333334e-07,
"logits/chosen": -2.2111542224884033,
"logits/rejected": -2.2718067169189453,
"logps/chosen": -57.56840133666992,
"logps/rejected": -65.20916748046875,
"loss": 0.693,
"pred_label": 0.0,
"rewards/accuracies": 0.2569444477558136,
"rewards/chosen": 0.0011389791034162045,
"rewards/margins": 0.0002508986508473754,
"rewards/rejected": 0.0008880805689841509,
"step": 10,
"use_label": 90.0
},
{
"epoch": 0.02,
"grad_norm": 0.6796875,
"learning_rate": 1.0416666666666667e-06,
"logits/chosen": -2.242893695831299,
"logits/rejected": -2.279961109161377,
"logps/chosen": -56.537681579589844,
"logps/rejected": -68.3794174194336,
"loss": 0.6924,
"pred_label": 0.0,
"rewards/accuracies": 0.22499999403953552,
"rewards/chosen": 0.006626849062740803,
"rewards/margins": 0.001654049614444375,
"rewards/rejected": 0.004972799215465784,
"step": 20,
"use_label": 242.0
},
{
"epoch": 0.03,
"grad_norm": 0.55078125,
"learning_rate": 1.5625e-06,
"logits/chosen": -2.2637219429016113,
"logits/rejected": -2.2480521202087402,
"logps/chosen": -53.993507385253906,
"logps/rejected": -67.89700317382812,
"loss": 0.6919,
"pred_label": 0.0,
"rewards/accuracies": 0.26875001192092896,
"rewards/chosen": 0.016421381384134293,
"rewards/margins": 0.002580237342044711,
"rewards/rejected": 0.013841142877936363,
"step": 30,
"use_label": 402.0
},
{
"epoch": 0.04,
"grad_norm": 0.6328125,
"learning_rate": 2.0833333333333334e-06,
"logits/chosen": -2.2831993103027344,
"logits/rejected": -2.2760486602783203,
"logps/chosen": -55.59602737426758,
"logps/rejected": -66.58573913574219,
"loss": 0.6909,
"pred_label": 0.0,
"rewards/accuracies": 0.20624999701976776,
"rewards/chosen": 0.018266689032316208,
"rewards/margins": 0.0004533957107923925,
"rewards/rejected": 0.017813291400671005,
"step": 40,
"use_label": 562.0
},
{
"epoch": 0.05,
"grad_norm": 0.6015625,
"learning_rate": 2.604166666666667e-06,
"logits/chosen": -2.344376564025879,
"logits/rejected": -2.3342297077178955,
"logps/chosen": -69.12073516845703,
"logps/rejected": -84.67558288574219,
"loss": 0.6889,
"pred_label": 0.0,
"rewards/accuracies": 0.30000001192092896,
"rewards/chosen": 0.02673395536839962,
"rewards/margins": 0.00583356199786067,
"rewards/rejected": 0.020900394767522812,
"step": 50,
"use_label": 722.0
},
{
"epoch": 0.06,
"grad_norm": 0.72265625,
"learning_rate": 3.125e-06,
"logits/chosen": -2.3030121326446533,
"logits/rejected": -2.3094825744628906,
"logps/chosen": -82.04167175292969,
"logps/rejected": -90.7291488647461,
"loss": 0.6876,
"pred_label": 0.0,
"rewards/accuracies": 0.3499999940395355,
"rewards/chosen": 0.036534082144498825,
"rewards/margins": 0.013860121369361877,
"rewards/rejected": 0.022673960775136948,
"step": 60,
"use_label": 882.0
},
{
"epoch": 0.07,
"grad_norm": 0.7890625,
"learning_rate": 3.6458333333333333e-06,
"logits/chosen": -2.345569610595703,
"logits/rejected": -2.3263676166534424,
"logps/chosen": -77.1853256225586,
"logps/rejected": -77.63880920410156,
"loss": 0.685,
"pred_label": 0.0,
"rewards/accuracies": 0.3062500059604645,
"rewards/chosen": 0.025494003668427467,
"rewards/margins": 0.016305232420563698,
"rewards/rejected": 0.009188770316541195,
"step": 70,
"use_label": 1042.0
},
{
"epoch": 0.08,
"grad_norm": 0.81640625,
"learning_rate": 4.166666666666667e-06,
"logits/chosen": -2.241882801055908,
"logits/rejected": -2.195146322250366,
"logps/chosen": -81.66094207763672,
"logps/rejected": -89.08940124511719,
"loss": 0.6805,
"pred_label": 0.0,
"rewards/accuracies": 0.32499998807907104,
"rewards/chosen": 0.003909807652235031,
"rewards/margins": 0.025169039145112038,
"rewards/rejected": -0.021259231492877007,
"step": 80,
"use_label": 1202.0
},
{
"epoch": 0.09,
"grad_norm": 1.7734375,
"learning_rate": 4.6875000000000004e-06,
"logits/chosen": -2.1871695518493652,
"logits/rejected": -2.2313501834869385,
"logps/chosen": -62.76741409301758,
"logps/rejected": -81.16191101074219,
"loss": 0.6747,
"pred_label": 0.0,
"rewards/accuracies": 0.33125001192092896,
"rewards/chosen": -0.016776535660028458,
"rewards/margins": 0.048332639038562775,
"rewards/rejected": -0.06510917842388153,
"step": 90,
"use_label": 1362.0
},
{
"epoch": 0.1,
"grad_norm": 1.4375,
"learning_rate": 4.9997324926814375e-06,
"logits/chosen": -2.1414177417755127,
"logits/rejected": -2.107236623764038,
"logps/chosen": -78.60578155517578,
"logps/rejected": -81.1384506225586,
"loss": 0.6685,
"pred_label": 0.0,
"rewards/accuracies": 0.32499998807907104,
"rewards/chosen": -0.04031088575720787,
"rewards/margins": 0.052690792828798294,
"rewards/rejected": -0.09300167858600616,
"step": 100,
"use_label": 1522.0
},
{
"epoch": 0.1,
"eval_logits/chosen": -2.109715223312378,
"eval_logits/rejected": -2.0796475410461426,
"eval_logps/chosen": -71.95718383789062,
"eval_logps/rejected": -84.7625961303711,
"eval_loss": 0.6684110760688782,
"eval_pred_label": 0.0,
"eval_rewards/accuracies": 0.335317462682724,
"eval_rewards/chosen": -0.030566338449716568,
"eval_rewards/margins": 0.06307896971702576,
"eval_rewards/rejected": -0.09364530444145203,
"eval_runtime": 247.4954,
"eval_samples_per_second": 8.081,
"eval_steps_per_second": 0.255,
"eval_use_label": 1856.0,
"step": 100
},
{
"epoch": 0.12,
"grad_norm": 1.71875,
"learning_rate": 4.996723692767927e-06,
"logits/chosen": -2.12998104095459,
"logits/rejected": -2.1109042167663574,
"logps/chosen": -68.2921142578125,
"logps/rejected": -84.99057006835938,
"loss": 0.6713,
"pred_label": 0.07500000298023224,
"rewards/accuracies": 0.3062500059604645,
"rewards/chosen": -0.06523006409406662,
"rewards/margins": 0.0570509135723114,
"rewards/rejected": -0.12228099256753922,
"step": 110,
"use_label": 2185.925048828125
},
{
"epoch": 0.13,
"grad_norm": 1.046875,
"learning_rate": 4.9903757462135984e-06,
"logits/chosen": -2.3605504035949707,
"logits/rejected": -2.243201971054077,
"logps/chosen": -80.2857666015625,
"logps/rejected": -96.13867950439453,
"loss": 0.6667,
"pred_label": 1.875,
"rewards/accuracies": 0.3499999940395355,
"rewards/chosen": -0.062185365706682205,
"rewards/margins": 0.0815814733505249,
"rewards/rejected": -0.143766850233078,
"step": 120,
"use_label": 2344.125
},
{
"epoch": 0.14,
"grad_norm": 1.0078125,
"learning_rate": 4.980697142834315e-06,
"logits/chosen": -2.146286725997925,
"logits/rejected": -2.1618175506591797,
"logps/chosen": -67.6681137084961,
"logps/rejected": -78.9002456665039,
"loss": 0.6675,
"pred_label": 2.075000047683716,
"rewards/accuracies": 0.2750000059604645,
"rewards/chosen": -0.09194014966487885,
"rewards/margins": 0.04880703240633011,
"rewards/rejected": -0.14074717462062836,
"step": 130,
"use_label": 2503.925048828125
},
{
"epoch": 0.15,
"grad_norm": 2.28125,
"learning_rate": 4.967700826904229e-06,
"logits/chosen": -2.1254963874816895,
"logits/rejected": -2.160235643386841,
"logps/chosen": -74.917724609375,
"logps/rejected": -99.1263427734375,
"loss": 0.6684,
"pred_label": 5.050000190734863,
"rewards/accuracies": 0.28125,
"rewards/chosen": -0.15644724667072296,
"rewards/margins": 0.08642110973596573,
"rewards/rejected": -0.2428683489561081,
"step": 140,
"use_label": 2660.949951171875
},
{
"epoch": 0.16,
"grad_norm": 1.359375,
"learning_rate": 4.951404179843963e-06,
"logits/chosen": -2.1338083744049072,
"logits/rejected": -2.0785932540893555,
"logps/chosen": -57.580589294433594,
"logps/rejected": -64.5077133178711,
"loss": 0.6658,
"pred_label": 9.100000381469727,
"rewards/accuracies": 0.26875001192092896,
"rewards/chosen": -0.10085760056972504,
"rewards/margins": 0.09126537293195724,
"rewards/rejected": -0.19212298095226288,
"step": 150,
"use_label": 2816.89990234375
},
{
"epoch": 0.17,
"grad_norm": 2.15625,
"learning_rate": 4.931828996974498e-06,
"logits/chosen": -2.1478817462921143,
"logits/rejected": -2.1238207817077637,
"logps/chosen": -102.6265640258789,
"logps/rejected": -118.9216537475586,
"loss": 0.667,
"pred_label": 15.399999618530273,
"rewards/accuracies": 0.3499999940395355,
"rewards/chosen": -0.21023361384868622,
"rewards/margins": 0.16089467704296112,
"rewards/rejected": -0.37112829089164734,
"step": 160,
"use_label": 2970.60009765625
},
{
"epoch": 0.18,
"grad_norm": 2.140625,
"learning_rate": 4.909001458367867e-06,
"logits/chosen": -1.9664795398712158,
"logits/rejected": -1.9388923645019531,
"logps/chosen": -81.67234802246094,
"logps/rejected": -97.5047836303711,
"loss": 0.6635,
"pred_label": 23.475000381469727,
"rewards/accuracies": 0.3375000059604645,
"rewards/chosen": -0.17715924978256226,
"rewards/margins": 0.13409331440925598,
"rewards/rejected": -0.31125253438949585,
"step": 170,
"use_label": 3122.52490234375
},
{
"epoch": 0.19,
"grad_norm": 2.96875,
"learning_rate": 4.882952093833628e-06,
"logits/chosen": -1.896836519241333,
"logits/rejected": -1.9397751092910767,
"logps/chosen": -82.50892639160156,
"logps/rejected": -105.1452407836914,
"loss": 0.6701,
"pred_label": 29.649999618530273,
"rewards/accuracies": 0.33125001192092896,
"rewards/chosen": -0.20006974041461945,
"rewards/margins": 0.11658792197704315,
"rewards/rejected": -0.3166576623916626,
"step": 180,
"use_label": 3276.35009765625
},
{
"epoch": 0.2,
"grad_norm": 1.5859375,
"learning_rate": 4.853715742087947e-06,
"logits/chosen": -1.8957335948944092,
"logits/rejected": -1.8187646865844727,
"logps/chosen": -101.19456481933594,
"logps/rejected": -109.06144714355469,
"loss": 0.6648,
"pred_label": 35.17499923706055,
"rewards/accuracies": 0.3375000059604645,
"rewards/chosen": -0.27311572432518005,
"rewards/margins": 0.1226036325097084,
"rewards/rejected": -0.39571934938430786,
"step": 190,
"use_label": 3430.824951171875
},
{
"epoch": 0.21,
"grad_norm": 2.125,
"learning_rate": 4.821331504159906e-06,
"logits/chosen": -1.8366466760635376,
"logits/rejected": -1.8133814334869385,
"logps/chosen": -118.866943359375,
"logps/rejected": -123.262451171875,
"loss": 0.676,
"pred_label": 43.42499923706055,
"rewards/accuracies": 0.35624998807907104,
"rewards/chosen": -0.38812780380249023,
"rewards/margins": 0.1128091812133789,
"rewards/rejected": -0.5009369850158691,
"step": 200,
"use_label": 3582.574951171875
},
{
"epoch": 0.21,
"eval_logits/chosen": -1.7319464683532715,
"eval_logits/rejected": -1.6888620853424072,
"eval_logps/chosen": -106.19064331054688,
"eval_logps/rejected": -124.95625305175781,
"eval_loss": 0.6716896295547485,
"eval_pred_label": 61.55555725097656,
"eval_rewards/accuracies": 0.3214285671710968,
"eval_rewards/chosen": -0.372901052236557,
"eval_rewards/margins": 0.12268086522817612,
"eval_rewards/rejected": -0.49558189511299133,
"eval_runtime": 248.0123,
"eval_samples_per_second": 8.064,
"eval_steps_per_second": 0.254,
"eval_use_label": 3898.4443359375,
"step": 200
},
{
"epoch": 0.22,
"grad_norm": 2.515625,
"learning_rate": 4.7858426910973435e-06,
"logits/chosen": -1.9356311559677124,
"logits/rejected": -1.9080215692520142,
"logps/chosen": -93.94760131835938,
"logps/rejected": -106.8377456665039,
"loss": 0.6743,
"pred_label": 81.25,
"rewards/accuracies": 0.32499998807907104,
"rewards/chosen": -0.259369432926178,
"rewards/margins": 0.13343419134616852,
"rewards/rejected": -0.3928036093711853,
"step": 210,
"use_label": 4208.75
},
{
"epoch": 0.23,
"grad_norm": 2.34375,
"learning_rate": 4.747296766042161e-06,
"logits/chosen": -1.8437402248382568,
"logits/rejected": -1.810903549194336,
"logps/chosen": -100.77757263183594,
"logps/rejected": -112.38002014160156,
"loss": 0.6651,
"pred_label": 92.5999984741211,
"rewards/accuracies": 0.35624998807907104,
"rewards/chosen": -0.24724093079566956,
"rewards/margins": 0.1804189234972,
"rewards/rejected": -0.427659809589386,
"step": 220,
"use_label": 4357.39990234375
},
{
"epoch": 0.24,
"grad_norm": 1.828125,
"learning_rate": 4.705745280752586e-06,
"logits/chosen": -1.5612363815307617,
"logits/rejected": -1.494425654411316,
"logps/chosen": -108.3369369506836,
"logps/rejected": -121.25785064697266,
"loss": 0.6784,
"pred_label": 107.0,
"rewards/accuracies": 0.36250001192092896,
"rewards/chosen": -0.29597795009613037,
"rewards/margins": 0.18698883056640625,
"rewards/rejected": -0.482966810464859,
"step": 230,
"use_label": 4503.0
},
{
"epoch": 0.25,
"grad_norm": 3.109375,
"learning_rate": 4.661243806657256e-06,
"logits/chosen": -1.279926061630249,
"logits/rejected": -1.207486629486084,
"logps/chosen": -94.6622314453125,
"logps/rejected": -121.74755859375,
"loss": 0.68,
"pred_label": 124.05000305175781,
"rewards/accuracies": 0.3125,
"rewards/chosen": -0.30948689579963684,
"rewards/margins": 0.16862434148788452,
"rewards/rejected": -0.4781111776828766,
"step": 240,
"use_label": 4645.9501953125
},
{
"epoch": 0.26,
"grad_norm": 1.8203125,
"learning_rate": 4.613851860533367e-06,
"logits/chosen": -1.4483808279037476,
"logits/rejected": -1.535796880722046,
"logps/chosen": -88.96175384521484,
"logps/rejected": -106.26942443847656,
"loss": 0.678,
"pred_label": 140.64999389648438,
"rewards/accuracies": 0.3375000059604645,
"rewards/chosen": -0.2383408546447754,
"rewards/margins": 0.1766553670167923,
"rewards/rejected": -0.4149962067604065,
"step": 250,
"use_label": 4789.35009765625
},
{
"epoch": 0.27,
"grad_norm": 2.359375,
"learning_rate": 4.563632824908252e-06,
"logits/chosen": -1.5566436052322388,
"logits/rejected": -1.472214937210083,
"logps/chosen": -101.36164855957031,
"logps/rejected": -132.7355194091797,
"loss": 0.6768,
"pred_label": 156.85000610351562,
"rewards/accuracies": 0.32499998807907104,
"rewards/chosen": -0.404205858707428,
"rewards/margins": 0.1837155818939209,
"rewards/rejected": -0.5879215002059937,
"step": 260,
"use_label": 4933.14990234375
},
{
"epoch": 0.28,
"grad_norm": 2.265625,
"learning_rate": 4.510653863290871e-06,
"logits/chosen": -1.5190045833587646,
"logits/rejected": -1.5413776636123657,
"logps/chosen": -123.2553482055664,
"logps/rejected": -132.5965118408203,
"loss": 0.6699,
"pred_label": 174.9499969482422,
"rewards/accuracies": 0.3187499940395355,
"rewards/chosen": -0.478261798620224,
"rewards/margins": 0.1528010368347168,
"rewards/rejected": -0.6310628056526184,
"step": 270,
"use_label": 5075.0498046875
},
{
"epoch": 0.29,
"grad_norm": 1.390625,
"learning_rate": 4.454985830346574e-06,
"logits/chosen": -1.4161837100982666,
"logits/rejected": -1.461897850036621,
"logps/chosen": -97.59378051757812,
"logps/rejected": -113.92098236083984,
"loss": 0.6682,
"pred_label": 182.6750030517578,
"rewards/accuracies": 0.3125,
"rewards/chosen": -0.36361420154571533,
"rewards/margins": 0.11149580776691437,
"rewards/rejected": -0.4751099944114685,
"step": 280,
"use_label": 5227.3251953125
},
{
"epoch": 0.3,
"grad_norm": 2.0,
"learning_rate": 4.396703177135262e-06,
"logits/chosen": -1.1572140455245972,
"logits/rejected": -1.1582170724868774,
"logps/chosen": -113.84346008300781,
"logps/rejected": -136.17958068847656,
"loss": 0.6783,
"pred_label": 200.5749969482422,
"rewards/accuracies": 0.3687500059604645,
"rewards/chosen": -0.3756815791130066,
"rewards/margins": 0.2912302017211914,
"rewards/rejected": -0.666911780834198,
"step": 290,
"use_label": 5369.4248046875
},
{
"epoch": 0.31,
"grad_norm": 2.59375,
"learning_rate": 4.335883851539693e-06,
"logits/chosen": -0.7739458084106445,
"logits/rejected": -0.8222519159317017,
"logps/chosen": -96.963623046875,
"logps/rejected": -129.59027099609375,
"loss": 0.6728,
"pred_label": 223.0749969482422,
"rewards/accuracies": 0.3187499940395355,
"rewards/chosen": -0.43035492300987244,
"rewards/margins": 0.2433358132839203,
"rewards/rejected": -0.6736907958984375,
"step": 300,
"use_label": 5506.9248046875
},
{
"epoch": 0.31,
"eval_logits/chosen": -0.7414401173591614,
"eval_logits/rejected": -0.6762140393257141,
"eval_logps/chosen": -116.0198745727539,
"eval_logps/rejected": -145.98529052734375,
"eval_loss": 0.6783695220947266,
"eval_pred_label": 270.96826171875,
"eval_rewards/accuracies": 0.3373015820980072,
"eval_rewards/chosen": -0.4711931049823761,
"eval_rewards/margins": 0.23467905819416046,
"eval_rewards/rejected": -0.7058721780776978,
"eval_runtime": 248.0617,
"eval_samples_per_second": 8.063,
"eval_steps_per_second": 0.254,
"eval_use_label": 5793.03173828125,
"step": 300
},
{
"epoch": 0.32,
"grad_norm": 2.21875,
"learning_rate": 4.2726091940171055e-06,
"logits/chosen": -1.1870858669281006,
"logits/rejected": -1.1604619026184082,
"logps/chosen": -93.13883972167969,
"logps/rejected": -115.36534118652344,
"loss": 0.6683,
"pred_label": 314.92498779296875,
"rewards/accuracies": 0.3062500059604645,
"rewards/chosen": -0.4414878487586975,
"rewards/margins": 0.11716248840093613,
"rewards/rejected": -0.5586503148078918,
"step": 310,
"use_label": 6079.0751953125
},
{
"epoch": 0.33,
"grad_norm": 2.34375,
"learning_rate": 4.206963828813555e-06,
"logits/chosen": -0.7778801918029785,
"logits/rejected": -0.8481136560440063,
"logps/chosen": -115.88163757324219,
"logps/rejected": -152.60556030273438,
"loss": 0.6715,
"pred_label": 340.4750061035156,
"rewards/accuracies": 0.36250001192092896,
"rewards/chosen": -0.41940560936927795,
"rewards/margins": 0.2641361653804779,
"rewards/rejected": -0.6835418343544006,
"step": 320,
"use_label": 6213.52490234375
},
{
"epoch": 0.35,
"grad_norm": 2.8125,
"learning_rate": 4.139035550786495e-06,
"logits/chosen": -0.7590861320495605,
"logits/rejected": -0.6889998316764832,
"logps/chosen": -90.45745086669922,
"logps/rejected": -116.98609924316406,
"loss": 0.6718,
"pred_label": 364.04998779296875,
"rewards/accuracies": 0.3499999940395355,
"rewards/chosen": -0.269859254360199,
"rewards/margins": 0.2574610114097595,
"rewards/rejected": -0.5273202657699585,
"step": 330,
"use_label": 6349.9501953125
},
{
"epoch": 0.36,
"grad_norm": 2.703125,
"learning_rate": 4.068915207986931e-06,
"logits/chosen": -0.6209542155265808,
"logits/rejected": -0.47464966773986816,
"logps/chosen": -102.54121398925781,
"logps/rejected": -130.24276733398438,
"loss": 0.678,
"pred_label": 391.07501220703125,
"rewards/accuracies": 0.32499998807907104,
"rewards/chosen": -0.395341694355011,
"rewards/margins": 0.19944116473197937,
"rewards/rejected": -0.594782829284668,
"step": 340,
"use_label": 6482.9248046875
},
{
"epoch": 0.37,
"grad_norm": 1.8203125,
"learning_rate": 3.996696580158211e-06,
"logits/chosen": -0.7593547701835632,
"logits/rejected": -0.6881019473075867,
"logps/chosen": -92.08587646484375,
"logps/rejected": -110.56968688964844,
"loss": 0.6727,
"pred_label": 411.9750061035156,
"rewards/accuracies": 0.3187499940395355,
"rewards/chosen": -0.31440719962120056,
"rewards/margins": 0.14519965648651123,
"rewards/rejected": -0.4596068263053894,
"step": 350,
"use_label": 6622.02490234375
},
{
"epoch": 0.38,
"grad_norm": 1.9609375,
"learning_rate": 3.922476253313921e-06,
"logits/chosen": -0.996785044670105,
"logits/rejected": -0.9698454737663269,
"logps/chosen": -101.95857238769531,
"logps/rejected": -114.76066589355469,
"loss": 0.6783,
"pred_label": 427.95001220703125,
"rewards/accuracies": 0.36250001192092896,
"rewards/chosen": -0.32893818616867065,
"rewards/margins": 0.16835859417915344,
"rewards/rejected": -0.4972967505455017,
"step": 360,
"use_label": 6766.0498046875
},
{
"epoch": 0.39,
"grad_norm": 1.484375,
"learning_rate": 3.846353490562664e-06,
"logits/chosen": -1.0720884799957275,
"logits/rejected": -0.859793484210968,
"logps/chosen": -103.12544250488281,
"logps/rejected": -120.26700592041016,
"loss": 0.6626,
"pred_label": 443.875,
"rewards/accuracies": 0.3499999940395355,
"rewards/chosen": -0.29031243920326233,
"rewards/margins": 0.2187139242887497,
"rewards/rejected": -0.509026288986206,
"step": 370,
"use_label": 6910.125
},
{
"epoch": 0.4,
"grad_norm": 2.671875,
"learning_rate": 3.768430099352445e-06,
"logits/chosen": -0.4451161324977875,
"logits/rejected": -0.32113510370254517,
"logps/chosen": -106.78487396240234,
"logps/rejected": -132.46365356445312,
"loss": 0.6759,
"pred_label": 463.2749938964844,
"rewards/accuracies": 0.34375,
"rewards/chosen": -0.4723123610019684,
"rewards/margins": 0.1635245531797409,
"rewards/rejected": -0.6358368992805481,
"step": 380,
"use_label": 7050.72509765625
},
{
"epoch": 0.41,
"grad_norm": 2.875,
"learning_rate": 3.6888102953122307e-06,
"logits/chosen": -0.02978489175438881,
"logits/rejected": -0.03225391358137131,
"logps/chosen": -136.5101318359375,
"logps/rejected": -144.56173706054688,
"loss": 0.6827,
"pred_label": 495.82501220703125,
"rewards/accuracies": 0.32499998807907104,
"rewards/chosen": -0.516473650932312,
"rewards/margins": 0.25756725668907166,
"rewards/rejected": -0.774040937423706,
"step": 390,
"use_label": 7178.1748046875
},
{
"epoch": 0.42,
"grad_norm": 2.515625,
"learning_rate": 3.607600562872785e-06,
"logits/chosen": 0.09610392153263092,
"logits/rejected": 0.09092014282941818,
"logps/chosen": -117.39754486083984,
"logps/rejected": -152.1193084716797,
"loss": 0.6715,
"pred_label": 523.0250244140625,
"rewards/accuracies": 0.32499998807907104,
"rewards/chosen": -0.4636654853820801,
"rewards/margins": 0.2620038390159607,
"rewards/rejected": -0.725669264793396,
"step": 400,
"use_label": 7310.97509765625
},
{
"epoch": 0.42,
"eval_logits/chosen": 0.6419875621795654,
"eval_logits/rejected": 0.764842689037323,
"eval_logps/chosen": -113.52101135253906,
"eval_logps/rejected": -148.9146270751953,
"eval_loss": 0.6811794638633728,
"eval_pred_label": 572.6825561523438,
"eval_rewards/accuracies": 0.3551587164402008,
"eval_rewards/chosen": -0.44620463252067566,
"eval_rewards/margins": 0.2889607846736908,
"eval_rewards/rejected": -0.7351653575897217,
"eval_runtime": 247.9054,
"eval_samples_per_second": 8.068,
"eval_steps_per_second": 0.254,
"eval_use_label": 7595.3173828125,
"step": 400
},
{
"epoch": 0.43,
"grad_norm": 2.671875,
"learning_rate": 3.5249095128531863e-06,
"logits/chosen": 0.34008723497390747,
"logits/rejected": 0.07830000668764114,
"logps/chosen": -100.46337890625,
"logps/rejected": -124.54425048828125,
"loss": 0.6735,
"pred_label": 637.2999877929688,
"rewards/accuracies": 0.3812499940395355,
"rewards/chosen": -0.3595535457134247,
"rewards/margins": 0.30695658922195435,
"rewards/rejected": -0.6665101647377014,
"step": 410,
"use_label": 7860.7001953125
},
{
"epoch": 0.44,
"grad_norm": 2.53125,
"learning_rate": 3.4408477372034743e-06,
"logits/chosen": 0.0810169205069542,
"logits/rejected": 0.1531490534543991,
"logps/chosen": -104.22071838378906,
"logps/rejected": -134.35255432128906,
"loss": 0.6774,
"pred_label": 659.75,
"rewards/accuracies": 0.32499998807907104,
"rewards/chosen": -0.4057747721672058,
"rewards/margins": 0.256902813911438,
"rewards/rejected": -0.6626775860786438,
"step": 420,
"use_label": 7998.25
},
{
"epoch": 0.45,
"grad_norm": 2.03125,
"learning_rate": 3.355527661097728e-06,
"logits/chosen": 0.1920831948518753,
"logits/rejected": 0.327668160200119,
"logps/chosen": -127.31324768066406,
"logps/rejected": -138.50869750976562,
"loss": 0.6761,
"pred_label": 684.7249755859375,
"rewards/accuracies": 0.30000001192092896,
"rewards/chosen": -0.5084472894668579,
"rewards/margins": 0.1930726319551468,
"rewards/rejected": -0.7015198469161987,
"step": 430,
"use_label": 8133.27490234375
},
{
"epoch": 0.46,
"grad_norm": 3.46875,
"learning_rate": 3.269063392575352e-06,
"logits/chosen": 0.2943039536476135,
"logits/rejected": 0.04520421102643013,
"logps/chosen": -116.11759185791016,
"logps/rejected": -138.48878479003906,
"loss": 0.6761,
"pred_label": 708.7999877929688,
"rewards/accuracies": 0.3187499940395355,
"rewards/chosen": -0.48671650886535645,
"rewards/margins": 0.26555269956588745,
"rewards/rejected": -0.7522691488265991,
"step": 440,
"use_label": 8269.2001953125
},
{
"epoch": 0.47,
"grad_norm": 2.046875,
"learning_rate": 3.181570569931697e-06,
"logits/chosen": 0.16918572783470154,
"logits/rejected": -0.033099401742219925,
"logps/chosen": -121.46165466308594,
"logps/rejected": -148.68612670898438,
"loss": 0.6878,
"pred_label": 732.25,
"rewards/accuracies": 0.34375,
"rewards/chosen": -0.4864117503166199,
"rewards/margins": 0.24023088812828064,
"rewards/rejected": -0.7266427278518677,
"step": 450,
"use_label": 8405.75
},
{
"epoch": 0.48,
"grad_norm": 2.109375,
"learning_rate": 3.09316620706208e-06,
"logits/chosen": 0.21930424869060516,
"logits/rejected": 0.31035083532333374,
"logps/chosen": -91.38786315917969,
"logps/rejected": -110.15885925292969,
"loss": 0.6718,
"pred_label": 753.5999755859375,
"rewards/accuracies": 0.3062500059604645,
"rewards/chosen": -0.34389492869377136,
"rewards/margins": 0.23962631821632385,
"rewards/rejected": -0.5835212469100952,
"step": 460,
"use_label": 8544.400390625
},
{
"epoch": 0.49,
"grad_norm": 2.453125,
"learning_rate": 3.0039685369660785e-06,
"logits/chosen": 0.31231826543807983,
"logits/rejected": 0.4929059147834778,
"logps/chosen": -109.46476745605469,
"logps/rejected": -141.3244171142578,
"loss": 0.6786,
"pred_label": 777.9000244140625,
"rewards/accuracies": 0.35624998807907104,
"rewards/chosen": -0.4255266785621643,
"rewards/margins": 0.30392220616340637,
"rewards/rejected": -0.7294487953186035,
"step": 470,
"use_label": 8680.099609375
},
{
"epoch": 0.5,
"grad_norm": 1.6328125,
"learning_rate": 2.91409685362137e-06,
"logits/chosen": 0.5682842135429382,
"logits/rejected": 0.4352129399776459,
"logps/chosen": -135.01535034179688,
"logps/rejected": -157.18922424316406,
"loss": 0.6756,
"pred_label": 809.4000244140625,
"rewards/accuracies": 0.3187499940395355,
"rewards/chosen": -0.598731517791748,
"rewards/margins": 0.19433310627937317,
"rewards/rejected": -0.7930646538734436,
"step": 480,
"use_label": 8808.599609375
},
{
"epoch": 0.51,
"grad_norm": 1.8359375,
"learning_rate": 2.8236713524386085e-06,
"logits/chosen": 0.13114799559116364,
"logits/rejected": 0.3516528606414795,
"logps/chosen": -117.50811767578125,
"logps/rejected": -130.34207153320312,
"loss": 0.6686,
"pred_label": 831.2999877929688,
"rewards/accuracies": 0.3062500059604645,
"rewards/chosen": -0.5088413953781128,
"rewards/margins": 0.19557976722717285,
"rewards/rejected": -0.7044212222099304,
"step": 490,
"use_label": 8946.7001953125
},
{
"epoch": 0.52,
"grad_norm": 1.9296875,
"learning_rate": 2.7328129695107205e-06,
"logits/chosen": 0.332313597202301,
"logits/rejected": 0.04164884611964226,
"logps/chosen": -148.10968017578125,
"logps/rejected": -173.51394653320312,
"loss": 0.6744,
"pred_label": 849.9749755859375,
"rewards/accuracies": 0.42500001192092896,
"rewards/chosen": -0.7197140455245972,
"rewards/margins": 0.24881935119628906,
"rewards/rejected": -0.9685333967208862,
"step": 500,
"use_label": 9088.025390625
},
{
"epoch": 0.52,
"eval_logits/chosen": 0.6149206757545471,
"eval_logits/rejected": 0.7128078937530518,
"eval_logps/chosen": -120.11334228515625,
"eval_logps/rejected": -151.15725708007812,
"eval_loss": 0.672174334526062,
"eval_pred_label": 893.8412475585938,
"eval_rewards/accuracies": 0.341269850730896,
"eval_rewards/chosen": -0.5121279954910278,
"eval_rewards/margins": 0.24546381831169128,
"eval_rewards/rejected": -0.7575918436050415,
"eval_runtime": 247.8447,
"eval_samples_per_second": 8.07,
"eval_steps_per_second": 0.254,
"eval_use_label": 9378.1591796875,
"step": 500
},
{
"epoch": 0.53,
"grad_norm": 1.7578125,
"learning_rate": 2.641643219871597e-06,
"logits/chosen": 0.4815472662448883,
"logits/rejected": 0.2771294116973877,
"logps/chosen": -110.22953033447266,
"logps/rejected": -142.9767608642578,
"loss": 0.6765,
"pred_label": 940.7249755859375,
"rewards/accuracies": 0.36250001192092896,
"rewards/chosen": -0.4598473608493805,
"rewards/margins": 0.2644655704498291,
"rewards/rejected": -0.724312961101532,
"step": 510,
"use_label": 9661.275390625
},
{
"epoch": 0.54,
"grad_norm": 2.890625,
"learning_rate": 2.5502840349805074e-06,
"logits/chosen": 0.290465772151947,
"logits/rejected": 0.05848363786935806,
"logps/chosen": -115.6847915649414,
"logps/rejected": -137.01820373535156,
"loss": 0.6804,
"pred_label": 966.2000122070312,
"rewards/accuracies": 0.34375,
"rewards/chosen": -0.4329158365726471,
"rewards/margins": 0.21230947971343994,
"rewards/rejected": -0.6452253460884094,
"step": 520,
"use_label": 9795.7998046875
},
{
"epoch": 0.55,
"grad_norm": 2.015625,
"learning_rate": 2.4588575996495797e-06,
"logits/chosen": 0.5015053153038025,
"logits/rejected": 0.544513463973999,
"logps/chosen": -124.15202331542969,
"logps/rejected": -145.21820068359375,
"loss": 0.6847,
"pred_label": 992.7999877929688,
"rewards/accuracies": 0.3375000059604645,
"rewards/chosen": -0.49871382117271423,
"rewards/margins": 0.27802106738090515,
"rewards/rejected": -0.7767347693443298,
"step": 530,
"use_label": 9929.2001953125
},
{
"epoch": 0.57,
"grad_norm": 3.796875,
"learning_rate": 2.367486188632446e-06,
"logits/chosen": 0.7326034903526306,
"logits/rejected": 0.5614863634109497,
"logps/chosen": -132.87228393554688,
"logps/rejected": -159.64352416992188,
"loss": 0.6731,
"pred_label": 1025.175048828125,
"rewards/accuracies": 0.3812499940395355,
"rewards/chosen": -0.5613355040550232,
"rewards/margins": 0.30218708515167236,
"rewards/rejected": -0.8635226488113403,
"step": 540,
"use_label": 10056.8251953125
},
{
"epoch": 0.58,
"grad_norm": 2.4375,
"learning_rate": 2.276292003092593e-06,
"logits/chosen": 0.6115967631340027,
"logits/rejected": 0.6694309711456299,
"logps/chosen": -123.36216735839844,
"logps/rejected": -129.92201232910156,
"loss": 0.684,
"pred_label": 1044.2249755859375,
"rewards/accuracies": 0.28125,
"rewards/chosen": -0.553015947341919,
"rewards/margins": 0.14989802241325378,
"rewards/rejected": -0.7029139995574951,
"step": 550,
"use_label": 10197.775390625
},
{
"epoch": 0.59,
"grad_norm": 2.171875,
"learning_rate": 2.1853970071701415e-06,
"logits/chosen": 0.649623453617096,
"logits/rejected": 0.5992484092712402,
"logps/chosen": -117.4646987915039,
"logps/rejected": -130.9988555908203,
"loss": 0.6748,
"pred_label": 1060.5999755859375,
"rewards/accuracies": 0.29374998807907104,
"rewards/chosen": -0.47909289598464966,
"rewards/margins": 0.1609223484992981,
"rewards/rejected": -0.6400152444839478,
"step": 560,
"use_label": 10341.400390625
},
{
"epoch": 0.6,
"grad_norm": 2.5,
"learning_rate": 2.0949227648656194e-06,
"logits/chosen": 0.1570337414741516,
"logits/rejected": 0.2956157624721527,
"logps/chosen": -116.41545104980469,
"logps/rejected": -151.84051513671875,
"loss": 0.6765,
"pred_label": 1081.375,
"rewards/accuracies": 0.375,
"rewards/chosen": -0.49815383553504944,
"rewards/margins": 0.2765403389930725,
"rewards/rejected": -0.7746941447257996,
"step": 570,
"use_label": 10480.625
},
{
"epoch": 0.61,
"grad_norm": 2.09375,
"learning_rate": 2.00499027745888e-06,
"logits/chosen": 0.3431427478790283,
"logits/rejected": 0.18610627949237823,
"logps/chosen": -128.4036865234375,
"logps/rejected": -150.36404418945312,
"loss": 0.677,
"pred_label": 1101.3499755859375,
"rewards/accuracies": 0.33125001192092896,
"rewards/chosen": -0.5349212288856506,
"rewards/margins": 0.18440793454647064,
"rewards/rejected": -0.7193291783332825,
"step": 580,
"use_label": 10620.650390625
},
{
"epoch": 0.62,
"grad_norm": 2.21875,
"learning_rate": 1.915719821680624e-06,
"logits/chosen": 0.18862374126911163,
"logits/rejected": 0.19857950508594513,
"logps/chosen": -134.26577758789062,
"logps/rejected": -165.70481872558594,
"loss": 0.6624,
"pred_label": 1121.5250244140625,
"rewards/accuracies": 0.4124999940395355,
"rewards/chosen": -0.5457721948623657,
"rewards/margins": 0.30153924226760864,
"rewards/rejected": -0.8473113775253296,
"step": 590,
"use_label": 10760.474609375
},
{
"epoch": 0.63,
"grad_norm": 3.125,
"learning_rate": 1.8272307888529276e-06,
"logits/chosen": 0.5807100534439087,
"logits/rejected": 0.25763237476348877,
"logps/chosen": -133.36561584472656,
"logps/rejected": -180.56822204589844,
"loss": 0.6784,
"pred_label": 1148.5999755859375,
"rewards/accuracies": 0.41874998807907104,
"rewards/chosen": -0.5903924703598022,
"rewards/margins": 0.3091353178024292,
"rewards/rejected": -0.8995277285575867,
"step": 600,
"use_label": 10893.400390625
},
{
"epoch": 0.63,
"eval_logits/chosen": 0.8859585523605347,
"eval_logits/rejected": 0.9939000606536865,
"eval_logps/chosen": -119.97545623779297,
"eval_logps/rejected": -156.7531280517578,
"eval_loss": 0.6791760325431824,
"eval_pred_label": 1206.1904296875,
"eval_rewards/accuracies": 0.3511904776096344,
"eval_rewards/chosen": -0.5107490420341492,
"eval_rewards/margins": 0.30280154943466187,
"eval_rewards/rejected": -0.8135506510734558,
"eval_runtime": 247.9094,
"eval_samples_per_second": 8.067,
"eval_steps_per_second": 0.254,
"eval_use_label": 11169.8095703125,
"step": 600
},
{
"epoch": 0.64,
"grad_norm": 3.234375,
"learning_rate": 1.739641525213929e-06,
"logits/chosen": 0.6684261560440063,
"logits/rejected": 0.5376627445220947,
"logps/chosen": -112.9523696899414,
"logps/rejected": -154.13601684570312,
"loss": 0.6711,
"pred_label": 1272.925048828125,
"rewards/accuracies": 0.3499999940395355,
"rewards/chosen": -0.5404548645019531,
"rewards/margins": 0.259638249874115,
"rewards/rejected": -0.8000930547714233,
"step": 610,
"use_label": 11433.0751953125
},
{
"epoch": 0.65,
"grad_norm": 2.296875,
"learning_rate": 1.6530691736402317e-06,
"logits/chosen": 0.6429753303527832,
"logits/rejected": 0.5887765288352966,
"logps/chosen": -125.70455169677734,
"logps/rejected": -175.96624755859375,
"loss": 0.6734,
"pred_label": 1297.300048828125,
"rewards/accuracies": 0.35624998807907104,
"rewards/chosen": -0.6399649381637573,
"rewards/margins": 0.3187193274497986,
"rewards/rejected": -0.9586843252182007,
"step": 620,
"use_label": 11568.7001953125
},
{
"epoch": 0.66,
"grad_norm": 2.046875,
"learning_rate": 1.5676295169786864e-06,
"logits/chosen": 0.8863061666488647,
"logits/rejected": 0.5724608302116394,
"logps/chosen": -149.8594970703125,
"logps/rejected": -178.8180694580078,
"loss": 0.6716,
"pred_label": 1321.8499755859375,
"rewards/accuracies": 0.3499999940395355,
"rewards/chosen": -0.7683452367782593,
"rewards/margins": 0.2745322585105896,
"rewards/rejected": -1.042877435684204,
"step": 630,
"use_label": 11704.150390625
},
{
"epoch": 0.67,
"grad_norm": 2.453125,
"learning_rate": 1.4834368231970922e-06,
"logits/chosen": 0.6763439178466797,
"logits/rejected": 0.8844535946846008,
"logps/chosen": -157.50503540039062,
"logps/rejected": -179.59378051757812,
"loss": 0.6738,
"pred_label": 1351.8499755859375,
"rewards/accuracies": 0.375,
"rewards/chosen": -0.8581286668777466,
"rewards/margins": 0.28607478737831116,
"rewards/rejected": -1.1442034244537354,
"step": 640,
"use_label": 11834.150390625
},
{
"epoch": 0.68,
"grad_norm": 2.921875,
"learning_rate": 1.4006036925609245e-06,
"logits/chosen": 0.5444064736366272,
"logits/rejected": 1.0297753810882568,
"logps/chosen": -149.0248565673828,
"logps/rejected": -186.53790283203125,
"loss": 0.6803,
"pred_label": 1381.949951171875,
"rewards/accuracies": 0.35624998807907104,
"rewards/chosen": -0.7583541870117188,
"rewards/margins": 0.32841619849205017,
"rewards/rejected": -1.0867704153060913,
"step": 650,
"use_label": 11964.0498046875
},
{
"epoch": 0.69,
"grad_norm": 2.21875,
"learning_rate": 1.3192409070404582e-06,
"logits/chosen": 1.0406488180160522,
"logits/rejected": 0.8413463830947876,
"logps/chosen": -115.27984619140625,
"logps/rejected": -135.30836486816406,
"loss": 0.6782,
"pred_label": 1410.574951171875,
"rewards/accuracies": 0.30000001192092896,
"rewards/chosen": -0.5715335607528687,
"rewards/margins": 0.21567881107330322,
"rewards/rejected": -0.7872124910354614,
"step": 660,
"use_label": 12095.4248046875
},
{
"epoch": 0.7,
"grad_norm": 2.28125,
"learning_rate": 1.2394572821496953e-06,
"logits/chosen": 0.3095243275165558,
"logits/rejected": 0.21946246922016144,
"logps/chosen": -121.89112854003906,
"logps/rejected": -147.43978881835938,
"loss": 0.6854,
"pred_label": 1429.925048828125,
"rewards/accuracies": 0.3187499940395355,
"rewards/chosen": -0.5618449449539185,
"rewards/margins": 0.2343917340040207,
"rewards/rejected": -0.7962367534637451,
"step": 670,
"use_label": 12236.0751953125
},
{
"epoch": 0.71,
"grad_norm": 1.9921875,
"learning_rate": 1.1613595214152713e-06,
"logits/chosen": 0.8114501237869263,
"logits/rejected": 0.7095287442207336,
"logps/chosen": -152.98135375976562,
"logps/rejected": -175.5968475341797,
"loss": 0.6781,
"pred_label": 1451.175048828125,
"rewards/accuracies": 0.3687500059604645,
"rewards/chosen": -0.6692850589752197,
"rewards/margins": 0.2704187035560608,
"rewards/rejected": -0.9397038221359253,
"step": 680,
"use_label": 12374.8251953125
},
{
"epoch": 0.72,
"grad_norm": 2.59375,
"learning_rate": 1.0850520736699362e-06,
"logits/chosen": 0.7043443322181702,
"logits/rejected": 0.5973688364028931,
"logps/chosen": -175.5553436279297,
"logps/rejected": -207.06423950195312,
"loss": 0.6749,
"pred_label": 1481.625,
"rewards/accuracies": 0.39375001192092896,
"rewards/chosen": -0.7385014891624451,
"rewards/margins": 0.37226757407188416,
"rewards/rejected": -1.1107690334320068,
"step": 690,
"use_label": 12504.375
},
{
"epoch": 0.73,
"grad_norm": 1.75,
"learning_rate": 1.0106369933615043e-06,
"logits/chosen": 0.44649118185043335,
"logits/rejected": 0.6410871744155884,
"logps/chosen": -136.0747833251953,
"logps/rejected": -162.20260620117188,
"loss": 0.6783,
"pred_label": 1506.0999755859375,
"rewards/accuracies": 0.32499998807907104,
"rewards/chosen": -0.6972768902778625,
"rewards/margins": 0.23985597491264343,
"rewards/rejected": -0.9371329545974731,
"step": 700,
"use_label": 12639.900390625
},
{
"epoch": 0.73,
"eval_logits/chosen": 1.192717432975769,
"eval_logits/rejected": 1.2994883060455322,
"eval_logps/chosen": -135.23951721191406,
"eval_logps/rejected": -171.3760528564453,
"eval_loss": 0.6756439805030823,
"eval_pred_label": 1558.5238037109375,
"eval_rewards/accuracies": 0.3670634925365448,
"eval_rewards/chosen": -0.6633896827697754,
"eval_rewards/margins": 0.29639023542404175,
"eval_rewards/rejected": -0.9597799181938171,
"eval_runtime": 247.9992,
"eval_samples_per_second": 8.065,
"eval_steps_per_second": 0.254,
"eval_use_label": 12921.4765625,
"step": 700
},
{
"epoch": 0.74,
"grad_norm": 3.421875,
"learning_rate": 9.382138040640714e-07,
"logits/chosen": 0.6493266820907593,
"logits/rejected": 0.6850475072860718,
"logps/chosen": -134.1044158935547,
"logps/rejected": -165.8421173095703,
"loss": 0.6766,
"pred_label": 1616.875,
"rewards/accuracies": 0.35624998807907104,
"rewards/chosen": -0.7103394269943237,
"rewards/margins": 0.2792840301990509,
"rewards/rejected": -0.9896234273910522,
"step": 710,
"use_label": 13193.125
},
{
"epoch": 0.75,
"grad_norm": 2.203125,
"learning_rate": 8.678793653740633e-07,
"logits/chosen": 0.7393421530723572,
"logits/rejected": 0.6282132267951965,
"logps/chosen": -108.0786361694336,
"logps/rejected": -142.93173217773438,
"loss": 0.6886,
"pred_label": 1639.2750244140625,
"rewards/accuracies": 0.2874999940395355,
"rewards/chosen": -0.5120818614959717,
"rewards/margins": 0.260955274105072,
"rewards/rejected": -0.7730370759963989,
"step": 720,
"use_label": 13330.724609375
},
{
"epoch": 0.76,
"grad_norm": 2.984375,
"learning_rate": 7.997277433690984e-07,
"logits/chosen": 0.7698175311088562,
"logits/rejected": 0.6512314677238464,
"logps/chosen": -120.15057373046875,
"logps/rejected": -147.58602905273438,
"loss": 0.6705,
"pred_label": 1667.324951171875,
"rewards/accuracies": 0.34375,
"rewards/chosen": -0.49796366691589355,
"rewards/margins": 0.33586567640304565,
"rewards/rejected": -0.8338292837142944,
"step": 730,
"use_label": 13462.6748046875
},
{
"epoch": 0.77,
"grad_norm": 1.8984375,
"learning_rate": 7.338500848029603e-07,
"logits/chosen": 0.9725875854492188,
"logits/rejected": 0.894719123840332,
"logps/chosen": -113.24635314941406,
"logps/rejected": -142.79689025878906,
"loss": 0.6779,
"pred_label": 1692.25,
"rewards/accuracies": 0.3125,
"rewards/chosen": -0.5262097716331482,
"rewards/margins": 0.26758259534835815,
"rewards/rejected": -0.7937922477722168,
"step": 740,
"use_label": 13597.75
},
{
"epoch": 0.79,
"grad_norm": 2.0,
"learning_rate": 6.70334495204884e-07,
"logits/chosen": 0.8061111569404602,
"logits/rejected": 0.7181490659713745,
"logps/chosen": -142.5282745361328,
"logps/rejected": -173.8203887939453,
"loss": 0.6814,
"pred_label": 1716.25,
"rewards/accuracies": 0.35624998807907104,
"rewards/chosen": -0.6499794125556946,
"rewards/margins": 0.24588195979595184,
"rewards/rejected": -0.8958613276481628,
"step": 750,
"use_label": 13733.75
},
{
"epoch": 0.8,
"grad_norm": 2.203125,
"learning_rate": 6.092659210462232e-07,
"logits/chosen": 0.6536890268325806,
"logits/rejected": 0.6605783104896545,
"logps/chosen": -102.72889709472656,
"logps/rejected": -149.5058135986328,
"loss": 0.6733,
"pred_label": 1741.074951171875,
"rewards/accuracies": 0.3187499940395355,
"rewards/chosen": -0.48761066794395447,
"rewards/margins": 0.28703850507736206,
"rewards/rejected": -0.7746490836143494,
"step": 760,
"use_label": 13868.9248046875
},
{
"epoch": 0.81,
"grad_norm": 2.375,
"learning_rate": 5.507260361320738e-07,
"logits/chosen": 0.6806662082672119,
"logits/rejected": 0.5722958445549011,
"logps/chosen": -151.7313690185547,
"logps/rejected": -176.56260681152344,
"loss": 0.6719,
"pred_label": 1766.5250244140625,
"rewards/accuracies": 0.40625,
"rewards/chosen": -0.6818407773971558,
"rewards/margins": 0.3464636206626892,
"rewards/rejected": -1.0283044576644897,
"step": 770,
"use_label": 14003.474609375
},
{
"epoch": 0.82,
"grad_norm": 3.03125,
"learning_rate": 4.947931323697983e-07,
"logits/chosen": 0.6961285471916199,
"logits/rejected": 0.45934200286865234,
"logps/chosen": -136.76901245117188,
"logps/rejected": -162.4578094482422,
"loss": 0.6822,
"pred_label": 1797.9000244140625,
"rewards/accuracies": 0.34375,
"rewards/chosen": -0.6396945714950562,
"rewards/margins": 0.2714308798313141,
"rewards/rejected": -0.9111254811286926,
"step": 780,
"use_label": 14132.099609375
},
{
"epoch": 0.83,
"grad_norm": 1.8046875,
"learning_rate": 4.4154201506053985e-07,
"logits/chosen": 0.824557900428772,
"logits/rejected": 0.8706857562065125,
"logps/chosen": -113.81227111816406,
"logps/rejected": -128.26260375976562,
"loss": 0.6771,
"pred_label": 1823.199951171875,
"rewards/accuracies": 0.3125,
"rewards/chosen": -0.4897800385951996,
"rewards/margins": 0.2714278995990753,
"rewards/rejected": -0.7612079381942749,
"step": 790,
"use_label": 14266.7998046875
},
{
"epoch": 0.84,
"grad_norm": 2.453125,
"learning_rate": 3.910439028537638e-07,
"logits/chosen": 0.6243492364883423,
"logits/rejected": 0.7354862689971924,
"logps/chosen": -111.455322265625,
"logps/rejected": -141.9140625,
"loss": 0.6776,
"pred_label": 1847.300048828125,
"rewards/accuracies": 0.375,
"rewards/chosen": -0.5140770673751831,
"rewards/margins": 0.28408390283584595,
"rewards/rejected": -0.7981609106063843,
"step": 800,
"use_label": 14402.7001953125
},
{
"epoch": 0.84,
"eval_logits/chosen": 1.3586419820785522,
"eval_logits/rejected": 1.4788893461227417,
"eval_logps/chosen": -123.90097045898438,
"eval_logps/rejected": -161.67913818359375,
"eval_loss": 0.6800512671470642,
"eval_pred_label": 1900.4920654296875,
"eval_rewards/accuracies": 0.3531745970249176,
"eval_rewards/chosen": -0.5500041842460632,
"eval_rewards/margins": 0.312806636095047,
"eval_rewards/rejected": -0.8628108501434326,
"eval_runtime": 247.6869,
"eval_samples_per_second": 8.075,
"eval_steps_per_second": 0.254,
"eval_use_label": 14683.5078125,
"step": 800
},
{
"epoch": 0.85,
"grad_norm": 3.0,
"learning_rate": 3.4336633249862084e-07,
"logits/chosen": 0.6561521291732788,
"logits/rejected": 0.6886910200119019,
"logps/chosen": -128.8015899658203,
"logps/rejected": -164.35073852539062,
"loss": 0.678,
"pred_label": 1961.699951171875,
"rewards/accuracies": 0.3499999940395355,
"rewards/chosen": -0.5894675254821777,
"rewards/margins": 0.26825448870658875,
"rewards/rejected": -0.8577221035957336,
"step": 810,
"use_label": 14952.2998046875
},
{
"epoch": 0.86,
"grad_norm": 2.203125,
"learning_rate": 2.98573068519539e-07,
"logits/chosen": 0.7671118974685669,
"logits/rejected": 0.7413855791091919,
"logps/chosen": -113.36724853515625,
"logps/rejected": -125.75687408447266,
"loss": 0.6785,
"pred_label": 1987.5,
"rewards/accuracies": 0.2874999940395355,
"rewards/chosen": -0.533771276473999,
"rewards/margins": 0.18785560131072998,
"rewards/rejected": -0.721626877784729,
"step": 820,
"use_label": 15086.5
},
{
"epoch": 0.87,
"grad_norm": 1.7734375,
"learning_rate": 2.5672401793681854e-07,
"logits/chosen": 0.9424182176589966,
"logits/rejected": 1.0951740741729736,
"logps/chosen": -104.4168472290039,
"logps/rejected": -135.86708068847656,
"loss": 0.6757,
"pred_label": 2014.300048828125,
"rewards/accuracies": 0.3375000059604645,
"rewards/chosen": -0.4637536108493805,
"rewards/margins": 0.3284408450126648,
"rewards/rejected": -0.7921944260597229,
"step": 830,
"use_label": 15219.7001953125
},
{
"epoch": 0.88,
"grad_norm": 3.546875,
"learning_rate": 2.178751501463036e-07,
"logits/chosen": 0.880671501159668,
"logits/rejected": 0.7017362117767334,
"logps/chosen": -107.72086334228516,
"logps/rejected": -113.772216796875,
"loss": 0.6804,
"pred_label": 2034.0250244140625,
"rewards/accuracies": 0.23749999701976776,
"rewards/chosen": -0.48421382904052734,
"rewards/margins": 0.11718887090682983,
"rewards/rejected": -0.6014026999473572,
"step": 840,
"use_label": 15359.974609375
},
{
"epoch": 0.89,
"grad_norm": 2.1875,
"learning_rate": 1.820784220652766e-07,
"logits/chosen": 0.7102145552635193,
"logits/rejected": 0.6271827816963196,
"logps/chosen": -143.84104919433594,
"logps/rejected": -181.9434814453125,
"loss": 0.661,
"pred_label": 2054.27490234375,
"rewards/accuracies": 0.4124999940395355,
"rewards/chosen": -0.5962380170822144,
"rewards/margins": 0.3919173777103424,
"rewards/rejected": -0.9881553649902344,
"step": 850,
"use_label": 15499.724609375
},
{
"epoch": 0.9,
"grad_norm": 2.03125,
"learning_rate": 1.4938170864468636e-07,
"logits/chosen": 0.856406569480896,
"logits/rejected": 0.9847167730331421,
"logps/chosen": -134.33340454101562,
"logps/rejected": -162.52786254882812,
"loss": 0.6682,
"pred_label": 2086.074951171875,
"rewards/accuracies": 0.42500001192092896,
"rewards/chosen": -0.5313155651092529,
"rewards/margins": 0.33722516894340515,
"rewards/rejected": -0.8685407638549805,
"step": 860,
"use_label": 15627.9248046875
},
{
"epoch": 0.91,
"grad_norm": 2.453125,
"learning_rate": 1.1982873884064466e-07,
"logits/chosen": 0.7517425417900085,
"logits/rejected": 0.7345870137214661,
"logps/chosen": -139.02523803710938,
"logps/rejected": -179.33541870117188,
"loss": 0.681,
"pred_label": 2114.550048828125,
"rewards/accuracies": 0.38749998807907104,
"rewards/chosen": -0.5771310925483704,
"rewards/margins": 0.393027126789093,
"rewards/rejected": -0.9701582193374634,
"step": 870,
"use_label": 15759.4501953125
},
{
"epoch": 0.92,
"grad_norm": 1.3359375,
"learning_rate": 9.345903713082305e-08,
"logits/chosen": 0.8886432647705078,
"logits/rejected": 0.9018303751945496,
"logps/chosen": -118.17820739746094,
"logps/rejected": -171.14111328125,
"loss": 0.6788,
"pred_label": 2148.22509765625,
"rewards/accuracies": 0.4000000059604645,
"rewards/chosen": -0.5542714595794678,
"rewards/margins": 0.41032201051712036,
"rewards/rejected": -0.9645935297012329,
"step": 880,
"use_label": 15885.775390625
},
{
"epoch": 0.93,
"grad_norm": 3.03125,
"learning_rate": 7.030787065396866e-08,
"logits/chosen": 0.9458627700805664,
"logits/rejected": 0.860288143157959,
"logps/chosen": -113.8023452758789,
"logps/rejected": -145.21006774902344,
"loss": 0.68,
"pred_label": 2174.175048828125,
"rewards/accuracies": 0.29374998807907104,
"rewards/chosen": -0.4997434616088867,
"rewards/margins": 0.2181231528520584,
"rewards/rejected": -0.7178665399551392,
"step": 890,
"use_label": 16019.8251953125
},
{
"epoch": 0.94,
"grad_norm": 2.265625,
"learning_rate": 5.0406202043228604e-08,
"logits/chosen": 1.2760592699050903,
"logits/rejected": 1.0304285287857056,
"logps/chosen": -123.82283020019531,
"logps/rejected": -180.8885955810547,
"loss": 0.6751,
"pred_label": 2197.10009765625,
"rewards/accuracies": 0.39375001192092896,
"rewards/chosen": -0.5441134572029114,
"rewards/margins": 0.37965571880340576,
"rewards/rejected": -0.9237691164016724,
"step": 900,
"use_label": 16156.900390625
},
{
"epoch": 0.94,
"eval_logits/chosen": 1.3693251609802246,
"eval_logits/rejected": 1.4904797077178955,
"eval_logps/chosen": -123.65629577636719,
"eval_logps/rejected": -161.58062744140625,
"eval_loss": 0.678970456123352,
"eval_pred_label": 2251.015869140625,
"eval_rewards/accuracies": 0.3571428656578064,
"eval_rewards/chosen": -0.547557532787323,
"eval_rewards/margins": 0.31426796317100525,
"eval_rewards/rejected": -0.8618254065513611,
"eval_runtime": 247.8741,
"eval_samples_per_second": 8.069,
"eval_steps_per_second": 0.254,
"eval_use_label": 16436.984375,
"step": 900
},
{
"epoch": 0.95,
"grad_norm": 1.8984375,
"learning_rate": 3.378064801637687e-08,
"logits/chosen": 0.8874324560165405,
"logits/rejected": 0.9277682304382324,
"logps/chosen": -107.72428131103516,
"logps/rejected": -141.05999755859375,
"loss": 0.6748,
"pred_label": 2314.64990234375,
"rewards/accuracies": 0.3125,
"rewards/chosen": -0.47065839171409607,
"rewards/margins": 0.3038247525691986,
"rewards/rejected": -0.7744830846786499,
"step": 910,
"use_label": 16703.349609375
},
{
"epoch": 0.96,
"grad_norm": 2.359375,
"learning_rate": 2.0453443778310766e-08,
"logits/chosen": 1.0454901456832886,
"logits/rejected": 1.0777199268341064,
"logps/chosen": -97.26091003417969,
"logps/rejected": -147.07302856445312,
"loss": 0.6799,
"pred_label": 2345.5,
"rewards/accuracies": 0.3187499940395355,
"rewards/chosen": -0.4330506920814514,
"rewards/margins": 0.3231905996799469,
"rewards/rejected": -0.7562412023544312,
"step": 920,
"use_label": 16832.5
},
{
"epoch": 0.97,
"grad_norm": 2.96875,
"learning_rate": 1.0442413283435759e-08,
"logits/chosen": 1.0506960153579712,
"logits/rejected": 0.9065178632736206,
"logps/chosen": -111.4305648803711,
"logps/rejected": -146.4462432861328,
"loss": 0.6772,
"pred_label": 2373.125,
"rewards/accuracies": 0.3375000059604645,
"rewards/chosen": -0.4827675223350525,
"rewards/margins": 0.2878049314022064,
"rewards/rejected": -0.7705724835395813,
"step": 930,
"use_label": 16964.875
},
{
"epoch": 0.98,
"grad_norm": 2.515625,
"learning_rate": 3.760945397705828e-09,
"logits/chosen": 1.234220027923584,
"logits/rejected": 0.9605228304862976,
"logps/chosen": -124.61787414550781,
"logps/rejected": -160.95223999023438,
"loss": 0.6706,
"pred_label": 2397.75,
"rewards/accuracies": 0.34375,
"rewards/chosen": -0.5566731691360474,
"rewards/margins": 0.27601632475852966,
"rewards/rejected": -0.8326894640922546,
"step": 940,
"use_label": 17100.25
},
{
"epoch": 0.99,
"grad_norm": 2.375,
"learning_rate": 4.1797599220405605e-10,
"logits/chosen": 0.7550326585769653,
"logits/rejected": 0.6674235463142395,
"logps/chosen": -136.30941772460938,
"logps/rejected": -160.4894561767578,
"loss": 0.6729,
"pred_label": 2425.39990234375,
"rewards/accuracies": 0.3125,
"rewards/chosen": -0.5969915390014648,
"rewards/margins": 0.21693304181098938,
"rewards/rejected": -0.8139246106147766,
"step": 950,
"use_label": 17232.599609375
},
{
"epoch": 1.0,
"step": 955,
"total_flos": 0.0,
"train_loss": 0.6760230718482851,
"train_runtime": 20063.9235,
"train_samples_per_second": 3.047,
"train_steps_per_second": 0.048
}
],
"logging_steps": 10,
"max_steps": 955,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}