{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9997382884061764,
  "eval_steps": 100,
  "global_step": 955,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "grad_norm": 0.59375,
      "learning_rate": 5.208333333333333e-08,
      "logits/chosen": -2.1666858196258545,
      "logits/rejected": -2.182244300842285,
      "logps/chosen": -12.368609428405762,
      "logps/rejected": -24.687644958496094,
      "loss": 0.6931,
      "pred_label": 0.0,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1,
      "use_label": 10.0
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.6015625,
      "learning_rate": 5.208333333333334e-07,
      "logits/chosen": -2.2111542224884033,
      "logits/rejected": -2.2718067169189453,
      "logps/chosen": -57.56840133666992,
      "logps/rejected": -65.20916748046875,
      "loss": 0.693,
      "pred_label": 0.0,
      "rewards/accuracies": 0.2569444477558136,
      "rewards/chosen": 0.0011389791034162045,
      "rewards/margins": 0.0002508986508473754,
      "rewards/rejected": 0.0008880805689841509,
      "step": 10,
      "use_label": 90.0
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.6796875,
      "learning_rate": 1.0416666666666667e-06,
      "logits/chosen": -2.242893695831299,
      "logits/rejected": -2.279961109161377,
      "logps/chosen": -56.537681579589844,
      "logps/rejected": -68.3794174194336,
      "loss": 0.6924,
      "pred_label": 0.0,
      "rewards/accuracies": 0.22499999403953552,
      "rewards/chosen": 0.006626849062740803,
      "rewards/margins": 0.001654049614444375,
      "rewards/rejected": 0.004972799215465784,
      "step": 20,
      "use_label": 242.0
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.55078125,
      "learning_rate": 1.5625e-06,
      "logits/chosen": -2.2637219429016113,
      "logits/rejected": -2.2480521202087402,
      "logps/chosen": -53.993507385253906,
      "logps/rejected": -67.89700317382812,
      "loss": 0.6919,
      "pred_label": 0.0,
      "rewards/accuracies": 0.26875001192092896,
      "rewards/chosen": 0.016421381384134293,
      "rewards/margins": 0.002580237342044711,
      "rewards/rejected": 0.013841142877936363,
      "step": 30,
      "use_label": 402.0
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.6328125,
      "learning_rate": 2.0833333333333334e-06,
      "logits/chosen": -2.2831993103027344,
      "logits/rejected": -2.2760486602783203,
      "logps/chosen": -55.59602737426758,
      "logps/rejected": -66.58573913574219,
      "loss": 0.6909,
      "pred_label": 0.0,
      "rewards/accuracies": 0.20624999701976776,
      "rewards/chosen": 0.018266689032316208,
      "rewards/margins": 0.0004533957107923925,
      "rewards/rejected": 0.017813291400671005,
      "step": 40,
      "use_label": 562.0
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.6015625,
      "learning_rate": 2.604166666666667e-06,
      "logits/chosen": -2.344376564025879,
      "logits/rejected": -2.3342297077178955,
      "logps/chosen": -69.12073516845703,
      "logps/rejected": -84.67558288574219,
      "loss": 0.6889,
      "pred_label": 0.0,
      "rewards/accuracies": 0.30000001192092896,
      "rewards/chosen": 0.02673395536839962,
      "rewards/margins": 0.00583356199786067,
      "rewards/rejected": 0.020900394767522812,
      "step": 50,
      "use_label": 722.0
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.72265625,
      "learning_rate": 3.125e-06,
      "logits/chosen": -2.3030121326446533,
      "logits/rejected": -2.3094825744628906,
      "logps/chosen": -82.04167175292969,
      "logps/rejected": -90.7291488647461,
      "loss": 0.6876,
      "pred_label": 0.0,
      "rewards/accuracies": 0.3499999940395355,
      "rewards/chosen": 0.036534082144498825,
      "rewards/margins": 0.013860121369361877,
      "rewards/rejected": 0.022673960775136948,
      "step": 60,
      "use_label": 882.0
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.7890625,
      "learning_rate": 3.6458333333333333e-06,
      "logits/chosen": -2.345569610595703,
      "logits/rejected": -2.3263676166534424,
      "logps/chosen": -77.1853256225586,
      "logps/rejected": -77.63880920410156,
      "loss": 0.685,
      "pred_label": 0.0,
      "rewards/accuracies": 0.3062500059604645,
      "rewards/chosen": 0.025494003668427467,
      "rewards/margins": 0.016305232420563698,
      "rewards/rejected": 0.009188770316541195,
      "step": 70,
      "use_label": 1042.0
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.81640625,
      "learning_rate": 4.166666666666667e-06,
      "logits/chosen": -2.241882801055908,
      "logits/rejected": -2.195146322250366,
      "logps/chosen": -81.66094207763672,
      "logps/rejected": -89.08940124511719,
      "loss": 0.6805,
      "pred_label": 0.0,
      "rewards/accuracies": 0.32499998807907104,
      "rewards/chosen": 0.003909807652235031,
      "rewards/margins": 0.025169039145112038,
      "rewards/rejected": -0.021259231492877007,
      "step": 80,
      "use_label": 1202.0
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7734375,
      "learning_rate": 4.6875000000000004e-06,
      "logits/chosen": -2.1871695518493652,
      "logits/rejected": -2.2313501834869385,
      "logps/chosen": -62.76741409301758,
      "logps/rejected": -81.16191101074219,
      "loss": 0.6747,
      "pred_label": 0.0,
      "rewards/accuracies": 0.33125001192092896,
      "rewards/chosen": -0.016776535660028458,
      "rewards/margins": 0.048332639038562775,
      "rewards/rejected": -0.06510917842388153,
      "step": 90,
      "use_label": 1362.0
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4375,
      "learning_rate": 4.9997324926814375e-06,
      "logits/chosen": -2.1414177417755127,
      "logits/rejected": -2.107236623764038,
      "logps/chosen": -78.60578155517578,
      "logps/rejected": -81.1384506225586,
      "loss": 0.6685,
      "pred_label": 0.0,
      "rewards/accuracies": 0.32499998807907104,
      "rewards/chosen": -0.04031088575720787,
      "rewards/margins": 0.052690792828798294,
      "rewards/rejected": -0.09300167858600616,
      "step": 100,
      "use_label": 1522.0
    },
    {
      "epoch": 0.1,
      "eval_logits/chosen": -2.109715223312378,
      "eval_logits/rejected": -2.0796475410461426,
      "eval_logps/chosen": -71.95718383789062,
      "eval_logps/rejected": -84.7625961303711,
      "eval_loss": 0.6684110760688782,
      "eval_pred_label": 0.0,
      "eval_rewards/accuracies": 0.335317462682724,
      "eval_rewards/chosen": -0.030566338449716568,
      "eval_rewards/margins": 0.06307896971702576,
      "eval_rewards/rejected": -0.09364530444145203,
      "eval_runtime": 247.4954,
      "eval_samples_per_second": 8.081,
      "eval_steps_per_second": 0.255,
      "eval_use_label": 1856.0,
      "step": 100
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.71875,
      "learning_rate": 4.996723692767927e-06,
      "logits/chosen": -2.12998104095459,
      "logits/rejected": -2.1109042167663574,
      "logps/chosen": -68.2921142578125,
      "logps/rejected": -84.99057006835938,
      "loss": 0.6713,
      "pred_label": 0.07500000298023224,
      "rewards/accuracies": 0.3062500059604645,
      "rewards/chosen": -0.06523006409406662,
      "rewards/margins": 0.0570509135723114,
      "rewards/rejected": -0.12228099256753922,
      "step": 110,
      "use_label": 2185.925048828125
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.046875,
      "learning_rate": 4.9903757462135984e-06,
      "logits/chosen": -2.3605504035949707,
      "logits/rejected": -2.243201971054077,
      "logps/chosen": -80.2857666015625,
      "logps/rejected": -96.13867950439453,
      "loss": 0.6667,
      "pred_label": 1.875,
      "rewards/accuracies": 0.3499999940395355,
      "rewards/chosen": -0.062185365706682205,
      "rewards/margins": 0.0815814733505249,
      "rewards/rejected": -0.143766850233078,
      "step": 120,
      "use_label": 2344.125
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0078125,
      "learning_rate": 4.980697142834315e-06,
      "logits/chosen": -2.146286725997925,
      "logits/rejected": -2.1618175506591797,
      "logps/chosen": -67.6681137084961,
      "logps/rejected": -78.9002456665039,
      "loss": 0.6675,
      "pred_label": 2.075000047683716,
      "rewards/accuracies": 0.2750000059604645,
      "rewards/chosen": -0.09194014966487885,
      "rewards/margins": 0.04880703240633011,
      "rewards/rejected": -0.14074717462062836,
      "step": 130,
      "use_label": 2503.925048828125
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.28125,
      "learning_rate": 4.967700826904229e-06,
      "logits/chosen": -2.1254963874816895,
      "logits/rejected": -2.160235643386841,
      "logps/chosen": -74.917724609375,
      "logps/rejected": -99.1263427734375,
      "loss": 0.6684,
      "pred_label": 5.050000190734863,
      "rewards/accuracies": 0.28125,
      "rewards/chosen": -0.15644724667072296,
      "rewards/margins": 0.08642110973596573,
      "rewards/rejected": -0.2428683489561081,
      "step": 140,
      "use_label": 2660.949951171875
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.359375,
      "learning_rate": 4.951404179843963e-06,
      "logits/chosen": -2.1338083744049072,
      "logits/rejected": -2.0785932540893555,
      "logps/chosen": -57.580589294433594,
      "logps/rejected": -64.5077133178711,
      "loss": 0.6658,
      "pred_label": 9.100000381469727,
      "rewards/accuracies": 0.26875001192092896,
      "rewards/chosen": -0.10085760056972504,
      "rewards/margins": 0.09126537293195724,
      "rewards/rejected": -0.19212298095226288,
      "step": 150,
      "use_label": 2816.89990234375
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.15625,
      "learning_rate": 4.931828996974498e-06,
      "logits/chosen": -2.1478817462921143,
      "logits/rejected": -2.1238207817077637,
      "logps/chosen": -102.6265640258789,
      "logps/rejected": -118.9216537475586,
      "loss": 0.667,
      "pred_label": 15.399999618530273,
      "rewards/accuracies": 0.3499999940395355,
      "rewards/chosen": -0.21023361384868622,
      "rewards/margins": 0.16089467704296112,
      "rewards/rejected": -0.37112829089164734,
      "step": 160,
      "use_label": 2970.60009765625
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.140625,
      "learning_rate": 4.909001458367867e-06,
      "logits/chosen": -1.9664795398712158,
      "logits/rejected": -1.9388923645019531,
      "logps/chosen": -81.67234802246094,
      "logps/rejected": -97.5047836303711,
      "loss": 0.6635,
      "pred_label": 23.475000381469727,
      "rewards/accuracies": 0.3375000059604645,
      "rewards/chosen": -0.17715924978256226,
      "rewards/margins": 0.13409331440925598,
      "rewards/rejected": -0.31125253438949585,
      "step": 170,
      "use_label": 3122.52490234375
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.96875,
      "learning_rate": 4.882952093833628e-06,
      "logits/chosen": -1.896836519241333,
      "logits/rejected": -1.9397751092910767,
      "logps/chosen": -82.50892639160156,
      "logps/rejected": -105.1452407836914,
      "loss": 0.6701,
      "pred_label": 29.649999618530273,
      "rewards/accuracies": 0.33125001192092896,
      "rewards/chosen": -0.20006974041461945,
      "rewards/margins": 0.11658792197704315,
      "rewards/rejected": -0.3166576623916626,
      "step": 180,
      "use_label": 3276.35009765625
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5859375,
      "learning_rate": 4.853715742087947e-06,
      "logits/chosen": -1.8957335948944092,
      "logits/rejected": -1.8187646865844727,
      "logps/chosen": -101.19456481933594,
      "logps/rejected": -109.06144714355469,
      "loss": 0.6648,
      "pred_label": 35.17499923706055,
      "rewards/accuracies": 0.3375000059604645,
      "rewards/chosen": -0.27311572432518005,
      "rewards/margins": 0.1226036325097084,
      "rewards/rejected": -0.39571934938430786,
      "step": 190,
      "use_label": 3430.824951171875
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.125,
      "learning_rate": 4.821331504159906e-06,
      "logits/chosen": -1.8366466760635376,
      "logits/rejected": -1.8133814334869385,
      "logps/chosen": -118.866943359375,
      "logps/rejected": -123.262451171875,
      "loss": 0.676,
      "pred_label": 43.42499923706055,
      "rewards/accuracies": 0.35624998807907104,
      "rewards/chosen": -0.38812780380249023,
      "rewards/margins": 0.1128091812133789,
      "rewards/rejected": -0.5009369850158691,
      "step": 200,
      "use_label": 3582.574951171875
    },
    {
      "epoch": 0.21,
      "eval_logits/chosen": -1.7319464683532715,
      "eval_logits/rejected": -1.6888620853424072,
      "eval_logps/chosen": -106.19064331054688,
      "eval_logps/rejected": -124.95625305175781,
      "eval_loss": 0.6716896295547485,
      "eval_pred_label": 61.55555725097656,
      "eval_rewards/accuracies": 0.3214285671710968,
      "eval_rewards/chosen": -0.372901052236557,
      "eval_rewards/margins": 0.12268086522817612,
      "eval_rewards/rejected": -0.49558189511299133,
      "eval_runtime": 248.0123,
      "eval_samples_per_second": 8.064,
      "eval_steps_per_second": 0.254,
      "eval_use_label": 3898.4443359375,
      "step": 200
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.515625,
      "learning_rate": 4.7858426910973435e-06,
      "logits/chosen": -1.9356311559677124,
      "logits/rejected": -1.9080215692520142,
      "logps/chosen": -93.94760131835938,
      "logps/rejected": -106.8377456665039,
      "loss": 0.6743,
      "pred_label": 81.25,
      "rewards/accuracies": 0.32499998807907104,
      "rewards/chosen": -0.259369432926178,
      "rewards/margins": 0.13343419134616852,
      "rewards/rejected": -0.3928036093711853,
      "step": 210,
      "use_label": 4208.75
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.34375,
      "learning_rate": 4.747296766042161e-06,
      "logits/chosen": -1.8437402248382568,
      "logits/rejected": -1.810903549194336,
      "logps/chosen": -100.77757263183594,
      "logps/rejected": -112.38002014160156,
      "loss": 0.6651,
      "pred_label": 92.5999984741211,
      "rewards/accuracies": 0.35624998807907104,
      "rewards/chosen": -0.24724093079566956,
      "rewards/margins": 0.1804189234972,
      "rewards/rejected": -0.427659809589386,
      "step": 220,
      "use_label": 4357.39990234375
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.828125,
      "learning_rate": 4.705745280752586e-06,
      "logits/chosen": -1.5612363815307617,
      "logits/rejected": -1.494425654411316,
      "logps/chosen": -108.3369369506836,
      "logps/rejected": -121.25785064697266,
      "loss": 0.6784,
      "pred_label": 107.0,
      "rewards/accuracies": 0.36250001192092896,
      "rewards/chosen": -0.29597795009613037,
      "rewards/margins": 0.18698883056640625,
      "rewards/rejected": -0.482966810464859,
      "step": 230,
      "use_label": 4503.0
    },
    {
      "epoch": 0.25,
      "grad_norm": 3.109375,
      "learning_rate": 4.661243806657256e-06,
      "logits/chosen": -1.279926061630249,
      "logits/rejected": -1.207486629486084,
      "logps/chosen": -94.6622314453125,
      "logps/rejected": -121.74755859375,
      "loss": 0.68,
      "pred_label": 124.05000305175781,
      "rewards/accuracies": 0.3125,
      "rewards/chosen": -0.30948689579963684,
      "rewards/margins": 0.16862434148788452,
      "rewards/rejected": -0.4781111776828766,
      "step": 240,
      "use_label": 4645.9501953125
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.8203125,
      "learning_rate": 4.613851860533367e-06,
      "logits/chosen": -1.4483808279037476,
      "logits/rejected": -1.535796880722046,
      "logps/chosen": -88.96175384521484,
      "logps/rejected": -106.26942443847656,
      "loss": 0.678,
      "pred_label": 140.64999389648438,
      "rewards/accuracies": 0.3375000059604645,
      "rewards/chosen": -0.2383408546447754,
      "rewards/margins": 0.1766553670167923,
      "rewards/rejected": -0.4149962067604065,
      "step": 250,
      "use_label": 4789.35009765625
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.359375,
      "learning_rate": 4.563632824908252e-06,
      "logits/chosen": -1.5566436052322388,
      "logits/rejected": -1.472214937210083,
      "logps/chosen": -101.36164855957031,
      "logps/rejected": -132.7355194091797,
      "loss": 0.6768,
      "pred_label": 156.85000610351562,
      "rewards/accuracies": 0.32499998807907104,
      "rewards/chosen": -0.404205858707428,
      "rewards/margins": 0.1837155818939209,
      "rewards/rejected": -0.5879215002059937,
      "step": 260,
      "use_label": 4933.14990234375
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.265625,
      "learning_rate": 4.510653863290871e-06,
      "logits/chosen": -1.5190045833587646,
      "logits/rejected": -1.5413776636123657,
      "logps/chosen": -123.2553482055664,
      "logps/rejected": -132.5965118408203,
      "loss": 0.6699,
      "pred_label": 174.9499969482422,
      "rewards/accuracies": 0.3187499940395355,
      "rewards/chosen": -0.478261798620224,
      "rewards/margins": 0.1528010368347168,
      "rewards/rejected": -0.6310628056526184,
      "step": 270,
      "use_label": 5075.0498046875
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.390625,
      "learning_rate": 4.454985830346574e-06,
      "logits/chosen": -1.4161837100982666,
      "logits/rejected": -1.461897850036621,
      "logps/chosen": -97.59378051757812,
      "logps/rejected": -113.92098236083984,
      "loss": 0.6682,
      "pred_label": 182.6750030517578,
      "rewards/accuracies": 0.3125,
      "rewards/chosen": -0.36361420154571533,
      "rewards/margins": 0.11149580776691437,
      "rewards/rejected": -0.4751099944114685,
      "step": 280,
      "use_label": 5227.3251953125
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.0,
      "learning_rate": 4.396703177135262e-06,
      "logits/chosen": -1.1572140455245972,
      "logits/rejected": -1.1582170724868774,
      "logps/chosen": -113.84346008300781,
      "logps/rejected": -136.17958068847656,
      "loss": 0.6783,
      "pred_label": 200.5749969482422,
      "rewards/accuracies": 0.3687500059604645,
      "rewards/chosen": -0.3756815791130066,
      "rewards/margins": 0.2912302017211914,
      "rewards/rejected": -0.666911780834198,
      "step": 290,
      "use_label": 5369.4248046875
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.59375,
      "learning_rate": 4.335883851539693e-06,
      "logits/chosen": -0.7739458084106445,
      "logits/rejected": -0.8222519159317017,
      "logps/chosen": -96.963623046875,
      "logps/rejected": -129.59027099609375,
      "loss": 0.6728,
      "pred_label": 223.0749969482422,
      "rewards/accuracies": 0.3187499940395355,
      "rewards/chosen": -0.43035492300987244,
      "rewards/margins": 0.2433358132839203,
      "rewards/rejected": -0.6736907958984375,
      "step": 300,
      "use_label": 5506.9248046875
    },
    {
      "epoch": 0.31,
      "eval_logits/chosen": -0.7414401173591614,
      "eval_logits/rejected": -0.6762140393257141,
      "eval_logps/chosen": -116.0198745727539,
      "eval_logps/rejected": -145.98529052734375,
      "eval_loss": 0.6783695220947266,
      "eval_pred_label": 270.96826171875,
      "eval_rewards/accuracies": 0.3373015820980072,
      "eval_rewards/chosen": -0.4711931049823761,
      "eval_rewards/margins": 0.23467905819416046,
      "eval_rewards/rejected": -0.7058721780776978,
      "eval_runtime": 248.0617,
      "eval_samples_per_second": 8.063,
      "eval_steps_per_second": 0.254,
      "eval_use_label": 5793.03173828125,
      "step": 300
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.21875,
      "learning_rate": 4.2726091940171055e-06,
      "logits/chosen": -1.1870858669281006,
      "logits/rejected": -1.1604619026184082,
      "logps/chosen": -93.13883972167969,
      "logps/rejected": -115.36534118652344,
      "loss": 0.6683,
      "pred_label": 314.92498779296875,
      "rewards/accuracies": 0.3062500059604645,
      "rewards/chosen": -0.4414878487586975,
      "rewards/margins": 0.11716248840093613,
      "rewards/rejected": -0.5586503148078918,
      "step": 310,
      "use_label": 6079.0751953125
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.34375,
      "learning_rate": 4.206963828813555e-06,
      "logits/chosen": -0.7778801918029785,
      "logits/rejected": -0.8481136560440063,
      "logps/chosen": -115.88163757324219,
      "logps/rejected": -152.60556030273438,
      "loss": 0.6715,
      "pred_label": 340.4750061035156,
      "rewards/accuracies": 0.36250001192092896,
      "rewards/chosen": -0.41940560936927795,
      "rewards/margins": 0.2641361653804779,
      "rewards/rejected": -0.6835418343544006,
      "step": 320,
      "use_label": 6213.52490234375
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.8125,
      "learning_rate": 4.139035550786495e-06,
      "logits/chosen": -0.7590861320495605,
      "logits/rejected": -0.6889998316764832,
      "logps/chosen": -90.45745086669922,
      "logps/rejected": -116.98609924316406,
      "loss": 0.6718,
      "pred_label": 364.04998779296875,
      "rewards/accuracies": 0.3499999940395355,
      "rewards/chosen": -0.269859254360199,
      "rewards/margins": 0.2574610114097595,
      "rewards/rejected": -0.5273202657699585,
      "step": 330,
      "use_label": 6349.9501953125
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.703125,
      "learning_rate": 4.068915207986931e-06,
      "logits/chosen": -0.6209542155265808,
      "logits/rejected": -0.47464966773986816,
      "logps/chosen": -102.54121398925781,
      "logps/rejected": -130.24276733398438,
      "loss": 0.678,
      "pred_label": 391.07501220703125,
      "rewards/accuracies": 0.32499998807907104,
      "rewards/chosen": -0.395341694355011,
      "rewards/margins": 0.19944116473197937,
      "rewards/rejected": -0.594782829284668,
      "step": 340,
      "use_label": 6482.9248046875
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8203125,
      "learning_rate": 3.996696580158211e-06,
      "logits/chosen": -0.7593547701835632,
      "logits/rejected": -0.6881019473075867,
      "logps/chosen": -92.08587646484375,
      "logps/rejected": -110.56968688964844,
      "loss": 0.6727,
      "pred_label": 411.9750061035156,
      "rewards/accuracies": 0.3187499940395355,
      "rewards/chosen": -0.31440719962120056,
      "rewards/margins": 0.14519965648651123,
      "rewards/rejected": -0.4596068263053894,
      "step": 350,
      "use_label": 6622.02490234375
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.9609375,
      "learning_rate": 3.922476253313921e-06,
      "logits/chosen": -0.996785044670105,
      "logits/rejected": -0.9698454737663269,
      "logps/chosen": -101.95857238769531,
      "logps/rejected": -114.76066589355469,
      "loss": 0.6783,
      "pred_label": 427.95001220703125,
      "rewards/accuracies": 0.36250001192092896,
      "rewards/chosen": -0.32893818616867065,
      "rewards/margins": 0.16835859417915344,
      "rewards/rejected": -0.4972967505455017,
      "step": 360,
      "use_label": 6766.0498046875
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.484375,
      "learning_rate": 3.846353490562664e-06,
      "logits/chosen": -1.0720884799957275,
      "logits/rejected": -0.859793484210968,
      "logps/chosen": -103.12544250488281,
      "logps/rejected": -120.26700592041016,
      "loss": 0.6626,
      "pred_label": 443.875,
      "rewards/accuracies": 0.3499999940395355,
      "rewards/chosen": -0.29031243920326233,
      "rewards/margins": 0.2187139242887497,
      "rewards/rejected": -0.509026288986206,
      "step": 370,
      "use_label": 6910.125
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.671875,
      "learning_rate": 3.768430099352445e-06,
      "logits/chosen": -0.4451161324977875,
      "logits/rejected": -0.32113510370254517,
      "logps/chosen": -106.78487396240234,
      "logps/rejected": -132.46365356445312,
      "loss": 0.6759,
      "pred_label": 463.2749938964844,
      "rewards/accuracies": 0.34375,
      "rewards/chosen": -0.4723123610019684,
      "rewards/margins": 0.1635245531797409,
      "rewards/rejected": -0.6358368992805481,
      "step": 380,
      "use_label": 7050.72509765625
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.875,
      "learning_rate": 3.6888102953122307e-06,
      "logits/chosen": -0.02978489175438881,
      "logits/rejected": -0.03225391358137131,
      "logps/chosen": -136.5101318359375,
      "logps/rejected": -144.56173706054688,
      "loss": 0.6827,
      "pred_label": 495.82501220703125,
      "rewards/accuracies": 0.32499998807907104,
      "rewards/chosen": -0.516473650932312,
      "rewards/margins": 0.25756725668907166,
      "rewards/rejected": -0.774040937423706,
      "step": 390,
      "use_label": 7178.1748046875
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.515625,
      "learning_rate": 3.607600562872785e-06,
      "logits/chosen": 0.09610392153263092,
      "logits/rejected": 0.09092014282941818,
      "logps/chosen": -117.39754486083984,
      "logps/rejected": -152.1193084716797,
      "loss": 0.6715,
      "pred_label": 523.0250244140625,
      "rewards/accuracies": 0.32499998807907104,
      "rewards/chosen": -0.4636654853820801,
      "rewards/margins": 0.2620038390159607,
      "rewards/rejected": -0.725669264793396,
      "step": 400,
      "use_label": 7310.97509765625
    },
    {
      "epoch": 0.42,
      "eval_logits/chosen": 0.6419875621795654,
      "eval_logits/rejected": 0.764842689037323,
      "eval_logps/chosen": -113.52101135253906,
      "eval_logps/rejected": -148.9146270751953,
      "eval_loss": 0.6811794638633728,
      "eval_pred_label": 572.6825561523438,
      "eval_rewards/accuracies": 0.3551587164402008,
      "eval_rewards/chosen": -0.44620463252067566,
      "eval_rewards/margins": 0.2889607846736908,
      "eval_rewards/rejected": -0.7351653575897217,
      "eval_runtime": 247.9054,
      "eval_samples_per_second": 8.068,
      "eval_steps_per_second": 0.254,
      "eval_use_label": 7595.3173828125,
      "step": 400
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.671875,
      "learning_rate": 3.5249095128531863e-06,
      "logits/chosen": 0.34008723497390747,
      "logits/rejected": 0.07830000668764114,
      "logps/chosen": -100.46337890625,
      "logps/rejected": -124.54425048828125,
      "loss": 0.6735,
      "pred_label": 637.2999877929688,
      "rewards/accuracies": 0.3812499940395355,
      "rewards/chosen": -0.3595535457134247,
      "rewards/margins": 0.30695658922195435,
      "rewards/rejected": -0.6665101647377014,
      "step": 410,
      "use_label": 7860.7001953125
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.53125,
      "learning_rate": 3.4408477372034743e-06,
      "logits/chosen": 0.0810169205069542,
      "logits/rejected": 0.1531490534543991,
      "logps/chosen": -104.22071838378906,
      "logps/rejected": -134.35255432128906,
      "loss": 0.6774,
      "pred_label": 659.75,
      "rewards/accuracies": 0.32499998807907104,
      "rewards/chosen": -0.4057747721672058,
      "rewards/margins": 0.256902813911438,
      "rewards/rejected": -0.6626775860786438,
      "step": 420,
      "use_label": 7998.25
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.03125,
      "learning_rate": 3.355527661097728e-06,
      "logits/chosen": 0.1920831948518753,
      "logits/rejected": 0.327668160200119,
      "logps/chosen": -127.31324768066406,
      "logps/rejected": -138.50869750976562,
      "loss": 0.6761,
      "pred_label": 684.7249755859375,
      "rewards/accuracies": 0.30000001192092896,
      "rewards/chosen": -0.5084472894668579,
      "rewards/margins": 0.1930726319551468,
      "rewards/rejected": -0.7015198469161987,
      "step": 430,
      "use_label": 8133.27490234375
    },
    {
      "epoch": 0.46,
      "grad_norm": 3.46875,
      "learning_rate": 3.269063392575352e-06,
      "logits/chosen": 0.2943039536476135,
      "logits/rejected": 0.04520421102643013,
      "logps/chosen": -116.11759185791016,
      "logps/rejected": -138.48878479003906,
      "loss": 0.6761,
      "pred_label": 708.7999877929688,
      "rewards/accuracies": 0.3187499940395355,
      "rewards/chosen": -0.48671650886535645,
      "rewards/margins": 0.26555269956588745,
      "rewards/rejected": -0.7522691488265991,
      "step": 440,
      "use_label": 8269.2001953125
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.046875,
      "learning_rate": 3.181570569931697e-06,
      "logits/chosen": 0.16918572783470154,
      "logits/rejected": -0.033099401742219925,
      "logps/chosen": -121.46165466308594,
      "logps/rejected": -148.68612670898438,
      "loss": 0.6878,
      "pred_label": 732.25,
      "rewards/accuracies": 0.34375,
      "rewards/chosen": -0.4864117503166199,
      "rewards/margins": 0.24023088812828064,
      "rewards/rejected": -0.7266427278518677,
      "step": 450,
      "use_label": 8405.75
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.109375,
      "learning_rate": 3.09316620706208e-06,
      "logits/chosen": 0.21930424869060516,
      "logits/rejected": 0.31035083532333374,
      "logps/chosen": -91.38786315917969,
      "logps/rejected": -110.15885925292969,
      "loss": 0.6718,
      "pred_label": 753.5999755859375,
      "rewards/accuracies": 0.3062500059604645,
      "rewards/chosen": -0.34389492869377136,
      "rewards/margins": 0.23962631821632385,
      "rewards/rejected": -0.5835212469100952,
      "step": 460,
      "use_label": 8544.400390625
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.453125,
      "learning_rate": 3.0039685369660785e-06,
      "logits/chosen": 0.31231826543807983,
      "logits/rejected": 0.4929059147834778,
      "logps/chosen": -109.46476745605469,
      "logps/rejected": -141.3244171142578,
      "loss": 0.6786,
      "pred_label": 777.9000244140625,
      "rewards/accuracies": 0.35624998807907104,
      "rewards/chosen": -0.4255266785621643,
      "rewards/margins": 0.30392220616340637,
      "rewards/rejected": -0.7294487953186035,
      "step": 470,
      "use_label": 8680.099609375
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6328125,
      "learning_rate": 2.91409685362137e-06,
      "logits/chosen": 0.5682842135429382,
      "logits/rejected": 0.4352129399776459,
      "logps/chosen": -135.01535034179688,
      "logps/rejected": -157.18922424316406,
      "loss": 0.6756,
      "pred_label": 809.4000244140625,
      "rewards/accuracies": 0.3187499940395355,
      "rewards/chosen": -0.598731517791748,
      "rewards/margins": 0.19433310627937317,
      "rewards/rejected": -0.7930646538734436,
      "step": 480,
      "use_label": 8808.599609375
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8359375,
      "learning_rate": 2.8236713524386085e-06,
      "logits/chosen": 0.13114799559116364,
      "logits/rejected": 0.3516528606414795,
      "logps/chosen": -117.50811767578125,
      "logps/rejected": -130.34207153320312,
      "loss": 0.6686,
      "pred_label": 831.2999877929688,
      "rewards/accuracies": 0.3062500059604645,
      "rewards/chosen": -0.5088413953781128,
      "rewards/margins": 0.19557976722717285,
      "rewards/rejected": -0.7044212222099304,
      "step": 490,
      "use_label": 8946.7001953125
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9296875,
      "learning_rate": 2.7328129695107205e-06,
      "logits/chosen": 0.332313597202301,
      "logits/rejected": 0.04164884611964226,
      "logps/chosen": -148.10968017578125,
      "logps/rejected": -173.51394653320312,
      "loss": 0.6744,
      "pred_label": 849.9749755859375,
      "rewards/accuracies": 0.42500001192092896,
      "rewards/chosen": -0.7197140455245972,
      "rewards/margins": 0.24881935119628906,
      "rewards/rejected": -0.9685333967208862,
      "step": 500,
      "use_label": 9088.025390625
    },
    {
      "epoch": 0.52,
      "eval_logits/chosen": 0.6149206757545471,
      "eval_logits/rejected": 0.7128078937530518,
      "eval_logps/chosen": -120.11334228515625,
      "eval_logps/rejected": -151.15725708007812,
      "eval_loss": 0.672174334526062,
      "eval_pred_label": 893.8412475585938,
      "eval_rewards/accuracies": 0.341269850730896,
      "eval_rewards/chosen": -0.5121279954910278,
      "eval_rewards/margins": 0.24546381831169128,
      "eval_rewards/rejected": -0.7575918436050415,
      "eval_runtime": 247.8447,
      "eval_samples_per_second": 8.07,
      "eval_steps_per_second": 0.254,
      "eval_use_label": 9378.1591796875,
      "step": 500
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7578125,
      "learning_rate": 2.641643219871597e-06,
      "logits/chosen": 0.4815472662448883,
      "logits/rejected": 0.2771294116973877,
      "logps/chosen": -110.22953033447266,
      "logps/rejected": -142.9767608642578,
      "loss": 0.6765,
      "pred_label": 940.7249755859375,
      "rewards/accuracies": 0.36250001192092896,
      "rewards/chosen": -0.4598473608493805,
      "rewards/margins": 0.2644655704498291,
      "rewards/rejected": -0.724312961101532,
      "step": 510,
      "use_label": 9661.275390625
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.890625,
      "learning_rate": 2.5502840349805074e-06,
      "logits/chosen": 0.290465772151947,
      "logits/rejected": 0.05848363786935806,
      "logps/chosen": -115.6847915649414,
      "logps/rejected": -137.01820373535156,
      "loss": 0.6804,
      "pred_label": 966.2000122070312,
      "rewards/accuracies": 0.34375,
      "rewards/chosen": -0.4329158365726471,
      "rewards/margins": 0.21230947971343994,
      "rewards/rejected": -0.6452253460884094,
      "step": 520,
      "use_label": 9795.7998046875
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.015625,
      "learning_rate": 2.4588575996495797e-06,
      "logits/chosen": 0.5015053153038025,
      "logits/rejected": 0.544513463973999,
      "logps/chosen": -124.15202331542969,
      "logps/rejected": -145.21820068359375,
      "loss": 0.6847,
      "pred_label": 992.7999877929688,
      "rewards/accuracies": 0.3375000059604645,
      "rewards/chosen": -0.49871382117271423,
      "rewards/margins": 0.27802106738090515,
      "rewards/rejected": -0.7767347693443298,
      "step": 530,
      "use_label": 9929.2001953125
    },
    {
      "epoch": 0.57,
      "grad_norm": 3.796875,
      "learning_rate": 2.367486188632446e-06,
      "logits/chosen": 0.7326034903526306,
      "logits/rejected": 0.5614863634109497,
      "logps/chosen": -132.87228393554688,
      "logps/rejected": -159.64352416992188,
      "loss": 0.6731,
      "pred_label": 1025.175048828125,
      "rewards/accuracies": 0.3812499940395355,
      "rewards/chosen": -0.5613355040550232,
      "rewards/margins": 0.30218708515167236,
      "rewards/rejected": -0.8635226488113403,
      "step": 540,
      "use_label": 10056.8251953125
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.4375,
      "learning_rate": 2.276292003092593e-06,
      "logits/chosen": 0.6115967631340027,
      "logits/rejected": 0.6694309711456299,
      "logps/chosen": -123.36216735839844,
      "logps/rejected": -129.92201232910156,
      "loss": 0.684,
      "pred_label": 1044.2249755859375,
      "rewards/accuracies": 0.28125,
      "rewards/chosen": -0.553015947341919,
      "rewards/margins": 0.14989802241325378,
      "rewards/rejected": -0.7029139995574951,
      "step": 550,
      "use_label": 10197.775390625
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.171875,
      "learning_rate": 2.1853970071701415e-06,
      "logits/chosen": 0.649623453617096,
      "logits/rejected": 0.5992484092712402,
      "logps/chosen": -117.4646987915039,
      "logps/rejected": -130.9988555908203,
      "loss": 0.6748,
      "pred_label": 1060.5999755859375,
      "rewards/accuracies": 0.29374998807907104,
      "rewards/chosen": -0.47909289598464966,
      "rewards/margins": 0.1609223484992981,
      "rewards/rejected": -0.6400152444839478,
      "step": 560,
      "use_label": 10341.400390625
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.5,
      "learning_rate": 2.0949227648656194e-06,
      "logits/chosen": 0.1570337414741516,
      "logits/rejected": 0.2956157624721527,
      "logps/chosen": -116.41545104980469,
      "logps/rejected": -151.84051513671875,
      "loss": 0.6765,
      "pred_label": 1081.375,
      "rewards/accuracies": 0.375,
      "rewards/chosen": -0.49815383553504944,
      "rewards/margins": 0.2765403389930725,
      "rewards/rejected": -0.7746941447257996,
      "step": 570,
      "use_label": 10480.625
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.09375,
      "learning_rate": 2.00499027745888e-06,
      "logits/chosen": 0.3431427478790283,
      "logits/rejected": 0.18610627949237823,
      "logps/chosen": -128.4036865234375,
      "logps/rejected": -150.36404418945312,
      "loss": 0.677,
      "pred_label": 1101.3499755859375,
      "rewards/accuracies": 0.33125001192092896,
      "rewards/chosen": -0.5349212288856506,
      "rewards/margins": 0.18440793454647064,
      "rewards/rejected": -0.7193291783332825,
      "step": 580,
      "use_label": 10620.650390625
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.21875,
      "learning_rate": 1.915719821680624e-06,
      "logits/chosen": 0.18862374126911163,
      "logits/rejected": 0.19857950508594513,
      "logps/chosen": -134.26577758789062,
      "logps/rejected": -165.70481872558594,
      "loss": 0.6624,
      "pred_label": 1121.5250244140625,
      "rewards/accuracies": 0.4124999940395355,
      "rewards/chosen": -0.5457721948623657,
      "rewards/margins": 0.30153924226760864,
      "rewards/rejected": -0.8473113775253296,
      "step": 590,
      "use_label": 10760.474609375
    },
    {
      "epoch": 0.63,
      "grad_norm": 3.125,
      "learning_rate": 1.8272307888529276e-06,
      "logits/chosen": 0.5807100534439087,
      "logits/rejected": 0.25763237476348877,
      "logps/chosen": -133.36561584472656,
      "logps/rejected": -180.56822204589844,
      "loss": 0.6784,
      "pred_label": 1148.5999755859375,
      "rewards/accuracies": 0.41874998807907104,
      "rewards/chosen": -0.5903924703598022,
      "rewards/margins": 0.3091353178024292,
      "rewards/rejected": -0.8995277285575867,
      "step": 600,
      "use_label": 10893.400390625
    },
    {
      "epoch": 0.63,
      "eval_logits/chosen": 0.8859585523605347,
      "eval_logits/rejected": 0.9939000606536865,
      "eval_logps/chosen": -119.97545623779297,
      "eval_logps/rejected": -156.7531280517578,
      "eval_loss": 0.6791760325431824,
      "eval_pred_label": 1206.1904296875,
      "eval_rewards/accuracies": 0.3511904776096344,
      "eval_rewards/chosen": -0.5107490420341492,
      "eval_rewards/margins": 0.30280154943466187,
      "eval_rewards/rejected": -0.8135506510734558,
      "eval_runtime": 247.9094,
      "eval_samples_per_second": 8.067,
      "eval_steps_per_second": 0.254,
      "eval_use_label": 11169.8095703125,
      "step": 600
    },
    {
      "epoch": 0.64,
      "grad_norm": 3.234375,
      "learning_rate": 1.739641525213929e-06,
      "logits/chosen": 0.6684261560440063,
      "logits/rejected": 0.5376627445220947,
      "logps/chosen": -112.9523696899414,
      "logps/rejected": -154.13601684570312,
      "loss": 0.6711,
      "pred_label": 1272.925048828125,
      "rewards/accuracies": 0.3499999940395355,
      "rewards/chosen": -0.5404548645019531,
      "rewards/margins": 0.259638249874115,
      "rewards/rejected": -0.8000930547714233,
      "step": 610,
      "use_label": 11433.0751953125
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.296875,
      "learning_rate": 1.6530691736402317e-06,
      "logits/chosen": 0.6429753303527832,
      "logits/rejected": 0.5887765288352966,
      "logps/chosen": -125.70455169677734,
      "logps/rejected": -175.96624755859375,
      "loss": 0.6734,
      "pred_label": 1297.300048828125,
      "rewards/accuracies": 0.35624998807907104,
      "rewards/chosen": -0.6399649381637573,
      "rewards/margins": 0.3187193274497986,
      "rewards/rejected": -0.9586843252182007,
      "step": 620,
      "use_label": 11568.7001953125
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.046875,
      "learning_rate": 1.5676295169786864e-06,
      "logits/chosen": 0.8863061666488647,
      "logits/rejected": 0.5724608302116394,
      "logps/chosen": -149.8594970703125,
      "logps/rejected": -178.8180694580078,
      "loss": 0.6716,
      "pred_label": 1321.8499755859375,
      "rewards/accuracies": 0.3499999940395355,
      "rewards/chosen": -0.7683452367782593,
      "rewards/margins": 0.2745322585105896,
      "rewards/rejected": -1.042877435684204,
      "step": 630,
      "use_label": 11704.150390625
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.453125,
      "learning_rate": 1.4834368231970922e-06,
      "logits/chosen": 0.6763439178466797,
      "logits/rejected": 0.8844535946846008,
      "logps/chosen": -157.50503540039062,
      "logps/rejected": -179.59378051757812,
      "loss": 0.6738,
      "pred_label": 1351.8499755859375,
      "rewards/accuracies": 0.375,
      "rewards/chosen": -0.8581286668777466,
      "rewards/margins": 0.28607478737831116,
      "rewards/rejected": -1.1442034244537354,
      "step": 640,
      "use_label": 11834.150390625
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.921875,
      "learning_rate": 1.4006036925609245e-06,
      "logits/chosen": 0.5444064736366272,
      "logits/rejected": 1.0297753810882568,
      "logps/chosen": -149.0248565673828,
      "logps/rejected": -186.53790283203125,
      "loss": 0.6803,
      "pred_label": 1381.949951171875,
      "rewards/accuracies": 0.35624998807907104,
      "rewards/chosen": -0.7583541870117188,
      "rewards/margins": 0.32841619849205017,
      "rewards/rejected": -1.0867704153060913,
      "step": 650,
      "use_label": 11964.0498046875
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.21875,
      "learning_rate": 1.3192409070404582e-06,
      "logits/chosen": 1.0406488180160522,
      "logits/rejected": 0.8413463830947876,
      "logps/chosen": -115.27984619140625,
      "logps/rejected": -135.30836486816406,
      "loss": 0.6782,
      "pred_label": 1410.574951171875,
      "rewards/accuracies": 0.30000001192092896,
      "rewards/chosen": -0.5715335607528687,
      "rewards/margins": 0.21567881107330322,
      "rewards/rejected": -0.7872124910354614,
      "step": 660,
      "use_label": 12095.4248046875
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.28125,
      "learning_rate": 1.2394572821496953e-06,
      "logits/chosen": 0.3095243275165558,
      "logits/rejected": 0.21946246922016144,
      "logps/chosen": -121.89112854003906,
      "logps/rejected": -147.43978881835938,
      "loss": 0.6854,
      "pred_label": 1429.925048828125,
      "rewards/accuracies": 0.3187499940395355,
      "rewards/chosen": -0.5618449449539185,
      "rewards/margins": 0.2343917340040207,
      "rewards/rejected": -0.7962367534637451,
      "step": 670,
      "use_label": 12236.0751953125
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.9921875,
      "learning_rate": 1.1613595214152713e-06,
      "logits/chosen": 0.8114501237869263,
      "logits/rejected": 0.7095287442207336,
      "logps/chosen": -152.98135375976562,
      "logps/rejected": -175.5968475341797,
      "loss": 0.6781,
      "pred_label": 1451.175048828125,
      "rewards/accuracies": 0.3687500059604645,
      "rewards/chosen": -0.6692850589752197,
      "rewards/margins": 0.2704187035560608,
      "rewards/rejected": -0.9397038221359253,
      "step": 680,
      "use_label": 12374.8251953125
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.59375,
      "learning_rate": 1.0850520736699362e-06,
      "logits/chosen": 0.7043443322181702,
      "logits/rejected": 0.5973688364028931,
      "logps/chosen": -175.5553436279297,
      "logps/rejected": -207.06423950195312,
      "loss": 0.6749,
      "pred_label": 1481.625,
      "rewards/accuracies": 0.39375001192092896,
      "rewards/chosen": -0.7385014891624451,
      "rewards/margins": 0.37226757407188416,
      "rewards/rejected": -1.1107690334320068,
      "step": 690,
      "use_label": 12504.375
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.75,
      "learning_rate": 1.0106369933615043e-06,
      "logits/chosen": 0.44649118185043335,
      "logits/rejected": 0.6410871744155884,
      "logps/chosen": -136.0747833251953,
      "logps/rejected": -162.20260620117188,
      "loss": 0.6783,
      "pred_label": 1506.0999755859375,
      "rewards/accuracies": 0.32499998807907104,
      "rewards/chosen": -0.6972768902778625,
      "rewards/margins": 0.23985597491264343,
      "rewards/rejected": -0.9371329545974731,
      "step": 700,
      "use_label": 12639.900390625
    },
    {
      "epoch": 0.73,
      "eval_logits/chosen": 1.192717432975769,
      "eval_logits/rejected": 1.2994883060455322,
      "eval_logps/chosen": -135.23951721191406,
      "eval_logps/rejected": -171.3760528564453,
      "eval_loss": 0.6756439805030823,
      "eval_pred_label": 1558.5238037109375,
      "eval_rewards/accuracies": 0.3670634925365448,
      "eval_rewards/chosen": -0.6633896827697754,
      "eval_rewards/margins": 0.29639023542404175,
      "eval_rewards/rejected": -0.9597799181938171,
      "eval_runtime": 247.9992,
      "eval_samples_per_second": 8.065,
      "eval_steps_per_second": 0.254,
      "eval_use_label": 12921.4765625,
      "step": 700
    },
    {
      "epoch": 0.74,
      "grad_norm": 3.421875,
      "learning_rate": 9.382138040640714e-07,
      "logits/chosen": 0.6493266820907593,
      "logits/rejected": 0.6850475072860718,
      "logps/chosen": -134.1044158935547,
      "logps/rejected": -165.8421173095703,
      "loss": 0.6766,
      "pred_label": 1616.875,
      "rewards/accuracies": 0.35624998807907104,
      "rewards/chosen": -0.7103394269943237,
      "rewards/margins": 0.2792840301990509,
      "rewards/rejected": -0.9896234273910522,
      "step": 710,
      "use_label": 13193.125
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.203125,
      "learning_rate": 8.678793653740633e-07,
      "logits/chosen": 0.7393421530723572,
      "logits/rejected": 0.6282132267951965,
      "logps/chosen": -108.0786361694336,
      "logps/rejected": -142.93173217773438,
      "loss": 0.6886,
      "pred_label": 1639.2750244140625,
      "rewards/accuracies": 0.2874999940395355,
      "rewards/chosen": -0.5120818614959717,
      "rewards/margins": 0.260955274105072,
      "rewards/rejected": -0.7730370759963989,
      "step": 720,
      "use_label": 13330.724609375
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.984375,
      "learning_rate": 7.997277433690984e-07,
      "logits/chosen": 0.7698175311088562,
      "logits/rejected": 0.6512314677238464,
      "logps/chosen": -120.15057373046875,
      "logps/rejected": -147.58602905273438,
      "loss": 0.6705,
      "pred_label": 1667.324951171875,
      "rewards/accuracies": 0.34375,
      "rewards/chosen": -0.49796366691589355,
      "rewards/margins": 0.33586567640304565,
      "rewards/rejected": -0.8338292837142944,
      "step": 730,
      "use_label": 13462.6748046875
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8984375,
      "learning_rate": 7.338500848029603e-07,
      "logits/chosen": 0.9725875854492188,
      "logits/rejected": 0.894719123840332,
      "logps/chosen": -113.24635314941406,
      "logps/rejected": -142.79689025878906,
      "loss": 0.6779,
      "pred_label": 1692.25,
      "rewards/accuracies": 0.3125,
      "rewards/chosen": -0.5262097716331482,
      "rewards/margins": 0.26758259534835815,
      "rewards/rejected": -0.7937922477722168,
      "step": 740,
      "use_label": 13597.75
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.0,
      "learning_rate": 6.70334495204884e-07,
      "logits/chosen": 0.8061111569404602,
      "logits/rejected": 0.7181490659713745,
      "logps/chosen": -142.5282745361328,
      "logps/rejected": -173.8203887939453,
      "loss": 0.6814,
      "pred_label": 1716.25,
      "rewards/accuracies": 0.35624998807907104,
      "rewards/chosen": -0.6499794125556946,
      "rewards/margins": 0.24588195979595184,
      "rewards/rejected": -0.8958613276481628,
      "step": 750,
      "use_label": 13733.75
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.203125,
      "learning_rate": 6.092659210462232e-07,
      "logits/chosen": 0.6536890268325806,
      "logits/rejected": 0.6605783104896545,
      "logps/chosen": -102.72889709472656,
      "logps/rejected": -149.5058135986328,
      "loss": 0.6733,
      "pred_label": 1741.074951171875,
      "rewards/accuracies": 0.3187499940395355,
      "rewards/chosen": -0.48761066794395447,
      "rewards/margins": 0.28703850507736206,
      "rewards/rejected": -0.7746490836143494,
      "step": 760,
      "use_label": 13868.9248046875
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.375,
      "learning_rate": 5.507260361320738e-07,
      "logits/chosen": 0.6806662082672119,
      "logits/rejected": 0.5722958445549011,
      "logps/chosen": -151.7313690185547,
      "logps/rejected": -176.56260681152344,
      "loss": 0.6719,
      "pred_label": 1766.5250244140625,
      "rewards/accuracies": 0.40625,
      "rewards/chosen": -0.6818407773971558,
      "rewards/margins": 0.3464636206626892,
      "rewards/rejected": -1.0283044576644897,
      "step": 770,
      "use_label": 14003.474609375
    },
    {
      "epoch": 0.82,
      "grad_norm": 3.03125,
      "learning_rate": 4.947931323697983e-07,
      "logits/chosen": 0.6961285471916199,
      "logits/rejected": 0.45934200286865234,
      "logps/chosen": -136.76901245117188,
      "logps/rejected": -162.4578094482422,
      "loss": 0.6822,
      "pred_label": 1797.9000244140625,
      "rewards/accuracies": 0.34375,
      "rewards/chosen": -0.6396945714950562,
      "rewards/margins": 0.2714308798313141,
      "rewards/rejected": -0.9111254811286926,
      "step": 780,
      "use_label": 14132.099609375
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8046875,
      "learning_rate": 4.4154201506053985e-07,
      "logits/chosen": 0.824557900428772,
      "logits/rejected": 0.8706857562065125,
      "logps/chosen": -113.81227111816406,
      "logps/rejected": -128.26260375976562,
      "loss": 0.6771,
      "pred_label": 1823.199951171875,
      "rewards/accuracies": 0.3125,
      "rewards/chosen": -0.4897800385951996,
      "rewards/margins": 0.2714278995990753,
      "rewards/rejected": -0.7612079381942749,
      "step": 790,
      "use_label": 14266.7998046875
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.453125,
      "learning_rate": 3.910439028537638e-07,
      "logits/chosen": 0.6243492364883423,
      "logits/rejected": 0.7354862689971924,
      "logps/chosen": -111.455322265625,
      "logps/rejected": -141.9140625,
      "loss": 0.6776,
      "pred_label": 1847.300048828125,
      "rewards/accuracies": 0.375,
      "rewards/chosen": -0.5140770673751831,
      "rewards/margins": 0.28408390283584595,
      "rewards/rejected": -0.7981609106063843,
      "step": 800,
      "use_label": 14402.7001953125
    },
    {
      "epoch": 0.84,
      "eval_logits/chosen": 1.3586419820785522,
      "eval_logits/rejected": 1.4788893461227417,
      "eval_logps/chosen": -123.90097045898438,
      "eval_logps/rejected": -161.67913818359375,
      "eval_loss": 0.6800512671470642,
      "eval_pred_label": 1900.4920654296875,
      "eval_rewards/accuracies": 0.3531745970249176,
      "eval_rewards/chosen": -0.5500041842460632,
      "eval_rewards/margins": 0.312806636095047,
      "eval_rewards/rejected": -0.8628108501434326,
      "eval_runtime": 247.6869,
      "eval_samples_per_second": 8.075,
      "eval_steps_per_second": 0.254,
      "eval_use_label": 14683.5078125,
      "step": 800
    },
    {
      "epoch": 0.85,
      "grad_norm": 3.0,
      "learning_rate": 3.4336633249862084e-07,
      "logits/chosen": 0.6561521291732788,
      "logits/rejected": 0.6886910200119019,
      "logps/chosen": -128.8015899658203,
      "logps/rejected": -164.35073852539062,
      "loss": 0.678,
      "pred_label": 1961.699951171875,
      "rewards/accuracies": 0.3499999940395355,
      "rewards/chosen": -0.5894675254821777,
      "rewards/margins": 0.26825448870658875,
      "rewards/rejected": -0.8577221035957336,
      "step": 810,
      "use_label": 14952.2998046875
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.203125,
      "learning_rate": 2.98573068519539e-07,
      "logits/chosen": 0.7671118974685669,
      "logits/rejected": 0.7413855791091919,
      "logps/chosen": -113.36724853515625,
      "logps/rejected": -125.75687408447266,
      "loss": 0.6785,
      "pred_label": 1987.5,
      "rewards/accuracies": 0.2874999940395355,
      "rewards/chosen": -0.533771276473999,
      "rewards/margins": 0.18785560131072998,
      "rewards/rejected": -0.721626877784729,
      "step": 820,
      "use_label": 15086.5
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7734375,
      "learning_rate": 2.5672401793681854e-07,
      "logits/chosen": 0.9424182176589966,
      "logits/rejected": 1.0951740741729736,
      "logps/chosen": -104.4168472290039,
      "logps/rejected": -135.86708068847656,
      "loss": 0.6757,
      "pred_label": 2014.300048828125,
      "rewards/accuracies": 0.3375000059604645,
      "rewards/chosen": -0.4637536108493805,
      "rewards/margins": 0.3284408450126648,
      "rewards/rejected": -0.7921944260597229,
      "step": 830,
      "use_label": 15219.7001953125
    },
    {
      "epoch": 0.88,
      "grad_norm": 3.546875,
      "learning_rate": 2.178751501463036e-07,
      "logits/chosen": 0.880671501159668,
      "logits/rejected": 0.7017362117767334,
      "logps/chosen": -107.72086334228516,
      "logps/rejected": -113.772216796875,
      "loss": 0.6804,
      "pred_label": 2034.0250244140625,
      "rewards/accuracies": 0.23749999701976776,
      "rewards/chosen": -0.48421382904052734,
      "rewards/margins": 0.11718887090682983,
      "rewards/rejected": -0.6014026999473572,
      "step": 840,
      "use_label": 15359.974609375
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.1875,
      "learning_rate": 1.820784220652766e-07,
      "logits/chosen": 0.7102145552635193,
      "logits/rejected": 0.6271827816963196,
      "logps/chosen": -143.84104919433594,
      "logps/rejected": -181.9434814453125,
      "loss": 0.661,
      "pred_label": 2054.27490234375,
      "rewards/accuracies": 0.4124999940395355,
      "rewards/chosen": -0.5962380170822144,
      "rewards/margins": 0.3919173777103424,
      "rewards/rejected": -0.9881553649902344,
      "step": 850,
      "use_label": 15499.724609375
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.03125,
      "learning_rate": 1.4938170864468636e-07,
      "logits/chosen": 0.856406569480896,
      "logits/rejected": 0.9847167730331421,
      "logps/chosen": -134.33340454101562,
      "logps/rejected": -162.52786254882812,
      "loss": 0.6682,
      "pred_label": 2086.074951171875,
      "rewards/accuracies": 0.42500001192092896,
      "rewards/chosen": -0.5313155651092529,
      "rewards/margins": 0.33722516894340515,
      "rewards/rejected": -0.8685407638549805,
      "step": 860,
      "use_label": 15627.9248046875
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.453125,
      "learning_rate": 1.1982873884064466e-07,
      "logits/chosen": 0.7517425417900085,
      "logits/rejected": 0.7345870137214661,
      "logps/chosen": -139.02523803710938,
      "logps/rejected": -179.33541870117188,
      "loss": 0.681,
      "pred_label": 2114.550048828125,
      "rewards/accuracies": 0.38749998807907104,
      "rewards/chosen": -0.5771310925483704,
      "rewards/margins": 0.393027126789093,
      "rewards/rejected": -0.9701582193374634,
      "step": 870,
      "use_label": 15759.4501953125
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.3359375,
      "learning_rate": 9.345903713082305e-08,
      "logits/chosen": 0.8886432647705078,
      "logits/rejected": 0.9018303751945496,
      "logps/chosen": -118.17820739746094,
      "logps/rejected": -171.14111328125,
      "loss": 0.6788,
      "pred_label": 2148.22509765625,
      "rewards/accuracies": 0.4000000059604645,
      "rewards/chosen": -0.5542714595794678,
      "rewards/margins": 0.41032201051712036,
      "rewards/rejected": -0.9645935297012329,
      "step": 880,
      "use_label": 15885.775390625
    },
    {
      "epoch": 0.93,
      "grad_norm": 3.03125,
      "learning_rate": 7.030787065396866e-08,
      "logits/chosen": 0.9458627700805664,
      "logits/rejected": 0.860288143157959,
      "logps/chosen": -113.8023452758789,
      "logps/rejected": -145.21006774902344,
      "loss": 0.68,
      "pred_label": 2174.175048828125,
      "rewards/accuracies": 0.29374998807907104,
      "rewards/chosen": -0.4997434616088867,
      "rewards/margins": 0.2181231528520584,
      "rewards/rejected": -0.7178665399551392,
      "step": 890,
      "use_label": 16019.8251953125
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.265625,
      "learning_rate": 5.0406202043228604e-08,
      "logits/chosen": 1.2760592699050903,
      "logits/rejected": 1.0304285287857056,
      "logps/chosen": -123.82283020019531,
      "logps/rejected": -180.8885955810547,
      "loss": 0.6751,
      "pred_label": 2197.10009765625,
      "rewards/accuracies": 0.39375001192092896,
      "rewards/chosen": -0.5441134572029114,
      "rewards/margins": 0.37965571880340576,
      "rewards/rejected": -0.9237691164016724,
      "step": 900,
      "use_label": 16156.900390625
    },
    {
      "epoch": 0.94,
      "eval_logits/chosen": 1.3693251609802246,
      "eval_logits/rejected": 1.4904797077178955,
      "eval_logps/chosen": -123.65629577636719,
      "eval_logps/rejected": -161.58062744140625,
      "eval_loss": 0.678970456123352,
      "eval_pred_label": 2251.015869140625,
      "eval_rewards/accuracies": 0.3571428656578064,
      "eval_rewards/chosen": -0.547557532787323,
      "eval_rewards/margins": 0.31426796317100525,
      "eval_rewards/rejected": -0.8618254065513611,
      "eval_runtime": 247.8741,
      "eval_samples_per_second": 8.069,
      "eval_steps_per_second": 0.254,
      "eval_use_label": 16436.984375,
      "step": 900
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8984375,
      "learning_rate": 3.378064801637687e-08,
      "logits/chosen": 0.8874324560165405,
      "logits/rejected": 0.9277682304382324,
      "logps/chosen": -107.72428131103516,
      "logps/rejected": -141.05999755859375,
      "loss": 0.6748,
      "pred_label": 2314.64990234375,
      "rewards/accuracies": 0.3125,
      "rewards/chosen": -0.47065839171409607,
      "rewards/margins": 0.3038247525691986,
      "rewards/rejected": -0.7744830846786499,
      "step": 910,
      "use_label": 16703.349609375
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.359375,
      "learning_rate": 2.0453443778310766e-08,
      "logits/chosen": 1.0454901456832886,
      "logits/rejected": 1.0777199268341064,
      "logps/chosen": -97.26091003417969,
      "logps/rejected": -147.07302856445312,
      "loss": 0.6799,
      "pred_label": 2345.5,
      "rewards/accuracies": 0.3187499940395355,
      "rewards/chosen": -0.4330506920814514,
      "rewards/margins": 0.3231905996799469,
      "rewards/rejected": -0.7562412023544312,
      "step": 920,
      "use_label": 16832.5
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.96875,
      "learning_rate": 1.0442413283435759e-08,
      "logits/chosen": 1.0506960153579712,
      "logits/rejected": 0.9065178632736206,
      "logps/chosen": -111.4305648803711,
      "logps/rejected": -146.4462432861328,
      "loss": 0.6772,
      "pred_label": 2373.125,
      "rewards/accuracies": 0.3375000059604645,
      "rewards/chosen": -0.4827675223350525,
      "rewards/margins": 0.2878049314022064,
      "rewards/rejected": -0.7705724835395813,
      "step": 930,
      "use_label": 16964.875
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.515625,
      "learning_rate": 3.760945397705828e-09,
      "logits/chosen": 1.234220027923584,
      "logits/rejected": 0.9605228304862976,
      "logps/chosen": -124.61787414550781,
      "logps/rejected": -160.95223999023438,
      "loss": 0.6706,
      "pred_label": 2397.75,
      "rewards/accuracies": 0.34375,
      "rewards/chosen": -0.5566731691360474,
      "rewards/margins": 0.27601632475852966,
      "rewards/rejected": -0.8326894640922546,
      "step": 940,
      "use_label": 17100.25
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.375,
      "learning_rate": 4.1797599220405605e-10,
      "logits/chosen": 0.7550326585769653,
      "logits/rejected": 0.6674235463142395,
      "logps/chosen": -136.30941772460938,
      "logps/rejected": -160.4894561767578,
      "loss": 0.6729,
      "pred_label": 2425.39990234375,
      "rewards/accuracies": 0.3125,
      "rewards/chosen": -0.5969915390014648,
      "rewards/margins": 0.21693304181098938,
      "rewards/rejected": -0.8139246106147766,
      "step": 950,
      "use_label": 17232.599609375
    },
    {
      "epoch": 1.0,
      "step": 955,
      "total_flos": 0.0,
      "train_loss": 0.6760230718482851,
      "train_runtime": 20063.9235,
      "train_samples_per_second": 3.047,
      "train_steps_per_second": 0.048
    }
  ],
  "logging_steps": 10,
  "max_steps": 955,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}