|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997382884061764, |
|
"eval_steps": 100, |
|
"global_step": 955, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.59375, |
|
"learning_rate": 5.208333333333333e-08, |
|
"logits/chosen": -2.1666858196258545, |
|
"logits/rejected": -2.182244300842285, |
|
"logps/chosen": -12.368609428405762, |
|
"logps/rejected": -24.687644958496094, |
|
"loss": 0.6931, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1, |
|
"use_label": 10.0 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.60546875, |
|
"learning_rate": 5.208333333333334e-07, |
|
"logits/chosen": -2.2113068103790283, |
|
"logits/rejected": -2.2719719409942627, |
|
"logps/chosen": -57.57659149169922, |
|
"logps/rejected": -65.19544219970703, |
|
"loss": 0.693, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.2152777761220932, |
|
"rewards/chosen": 0.001057142741046846, |
|
"rewards/margins": 3.17241829179693e-05, |
|
"rewards/rejected": 0.001025418401695788, |
|
"step": 10, |
|
"use_label": 90.0 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.6796875, |
|
"learning_rate": 1.0416666666666667e-06, |
|
"logits/chosen": -2.243159770965576, |
|
"logits/rejected": -2.2802278995513916, |
|
"logps/chosen": -56.544715881347656, |
|
"logps/rejected": -68.35901641845703, |
|
"loss": 0.6924, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.22499999403953552, |
|
"rewards/chosen": 0.006556531880050898, |
|
"rewards/margins": 0.001379690133035183, |
|
"rewards/rejected": 0.005176841747015715, |
|
"step": 20, |
|
"use_label": 242.0 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.55078125, |
|
"learning_rate": 1.5625e-06, |
|
"logits/chosen": -2.2634024620056152, |
|
"logits/rejected": -2.2475943565368652, |
|
"logps/chosen": -53.98667526245117, |
|
"logps/rejected": -67.89213562011719, |
|
"loss": 0.692, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.2750000059604645, |
|
"rewards/chosen": 0.01648966409265995, |
|
"rewards/margins": 0.002599921775981784, |
|
"rewards/rejected": 0.013889740221202374, |
|
"step": 30, |
|
"use_label": 402.0 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.6328125, |
|
"learning_rate": 2.0833333333333334e-06, |
|
"logits/chosen": -2.2825467586517334, |
|
"logits/rejected": -2.2754693031311035, |
|
"logps/chosen": -55.582061767578125, |
|
"logps/rejected": -66.59407043457031, |
|
"loss": 0.6909, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.21250000596046448, |
|
"rewards/chosen": 0.018406417220830917, |
|
"rewards/margins": 0.0006764450808987021, |
|
"rewards/rejected": 0.017729971557855606, |
|
"step": 40, |
|
"use_label": 562.0 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.6015625, |
|
"learning_rate": 2.604166666666667e-06, |
|
"logits/chosen": -2.3444912433624268, |
|
"logits/rejected": -2.3341281414031982, |
|
"logps/chosen": -69.13630676269531, |
|
"logps/rejected": -84.64376831054688, |
|
"loss": 0.6889, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.2874999940395355, |
|
"rewards/chosen": 0.02657836303114891, |
|
"rewards/margins": 0.005359734408557415, |
|
"rewards/rejected": 0.021218623965978622, |
|
"step": 50, |
|
"use_label": 722.0 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.72265625, |
|
"learning_rate": 3.125e-06, |
|
"logits/chosen": -2.3026936054229736, |
|
"logits/rejected": -2.309264659881592, |
|
"logps/chosen": -82.00704193115234, |
|
"logps/rejected": -90.7305908203125, |
|
"loss": 0.6874, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": 0.03688042238354683, |
|
"rewards/margins": 0.014220851473510265, |
|
"rewards/rejected": 0.02265957184135914, |
|
"step": 60, |
|
"use_label": 882.0 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.79296875, |
|
"learning_rate": 3.6458333333333333e-06, |
|
"logits/chosen": -2.344853401184082, |
|
"logits/rejected": -2.3261306285858154, |
|
"logps/chosen": -77.20336151123047, |
|
"logps/rejected": -77.6347885131836, |
|
"loss": 0.6851, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": 0.02531364932656288, |
|
"rewards/margins": 0.01608472317457199, |
|
"rewards/rejected": 0.009228924289345741, |
|
"step": 70, |
|
"use_label": 1042.0 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.80078125, |
|
"learning_rate": 4.166666666666667e-06, |
|
"logits/chosen": -2.241945743560791, |
|
"logits/rejected": -2.195178985595703, |
|
"logps/chosen": -81.6376953125, |
|
"logps/rejected": -89.05104064941406, |
|
"loss": 0.6814, |
|
"pred_label": 0.9750000238418579, |
|
"rewards/accuracies": 0.32499998807907104, |
|
"rewards/chosen": 0.004142354242503643, |
|
"rewards/margins": 0.025017932057380676, |
|
"rewards/rejected": -0.02087557688355446, |
|
"step": 80, |
|
"use_label": 1201.0250244140625 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 1.578125, |
|
"learning_rate": 4.6875000000000004e-06, |
|
"logits/chosen": -2.1907405853271484, |
|
"logits/rejected": -2.232959270477295, |
|
"logps/chosen": -62.31688690185547, |
|
"logps/rejected": -80.38573455810547, |
|
"loss": 0.6812, |
|
"pred_label": 3.0999999046325684, |
|
"rewards/accuracies": 0.33125001192092896, |
|
"rewards/chosen": -0.012271342799067497, |
|
"rewards/margins": 0.04507603123784065, |
|
"rewards/rejected": -0.0573473684489727, |
|
"step": 90, |
|
"use_label": 1358.9000244140625 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.796875, |
|
"learning_rate": 4.9997324926814375e-06, |
|
"logits/chosen": -2.132638454437256, |
|
"logits/rejected": -2.0995519161224365, |
|
"logps/chosen": -76.97563171386719, |
|
"logps/rejected": -79.27615356445312, |
|
"loss": 0.6818, |
|
"pred_label": 7.150000095367432, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.02400936186313629, |
|
"rewards/margins": 0.05036945268511772, |
|
"rewards/rejected": -0.07437881827354431, |
|
"step": 100, |
|
"use_label": 1514.8499755859375 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_logits/chosen": -2.097480297088623, |
|
"eval_logits/rejected": -2.0663790702819824, |
|
"eval_logps/chosen": -69.46318054199219, |
|
"eval_logps/rejected": -80.35824584960938, |
|
"eval_loss": 0.6813791394233704, |
|
"eval_pred_label": 22.539682388305664, |
|
"eval_rewards/accuracies": 0.3392857015132904, |
|
"eval_rewards/chosen": -0.005626226309686899, |
|
"eval_rewards/margins": 0.04397555813193321, |
|
"eval_rewards/rejected": -0.04960178583860397, |
|
"eval_runtime": 245.3242, |
|
"eval_samples_per_second": 8.152, |
|
"eval_steps_per_second": 0.257, |
|
"eval_use_label": 1833.4603271484375, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 4.996723692767927e-06, |
|
"logits/chosen": -2.114673137664795, |
|
"logits/rejected": -2.094468355178833, |
|
"logps/chosen": -63.9236946105957, |
|
"logps/rejected": -79.44518280029297, |
|
"loss": 0.6827, |
|
"pred_label": 34.0, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": -0.02154584601521492, |
|
"rewards/margins": 0.04528125748038292, |
|
"rewards/rejected": -0.06682710349559784, |
|
"step": 110, |
|
"use_label": 2152.0 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 4.9903757462135984e-06, |
|
"logits/chosen": -2.2926628589630127, |
|
"logits/rejected": -2.177788257598877, |
|
"logps/chosen": -83.48246002197266, |
|
"logps/rejected": -97.60291290283203, |
|
"loss": 0.683, |
|
"pred_label": 44.67499923706055, |
|
"rewards/accuracies": 0.3062500059604645, |
|
"rewards/chosen": -0.0941522866487503, |
|
"rewards/margins": 0.06425690650939941, |
|
"rewards/rejected": -0.15840919315814972, |
|
"step": 120, |
|
"use_label": 2301.324951171875 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.546875, |
|
"learning_rate": 4.980697142834315e-06, |
|
"logits/chosen": -2.0968613624572754, |
|
"logits/rejected": -2.1124091148376465, |
|
"logps/chosen": -66.370849609375, |
|
"logps/rejected": -77.3319320678711, |
|
"loss": 0.6845, |
|
"pred_label": 57.57500076293945, |
|
"rewards/accuracies": 0.2750000059604645, |
|
"rewards/chosen": -0.07896758615970612, |
|
"rewards/margins": 0.04609644412994385, |
|
"rewards/rejected": -0.12506404519081116, |
|
"step": 130, |
|
"use_label": 2448.425048828125 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.78515625, |
|
"learning_rate": 4.967700826904229e-06, |
|
"logits/chosen": -2.1041221618652344, |
|
"logits/rejected": -2.138929843902588, |
|
"logps/chosen": -68.11909484863281, |
|
"logps/rejected": -90.16340637207031, |
|
"loss": 0.6868, |
|
"pred_label": 73.75, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": -0.08846104890108109, |
|
"rewards/margins": 0.0647779330611229, |
|
"rewards/rejected": -0.15323898196220398, |
|
"step": 140, |
|
"use_label": 2592.25 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 4.951404179843963e-06, |
|
"logits/chosen": -2.1765952110290527, |
|
"logits/rejected": -2.125175714492798, |
|
"logps/chosen": -54.37804412841797, |
|
"logps/rejected": -58.982269287109375, |
|
"loss": 0.6809, |
|
"pred_label": 91.3499984741211, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": -0.06883221119642258, |
|
"rewards/margins": 0.06803621351718903, |
|
"rewards/rejected": -0.136868417263031, |
|
"step": 150, |
|
"use_label": 2734.64990234375 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 1.03125, |
|
"learning_rate": 4.931828996974498e-06, |
|
"logits/chosen": -2.2455694675445557, |
|
"logits/rejected": -2.213240623474121, |
|
"logps/chosen": -94.4081802368164, |
|
"logps/rejected": -107.48802185058594, |
|
"loss": 0.6857, |
|
"pred_label": 115.55000305175781, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": -0.12804970145225525, |
|
"rewards/margins": 0.12874242663383484, |
|
"rewards/rejected": -0.2567921280860901, |
|
"step": 160, |
|
"use_label": 2870.449951171875 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 4.909001458367867e-06, |
|
"logits/chosen": -2.1201233863830566, |
|
"logits/rejected": -2.0822367668151855, |
|
"logps/chosen": -75.75311279296875, |
|
"logps/rejected": -87.55944061279297, |
|
"loss": 0.6869, |
|
"pred_label": 141.85000610351562, |
|
"rewards/accuracies": 0.33125001192092896, |
|
"rewards/chosen": -0.1179669052362442, |
|
"rewards/margins": 0.09383226186037064, |
|
"rewards/rejected": -0.21179917454719543, |
|
"step": 170, |
|
"use_label": 3004.14990234375 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 1.4296875, |
|
"learning_rate": 4.882952093833628e-06, |
|
"logits/chosen": -2.1013779640197754, |
|
"logits/rejected": -2.121537685394287, |
|
"logps/chosen": -70.6474838256836, |
|
"logps/rejected": -89.79743957519531, |
|
"loss": 0.685, |
|
"pred_label": 161.3249969482422, |
|
"rewards/accuracies": 0.33125001192092896, |
|
"rewards/chosen": -0.08145526796579361, |
|
"rewards/margins": 0.08172430098056793, |
|
"rewards/rejected": -0.16317956149578094, |
|
"step": 180, |
|
"use_label": 3144.675048828125 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.8515625, |
|
"learning_rate": 4.853715742087947e-06, |
|
"logits/chosen": -2.1533255577087402, |
|
"logits/rejected": -2.104222297668457, |
|
"logps/chosen": -87.3572998046875, |
|
"logps/rejected": -91.95249938964844, |
|
"loss": 0.6862, |
|
"pred_label": 181.39999389648438, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.13474301993846893, |
|
"rewards/margins": 0.08988693356513977, |
|
"rewards/rejected": -0.2246299535036087, |
|
"step": 190, |
|
"use_label": 3284.60009765625 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.96875, |
|
"learning_rate": 4.821331504159906e-06, |
|
"logits/chosen": -2.137516736984253, |
|
"logits/rejected": -2.13090443611145, |
|
"logps/chosen": -94.10081481933594, |
|
"logps/rejected": -95.15316009521484, |
|
"loss": 0.6818, |
|
"pred_label": 205.875, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": -0.14046669006347656, |
|
"rewards/margins": 0.07937734574079514, |
|
"rewards/rejected": -0.2198440283536911, |
|
"step": 200, |
|
"use_label": 3420.125 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": -2.021465301513672, |
|
"eval_logits/rejected": -1.9937611818313599, |
|
"eval_logps/chosen": -82.4782485961914, |
|
"eval_logps/rejected": -99.20675659179688, |
|
"eval_loss": 0.6860649585723877, |
|
"eval_pred_label": 258.79364013671875, |
|
"eval_rewards/accuracies": 0.3373015820980072, |
|
"eval_rewards/chosen": -0.13577698171138763, |
|
"eval_rewards/margins": 0.10230996459722519, |
|
"eval_rewards/rejected": -0.23808695375919342, |
|
"eval_runtime": 245.9338, |
|
"eval_samples_per_second": 8.132, |
|
"eval_steps_per_second": 0.256, |
|
"eval_use_label": 3701.206298828125, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 4.7858426910973435e-06, |
|
"logits/chosen": -2.1574149131774902, |
|
"logits/rejected": -2.1307334899902344, |
|
"logps/chosen": -77.64894104003906, |
|
"logps/rejected": -89.26710510253906, |
|
"loss": 0.6828, |
|
"pred_label": 313.32501220703125, |
|
"rewards/accuracies": 0.3687500059604645, |
|
"rewards/chosen": -0.09638272225856781, |
|
"rewards/margins": 0.12071452289819717, |
|
"rewards/rejected": -0.2170972377061844, |
|
"step": 210, |
|
"use_label": 3976.675048828125 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 1.40625, |
|
"learning_rate": 4.747296766042161e-06, |
|
"logits/chosen": -2.1187565326690674, |
|
"logits/rejected": -2.102626323699951, |
|
"logps/chosen": -90.67762756347656, |
|
"logps/rejected": -96.60699462890625, |
|
"loss": 0.6884, |
|
"pred_label": 343.875, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": -0.1462414264678955, |
|
"rewards/margins": 0.12368818372488022, |
|
"rewards/rejected": -0.2699296176433563, |
|
"step": 220, |
|
"use_label": 4106.125 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 4.705745280752586e-06, |
|
"logits/chosen": -2.1437509059906006, |
|
"logits/rejected": -2.084073781967163, |
|
"logps/chosen": -90.86326599121094, |
|
"logps/rejected": -96.72235870361328, |
|
"loss": 0.6875, |
|
"pred_label": 378.6000061035156, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": -0.12124122679233551, |
|
"rewards/margins": 0.11637073755264282, |
|
"rewards/rejected": -0.23761197924613953, |
|
"step": 230, |
|
"use_label": 4231.39990234375 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.953125, |
|
"learning_rate": 4.661243806657256e-06, |
|
"logits/chosen": -2.1431565284729004, |
|
"logits/rejected": -2.1365227699279785, |
|
"logps/chosen": -71.16796875, |
|
"logps/rejected": -91.01861572265625, |
|
"loss": 0.6846, |
|
"pred_label": 403.125, |
|
"rewards/accuracies": 0.32499998807907104, |
|
"rewards/chosen": -0.07454425096511841, |
|
"rewards/margins": 0.09627760201692581, |
|
"rewards/rejected": -0.17082183063030243, |
|
"step": 240, |
|
"use_label": 4366.875 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.890625, |
|
"learning_rate": 4.613851860533367e-06, |
|
"logits/chosen": -2.1595332622528076, |
|
"logits/rejected": -2.183953285217285, |
|
"logps/chosen": -71.86934661865234, |
|
"logps/rejected": -80.0597152709961, |
|
"loss": 0.6844, |
|
"pred_label": 422.25, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.06741674989461899, |
|
"rewards/margins": 0.08548234403133392, |
|
"rewards/rejected": -0.1528991013765335, |
|
"step": 250, |
|
"use_label": 4507.75 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 4.563632824908252e-06, |
|
"logits/chosen": -2.1189560890197754, |
|
"logits/rejected": -2.071620464324951, |
|
"logps/chosen": -77.1129150390625, |
|
"logps/rejected": -101.45845031738281, |
|
"loss": 0.6837, |
|
"pred_label": 445.79998779296875, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.16171860694885254, |
|
"rewards/margins": 0.11343212425708771, |
|
"rewards/rejected": -0.27515071630477905, |
|
"step": 260, |
|
"use_label": 4644.2001953125 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 4.510653863290871e-06, |
|
"logits/chosen": -2.1512458324432373, |
|
"logits/rejected": -2.164412021636963, |
|
"logps/chosen": -91.74055480957031, |
|
"logps/rejected": -95.13731384277344, |
|
"loss": 0.6883, |
|
"pred_label": 470.04998779296875, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.16311386227607727, |
|
"rewards/margins": 0.0933571308851242, |
|
"rewards/rejected": -0.2564709782600403, |
|
"step": 270, |
|
"use_label": 4779.9501953125 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.8828125, |
|
"learning_rate": 4.454985830346574e-06, |
|
"logits/chosen": -2.0734293460845947, |
|
"logits/rejected": -2.1033730506896973, |
|
"logps/chosen": -76.7903823852539, |
|
"logps/rejected": -86.99803161621094, |
|
"loss": 0.6858, |
|
"pred_label": 494.9750061035156, |
|
"rewards/accuracies": 0.29374998807907104, |
|
"rewards/chosen": -0.15558014810085297, |
|
"rewards/margins": 0.050300367176532745, |
|
"rewards/rejected": -0.2058805227279663, |
|
"step": 280, |
|
"use_label": 4915.02490234375 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 4.396703177135262e-06, |
|
"logits/chosen": -1.9870249032974243, |
|
"logits/rejected": -1.956434965133667, |
|
"logps/chosen": -89.98160552978516, |
|
"logps/rejected": -99.75212097167969, |
|
"loss": 0.6905, |
|
"pred_label": 527.0499877929688, |
|
"rewards/accuracies": 0.36250001192092896, |
|
"rewards/chosen": -0.13706301152706146, |
|
"rewards/margins": 0.16557420790195465, |
|
"rewards/rejected": -0.3026372492313385, |
|
"step": 290, |
|
"use_label": 5042.9501953125 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 1.6015625, |
|
"learning_rate": 4.335883851539693e-06, |
|
"logits/chosen": -1.9497883319854736, |
|
"logits/rejected": -1.964604377746582, |
|
"logps/chosen": -68.64933013916016, |
|
"logps/rejected": -91.48945617675781, |
|
"loss": 0.6848, |
|
"pred_label": 561.8499755859375, |
|
"rewards/accuracies": 0.33125001192092896, |
|
"rewards/chosen": -0.14721202850341797, |
|
"rewards/margins": 0.14547064900398254, |
|
"rewards/rejected": -0.2926826477050781, |
|
"step": 300, |
|
"use_label": 5168.14990234375 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_logits/chosen": -1.9156862497329712, |
|
"eval_logits/rejected": -1.8827954530715942, |
|
"eval_logps/chosen": -89.57630920410156, |
|
"eval_logps/rejected": -109.2765884399414, |
|
"eval_loss": 0.6877307295799255, |
|
"eval_pred_label": 626.1270141601562, |
|
"eval_rewards/accuracies": 0.341269850730896, |
|
"eval_rewards/chosen": -0.20675767958164215, |
|
"eval_rewards/margins": 0.13202756643295288, |
|
"eval_rewards/rejected": -0.33878523111343384, |
|
"eval_runtime": 246.2269, |
|
"eval_samples_per_second": 8.123, |
|
"eval_steps_per_second": 0.256, |
|
"eval_use_label": 5437.873046875, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1.5, |
|
"learning_rate": 4.2726091940171055e-06, |
|
"logits/chosen": -2.043640613555908, |
|
"logits/rejected": -2.01674222946167, |
|
"logps/chosen": -72.24534606933594, |
|
"logps/rejected": -89.407470703125, |
|
"loss": 0.6865, |
|
"pred_label": 688.9500122070312, |
|
"rewards/accuracies": 0.29374998807907104, |
|
"rewards/chosen": -0.23255303502082825, |
|
"rewards/margins": 0.06651856750249863, |
|
"rewards/rejected": -0.29907160997390747, |
|
"step": 310, |
|
"use_label": 5705.0498046875 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 4.206963828813555e-06, |
|
"logits/chosen": -1.9597671031951904, |
|
"logits/rejected": -1.9893718957901, |
|
"logps/chosen": -94.37977600097656, |
|
"logps/rejected": -118.25643157958984, |
|
"loss": 0.6871, |
|
"pred_label": 724.375, |
|
"rewards/accuracies": 0.36250001192092896, |
|
"rewards/chosen": -0.20438706874847412, |
|
"rewards/margins": 0.13566336035728455, |
|
"rewards/rejected": -0.34005045890808105, |
|
"step": 320, |
|
"use_label": 5829.625 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.95703125, |
|
"learning_rate": 4.139035550786495e-06, |
|
"logits/chosen": -1.989506483078003, |
|
"logits/rejected": -1.9580066204071045, |
|
"logps/chosen": -73.50363159179688, |
|
"logps/rejected": -87.75289154052734, |
|
"loss": 0.683, |
|
"pred_label": 754.4500122070312, |
|
"rewards/accuracies": 0.36250001192092896, |
|
"rewards/chosen": -0.1003209576010704, |
|
"rewards/margins": 0.13466720283031464, |
|
"rewards/rejected": -0.23498816788196564, |
|
"step": 330, |
|
"use_label": 5959.5498046875 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.0234375, |
|
"learning_rate": 4.068915207986931e-06, |
|
"logits/chosen": -2.0428695678710938, |
|
"logits/rejected": -2.016120195388794, |
|
"logps/chosen": -74.91081237792969, |
|
"logps/rejected": -93.89201354980469, |
|
"loss": 0.6894, |
|
"pred_label": 786.4749755859375, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": -0.11903776973485947, |
|
"rewards/margins": 0.11223740875720978, |
|
"rewards/rejected": -0.23127520084381104, |
|
"step": 340, |
|
"use_label": 6087.52490234375 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.984375, |
|
"learning_rate": 3.996696580158211e-06, |
|
"logits/chosen": -2.0441341400146484, |
|
"logits/rejected": -2.0229620933532715, |
|
"logps/chosen": -73.9575424194336, |
|
"logps/rejected": -86.34129333496094, |
|
"loss": 0.6869, |
|
"pred_label": 817.5250244140625, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.133123978972435, |
|
"rewards/margins": 0.08419892936944962, |
|
"rewards/rejected": -0.2173229157924652, |
|
"step": 350, |
|
"use_label": 6216.47509765625 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.546875, |
|
"learning_rate": 3.922476253313921e-06, |
|
"logits/chosen": -2.0575146675109863, |
|
"logits/rejected": -2.054591417312622, |
|
"logps/chosen": -82.88232421875, |
|
"logps/rejected": -90.05668640136719, |
|
"loss": 0.6863, |
|
"pred_label": 848.6500244140625, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": -0.13817565143108368, |
|
"rewards/margins": 0.11208128929138184, |
|
"rewards/rejected": -0.2502569556236267, |
|
"step": 360, |
|
"use_label": 6345.35009765625 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.75, |
|
"learning_rate": 3.846353490562664e-06, |
|
"logits/chosen": -2.076312780380249, |
|
"logits/rejected": -1.9995708465576172, |
|
"logps/chosen": -85.83981323242188, |
|
"logps/rejected": -95.1656723022461, |
|
"loss": 0.6844, |
|
"pred_label": 880.4249877929688, |
|
"rewards/accuracies": 0.3687500059604645, |
|
"rewards/chosen": -0.11745607852935791, |
|
"rewards/margins": 0.14055705070495605, |
|
"rewards/rejected": -0.2580130994319916, |
|
"step": 370, |
|
"use_label": 6473.5751953125 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.96484375, |
|
"learning_rate": 3.768430099352445e-06, |
|
"logits/chosen": -2.0079166889190674, |
|
"logits/rejected": -1.986297845840454, |
|
"logps/chosen": -76.30638122558594, |
|
"logps/rejected": -93.93800354003906, |
|
"loss": 0.6924, |
|
"pred_label": 912.5999755859375, |
|
"rewards/accuracies": 0.3062500059604645, |
|
"rewards/chosen": -0.1675274670124054, |
|
"rewards/margins": 0.08305275440216064, |
|
"rewards/rejected": -0.25058022141456604, |
|
"step": 380, |
|
"use_label": 6601.39990234375 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.97265625, |
|
"learning_rate": 3.6888102953122307e-06, |
|
"logits/chosen": -1.9291635751724243, |
|
"logits/rejected": -1.914608359336853, |
|
"logps/chosen": -101.44157409667969, |
|
"logps/rejected": -96.10136413574219, |
|
"loss": 0.6878, |
|
"pred_label": 952.8250122070312, |
|
"rewards/accuracies": 0.3187499940395355, |
|
"rewards/chosen": -0.1657881736755371, |
|
"rewards/margins": 0.12364902347326279, |
|
"rewards/rejected": -0.2894372344017029, |
|
"step": 390, |
|
"use_label": 6721.1748046875 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.296875, |
|
"learning_rate": 3.607600562872785e-06, |
|
"logits/chosen": -1.8988447189331055, |
|
"logits/rejected": -1.8926557302474976, |
|
"logps/chosen": -87.97608947753906, |
|
"logps/rejected": -108.15446472167969, |
|
"loss": 0.6857, |
|
"pred_label": 987.5999755859375, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": -0.16945099830627441, |
|
"rewards/margins": 0.11657001823186874, |
|
"rewards/rejected": -0.28602102398872375, |
|
"step": 400, |
|
"use_label": 6846.39990234375 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": -1.4529144763946533, |
|
"eval_logits/rejected": -1.4031411409378052, |
|
"eval_logps/chosen": -86.92367553710938, |
|
"eval_logps/rejected": -108.39134979248047, |
|
"eval_loss": 0.6884719133377075, |
|
"eval_pred_label": 1055.5555419921875, |
|
"eval_rewards/accuracies": 0.3531745970249176, |
|
"eval_rewards/chosen": -0.18023118376731873, |
|
"eval_rewards/margins": 0.14970164000988007, |
|
"eval_rewards/rejected": -0.32993283867836, |
|
"eval_runtime": 246.35, |
|
"eval_samples_per_second": 8.119, |
|
"eval_steps_per_second": 0.256, |
|
"eval_use_label": 7112.4443359375, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 3.5249095128531863e-06, |
|
"logits/chosen": -1.289879560470581, |
|
"logits/rejected": -1.4085474014282227, |
|
"logps/chosen": -85.75054168701172, |
|
"logps/rejected": -96.24283599853516, |
|
"loss": 0.6874, |
|
"pred_label": 1135.699951171875, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": -0.21242520213127136, |
|
"rewards/margins": 0.17107079923152924, |
|
"rewards/rejected": -0.3834960162639618, |
|
"step": 410, |
|
"use_label": 7362.2998046875 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.97265625, |
|
"learning_rate": 3.4408477372034743e-06, |
|
"logits/chosen": -1.2336995601654053, |
|
"logits/rejected": -1.1623611450195312, |
|
"logps/chosen": -97.20266723632812, |
|
"logps/rejected": -117.6893081665039, |
|
"loss": 0.6882, |
|
"pred_label": 1171.425048828125, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": -0.3355943560600281, |
|
"rewards/margins": 0.16045086085796356, |
|
"rewards/rejected": -0.49604520201683044, |
|
"step": 420, |
|
"use_label": 7486.5751953125 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 3.355527661097728e-06, |
|
"logits/chosen": -1.3129976987838745, |
|
"logits/rejected": -1.2275488376617432, |
|
"logps/chosen": -106.88911437988281, |
|
"logps/rejected": -112.3751449584961, |
|
"loss": 0.6918, |
|
"pred_label": 1207.9749755859375, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.3042059540748596, |
|
"rewards/margins": 0.13597823679447174, |
|
"rewards/rejected": -0.44018417596817017, |
|
"step": 430, |
|
"use_label": 7610.02490234375 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.5625, |
|
"learning_rate": 3.269063392575352e-06, |
|
"logits/chosen": -1.3159044981002808, |
|
"logits/rejected": -1.413769006729126, |
|
"logps/chosen": -90.12797546386719, |
|
"logps/rejected": -101.85379028320312, |
|
"loss": 0.6858, |
|
"pred_label": 1242.5, |
|
"rewards/accuracies": 0.33125001192092896, |
|
"rewards/chosen": -0.22682049870491028, |
|
"rewards/margins": 0.159098818898201, |
|
"rewards/rejected": -0.3859192728996277, |
|
"step": 440, |
|
"use_label": 7735.5 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.375, |
|
"learning_rate": 3.181570569931697e-06, |
|
"logits/chosen": -1.4389588832855225, |
|
"logits/rejected": -1.5265202522277832, |
|
"logps/chosen": -96.37947845458984, |
|
"logps/rejected": -113.1718521118164, |
|
"loss": 0.6951, |
|
"pred_label": 1281.3499755859375, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": -0.2355901300907135, |
|
"rewards/margins": 0.13590970635414124, |
|
"rewards/rejected": -0.37149983644485474, |
|
"step": 450, |
|
"use_label": 7856.64990234375 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 3.09316620706208e-06, |
|
"logits/chosen": -1.2455997467041016, |
|
"logits/rejected": -1.1902601718902588, |
|
"logps/chosen": -72.07853698730469, |
|
"logps/rejected": -84.86478424072266, |
|
"loss": 0.6842, |
|
"pred_label": 1311.824951171875, |
|
"rewards/accuracies": 0.29374998807907104, |
|
"rewards/chosen": -0.1508016437292099, |
|
"rewards/margins": 0.1797787994146347, |
|
"rewards/rejected": -0.330580472946167, |
|
"step": 460, |
|
"use_label": 7986.1748046875 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 3.0039685369660785e-06, |
|
"logits/chosen": -1.175449252128601, |
|
"logits/rejected": -1.0759943723678589, |
|
"logps/chosen": -88.91249084472656, |
|
"logps/rejected": -110.02799987792969, |
|
"loss": 0.6873, |
|
"pred_label": 1345.1500244140625, |
|
"rewards/accuracies": 0.3687500059604645, |
|
"rewards/chosen": -0.22000393271446228, |
|
"rewards/margins": 0.1964809000492096, |
|
"rewards/rejected": -0.4164848327636719, |
|
"step": 470, |
|
"use_label": 8112.85009765625 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 2.91409685362137e-06, |
|
"logits/chosen": -1.0014227628707886, |
|
"logits/rejected": -1.0880533456802368, |
|
"logps/chosen": -99.41879272460938, |
|
"logps/rejected": -120.02769470214844, |
|
"loss": 0.6868, |
|
"pred_label": 1391.25, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": -0.24276605248451233, |
|
"rewards/margins": 0.17868337035179138, |
|
"rewards/rejected": -0.4214494228363037, |
|
"step": 480, |
|
"use_label": 8226.75 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.4375, |
|
"learning_rate": 2.8236713524386085e-06, |
|
"logits/chosen": -1.0729541778564453, |
|
"logits/rejected": -0.9298813939094543, |
|
"logps/chosen": -88.73147583007812, |
|
"logps/rejected": -94.53245544433594, |
|
"loss": 0.6921, |
|
"pred_label": 1428.9000244140625, |
|
"rewards/accuracies": 0.26875001192092896, |
|
"rewards/chosen": -0.22107498347759247, |
|
"rewards/margins": 0.12524999678134918, |
|
"rewards/rejected": -0.34632498025894165, |
|
"step": 490, |
|
"use_label": 8349.099609375 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.421875, |
|
"learning_rate": 2.7328129695107205e-06, |
|
"logits/chosen": -0.8902079463005066, |
|
"logits/rejected": -1.065393090248108, |
|
"logps/chosen": -113.58573150634766, |
|
"logps/rejected": -131.9083709716797, |
|
"loss": 0.6894, |
|
"pred_label": 1462.4000244140625, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -0.37447452545166016, |
|
"rewards/margins": 0.17800332605838776, |
|
"rewards/rejected": -0.5524778962135315, |
|
"step": 500, |
|
"use_label": 8475.599609375 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -0.6888664960861206, |
|
"eval_logits/rejected": -0.5997034311294556, |
|
"eval_logps/chosen": -97.52025604248047, |
|
"eval_logps/rejected": -120.9921646118164, |
|
"eval_loss": 0.6891720294952393, |
|
"eval_pred_label": 1530.5714111328125, |
|
"eval_rewards/accuracies": 0.3551587164402008, |
|
"eval_rewards/chosen": -0.28619715571403503, |
|
"eval_rewards/margins": 0.1697438359260559, |
|
"eval_rewards/rejected": -0.45594096183776855, |
|
"eval_runtime": 246.2759, |
|
"eval_samples_per_second": 8.121, |
|
"eval_steps_per_second": 0.256, |
|
"eval_use_label": 8741.4287109375, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 2.641643219871597e-06, |
|
"logits/chosen": -0.7708507776260376, |
|
"logits/rejected": -0.882653534412384, |
|
"logps/chosen": -90.50456237792969, |
|
"logps/rejected": -116.84162902832031, |
|
"loss": 0.686, |
|
"pred_label": 1610.5999755859375, |
|
"rewards/accuracies": 0.36250001192092896, |
|
"rewards/chosen": -0.2625977396965027, |
|
"rewards/margins": 0.20036396384239197, |
|
"rewards/rejected": -0.4629616141319275, |
|
"step": 510, |
|
"use_label": 8991.400390625 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.4765625, |
|
"learning_rate": 2.5502840349805074e-06, |
|
"logits/chosen": -0.8800374865531921, |
|
"logits/rejected": -1.038163185119629, |
|
"logps/chosen": -100.99266052246094, |
|
"logps/rejected": -116.75798034667969, |
|
"loss": 0.6895, |
|
"pred_label": 1653.0, |
|
"rewards/accuracies": 0.36250001192092896, |
|
"rewards/chosen": -0.2859944701194763, |
|
"rewards/margins": 0.15662841498851776, |
|
"rewards/rejected": -0.4426229000091553, |
|
"step": 520, |
|
"use_label": 9109.0 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.3671875, |
|
"learning_rate": 2.4588575996495797e-06, |
|
"logits/chosen": -0.8304817080497742, |
|
"logits/rejected": -0.7847825288772583, |
|
"logps/chosen": -105.92545318603516, |
|
"logps/rejected": -117.15931701660156, |
|
"loss": 0.6895, |
|
"pred_label": 1692.175048828125, |
|
"rewards/accuracies": 0.36250001192092896, |
|
"rewards/chosen": -0.316447913646698, |
|
"rewards/margins": 0.17969803512096405, |
|
"rewards/rejected": -0.49614596366882324, |
|
"step": 530, |
|
"use_label": 9229.8251953125 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 2.03125, |
|
"learning_rate": 2.367486188632446e-06, |
|
"logits/chosen": -0.67156982421875, |
|
"logits/rejected": -0.8070074319839478, |
|
"logps/chosen": -112.666748046875, |
|
"logps/rejected": -131.92593383789062, |
|
"loss": 0.6896, |
|
"pred_label": 1734.375, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.35928016901016235, |
|
"rewards/margins": 0.22706659138202667, |
|
"rewards/rejected": -0.5863467454910278, |
|
"step": 540, |
|
"use_label": 9347.625 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.796875, |
|
"learning_rate": 2.276292003092593e-06, |
|
"logits/chosen": -0.7944391369819641, |
|
"logits/rejected": -0.7596977353096008, |
|
"logps/chosen": -107.38740539550781, |
|
"logps/rejected": -111.28292083740234, |
|
"loss": 0.6887, |
|
"pred_label": 1775.7249755859375, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.3932684063911438, |
|
"rewards/margins": 0.12325477600097656, |
|
"rewards/rejected": -0.5165232419967651, |
|
"step": 550, |
|
"use_label": 9466.275390625 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.3515625, |
|
"learning_rate": 2.1853970071701415e-06, |
|
"logits/chosen": -0.7152852416038513, |
|
"logits/rejected": -0.7174454927444458, |
|
"logps/chosen": -104.6649398803711, |
|
"logps/rejected": -117.61528015136719, |
|
"loss": 0.6901, |
|
"pred_label": 1814.375, |
|
"rewards/accuracies": 0.32499998807907104, |
|
"rewards/chosen": -0.3510952889919281, |
|
"rewards/margins": 0.15508435666561127, |
|
"rewards/rejected": -0.5061796307563782, |
|
"step": 560, |
|
"use_label": 9587.625 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 2.125, |
|
"learning_rate": 2.0949227648656194e-06, |
|
"logits/chosen": -0.925454318523407, |
|
"logits/rejected": -0.849765956401825, |
|
"logps/chosen": -100.53346252441406, |
|
"logps/rejected": -131.70309448242188, |
|
"loss": 0.6872, |
|
"pred_label": 1852.2249755859375, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.3393338620662689, |
|
"rewards/margins": 0.23398590087890625, |
|
"rewards/rejected": -0.5733197927474976, |
|
"step": 570, |
|
"use_label": 9709.775390625 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 2.00499027745888e-06, |
|
"logits/chosen": -0.7680953145027161, |
|
"logits/rejected": -0.8566532135009766, |
|
"logps/chosen": -111.98583984375, |
|
"logps/rejected": -131.1743927001953, |
|
"loss": 0.6879, |
|
"pred_label": 1893.7750244140625, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": -0.37074294686317444, |
|
"rewards/margins": 0.1566895693540573, |
|
"rewards/rejected": -0.5274325013160706, |
|
"step": 580, |
|
"use_label": 9828.224609375 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 1.915719821680624e-06, |
|
"logits/chosen": -0.8080962300300598, |
|
"logits/rejected": -0.7905328869819641, |
|
"logps/chosen": -125.2184066772461, |
|
"logps/rejected": -148.79432678222656, |
|
"loss": 0.6891, |
|
"pred_label": 1939.25, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.4552985727787018, |
|
"rewards/margins": 0.22290782630443573, |
|
"rewards/rejected": -0.6782063245773315, |
|
"step": 590, |
|
"use_label": 9942.75 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.9609375, |
|
"learning_rate": 1.8272307888529276e-06, |
|
"logits/chosen": -0.5244548320770264, |
|
"logits/rejected": -0.7590290904045105, |
|
"logps/chosen": -122.6807632446289, |
|
"logps/rejected": -162.36203002929688, |
|
"loss": 0.6881, |
|
"pred_label": 1992.0, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.48354387283325195, |
|
"rewards/margins": 0.23392179608345032, |
|
"rewards/rejected": -0.7174656391143799, |
|
"step": 600, |
|
"use_label": 10050.0 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": -0.35794487595558167, |
|
"eval_logits/rejected": -0.2547617554664612, |
|
"eval_logps/chosen": -107.16178131103516, |
|
"eval_logps/rejected": -135.9844512939453, |
|
"eval_loss": 0.6918326616287231, |
|
"eval_pred_label": 2082.3173828125, |
|
"eval_rewards/accuracies": 0.3531745970249176, |
|
"eval_rewards/chosen": -0.3826123774051666, |
|
"eval_rewards/margins": 0.22325147688388824, |
|
"eval_rewards/rejected": -0.6058638095855713, |
|
"eval_runtime": 248.3104, |
|
"eval_samples_per_second": 8.054, |
|
"eval_steps_per_second": 0.254, |
|
"eval_use_label": 10293.6826171875, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.515625, |
|
"learning_rate": 1.739641525213929e-06, |
|
"logits/chosen": -0.572044312953949, |
|
"logits/rejected": -0.654716432094574, |
|
"logps/chosen": -95.46563720703125, |
|
"logps/rejected": -132.0639190673828, |
|
"loss": 0.6926, |
|
"pred_label": 2185.449951171875, |
|
"rewards/accuracies": 0.33125001192092896, |
|
"rewards/chosen": -0.3655874729156494, |
|
"rewards/margins": 0.21378450095653534, |
|
"rewards/rejected": -0.579371988773346, |
|
"step": 610, |
|
"use_label": 10520.5498046875 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 1.6530691736402317e-06, |
|
"logits/chosen": -0.7425838708877563, |
|
"logits/rejected": -0.7612688541412354, |
|
"logps/chosen": -98.45491790771484, |
|
"logps/rejected": -139.22779846191406, |
|
"loss": 0.6874, |
|
"pred_label": 2228.10009765625, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": -0.3674684762954712, |
|
"rewards/margins": 0.22383132576942444, |
|
"rewards/rejected": -0.591299831867218, |
|
"step": 620, |
|
"use_label": 10637.900390625 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.34375, |
|
"learning_rate": 1.5676295169786864e-06, |
|
"logits/chosen": -0.5626051425933838, |
|
"logits/rejected": -0.7373117208480835, |
|
"logps/chosen": -109.76419830322266, |
|
"logps/rejected": -132.89573669433594, |
|
"loss": 0.6861, |
|
"pred_label": 2268.074951171875, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": -0.3673921525478363, |
|
"rewards/margins": 0.2162620723247528, |
|
"rewards/rejected": -0.5836542844772339, |
|
"step": 630, |
|
"use_label": 10757.9248046875 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 1.4834368231970922e-06, |
|
"logits/chosen": -0.70842045545578, |
|
"logits/rejected": -0.5356844663619995, |
|
"logps/chosen": -115.94453430175781, |
|
"logps/rejected": -132.53977966308594, |
|
"loss": 0.6881, |
|
"pred_label": 2312.199951171875, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": -0.4425238072872162, |
|
"rewards/margins": 0.23113970458507538, |
|
"rewards/rejected": -0.6736636161804199, |
|
"step": 640, |
|
"use_label": 10873.7998046875 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.5, |
|
"learning_rate": 1.4006036925609245e-06, |
|
"logits/chosen": -0.7530516386032104, |
|
"logits/rejected": -0.39667490124702454, |
|
"logps/chosen": -117.97354888916016, |
|
"logps/rejected": -148.5204620361328, |
|
"loss": 0.6907, |
|
"pred_label": 2364.60009765625, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": -0.4478411078453064, |
|
"rewards/margins": 0.25875502824783325, |
|
"rewards/rejected": -0.7065961956977844, |
|
"step": 650, |
|
"use_label": 10981.400390625 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 1.3192409070404582e-06, |
|
"logits/chosen": -0.4164413511753082, |
|
"logits/rejected": -0.5387105345726013, |
|
"logps/chosen": -93.08172607421875, |
|
"logps/rejected": -106.9631576538086, |
|
"loss": 0.6884, |
|
"pred_label": 2410.39990234375, |
|
"rewards/accuracies": 0.3062500059604645, |
|
"rewards/chosen": -0.3495523929595947, |
|
"rewards/margins": 0.1542079746723175, |
|
"rewards/rejected": -0.5037603378295898, |
|
"step": 660, |
|
"use_label": 11095.599609375 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 1.515625, |
|
"learning_rate": 1.2394572821496953e-06, |
|
"logits/chosen": -0.9564473032951355, |
|
"logits/rejected": -1.0122594833374023, |
|
"logps/chosen": -100.20994567871094, |
|
"logps/rejected": -121.32554626464844, |
|
"loss": 0.6935, |
|
"pred_label": 2446.14990234375, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": -0.3450331687927246, |
|
"rewards/margins": 0.19006122648715973, |
|
"rewards/rejected": -0.5350943803787231, |
|
"step": 670, |
|
"use_label": 11219.849609375 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 1.546875, |
|
"learning_rate": 1.1613595214152713e-06, |
|
"logits/chosen": -0.588452935218811, |
|
"logits/rejected": -0.6323766708374023, |
|
"logps/chosen": -125.20991516113281, |
|
"logps/rejected": -139.94993591308594, |
|
"loss": 0.6902, |
|
"pred_label": 2485.10009765625, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": -0.3915707468986511, |
|
"rewards/margins": 0.19166378676891327, |
|
"rewards/rejected": -0.5832345485687256, |
|
"step": 680, |
|
"use_label": 11340.900390625 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 1.578125, |
|
"learning_rate": 1.0850520736699362e-06, |
|
"logits/chosen": -0.6506579518318176, |
|
"logits/rejected": -0.7167869806289673, |
|
"logps/chosen": -144.53038024902344, |
|
"logps/rejected": -167.38192749023438, |
|
"loss": 0.6898, |
|
"pred_label": 2534.75, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": -0.42825189232826233, |
|
"rewards/margins": 0.28569427132606506, |
|
"rewards/rejected": -0.7139460444450378, |
|
"step": 690, |
|
"use_label": 11451.25 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 1.59375, |
|
"learning_rate": 1.0106369933615043e-06, |
|
"logits/chosen": -0.8556931614875793, |
|
"logits/rejected": -0.6913198232650757, |
|
"logps/chosen": -105.3968505859375, |
|
"logps/rejected": -124.95710754394531, |
|
"loss": 0.6913, |
|
"pred_label": 2580.824951171875, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": -0.39049768447875977, |
|
"rewards/margins": 0.17418017983436584, |
|
"rewards/rejected": -0.564677894115448, |
|
"step": 700, |
|
"use_label": 11565.1748046875 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_logits/chosen": -0.3469957709312439, |
|
"eval_logits/rejected": -0.24619349837303162, |
|
"eval_logps/chosen": -104.32471466064453, |
|
"eval_logps/rejected": -133.26370239257812, |
|
"eval_loss": 0.6898515224456787, |
|
"eval_pred_label": 2673.52392578125, |
|
"eval_rewards/accuracies": 0.3670634925365448, |
|
"eval_rewards/chosen": -0.35424166917800903, |
|
"eval_rewards/margins": 0.22441466152668, |
|
"eval_rewards/rejected": -0.5786563754081726, |
|
"eval_runtime": 248.2749, |
|
"eval_samples_per_second": 8.056, |
|
"eval_steps_per_second": 0.254, |
|
"eval_use_label": 11806.4765625, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 1.03125, |
|
"learning_rate": 9.382138040640714e-07, |
|
"logits/chosen": -0.6519032716751099, |
|
"logits/rejected": -0.637380063533783, |
|
"logps/chosen": -102.23021697998047, |
|
"logps/rejected": -127.60137939453125, |
|
"loss": 0.6903, |
|
"pred_label": 2771.699951171875, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": -0.3915974497795105, |
|
"rewards/margins": 0.21561889350414276, |
|
"rewards/rejected": -0.6072162985801697, |
|
"step": 710, |
|
"use_label": 12038.2998046875 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 1.609375, |
|
"learning_rate": 8.678793653740633e-07, |
|
"logits/chosen": -0.6509895324707031, |
|
"logits/rejected": -0.6935362815856934, |
|
"logps/chosen": -87.30061340332031, |
|
"logps/rejected": -114.2796630859375, |
|
"loss": 0.6903, |
|
"pred_label": 2811.47509765625, |
|
"rewards/accuracies": 0.29374998807907104, |
|
"rewards/chosen": -0.30430155992507935, |
|
"rewards/margins": 0.18221500515937805, |
|
"rewards/rejected": -0.486516535282135, |
|
"step": 720, |
|
"use_label": 12158.525390625 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 2.21875, |
|
"learning_rate": 7.997277433690984e-07, |
|
"logits/chosen": -0.6035222411155701, |
|
"logits/rejected": -0.65208500623703, |
|
"logps/chosen": -100.17440032958984, |
|
"logps/rejected": -119.87808990478516, |
|
"loss": 0.6865, |
|
"pred_label": 2850.0, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": -0.2982019782066345, |
|
"rewards/margins": 0.2585477828979492, |
|
"rewards/rejected": -0.5567497611045837, |
|
"step": 730, |
|
"use_label": 12280.0 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.80859375, |
|
"learning_rate": 7.338500848029603e-07, |
|
"logits/chosen": -0.4770827293395996, |
|
"logits/rejected": -0.5081530213356018, |
|
"logps/chosen": -94.86068725585938, |
|
"logps/rejected": -116.67037200927734, |
|
"loss": 0.6916, |
|
"pred_label": 2886.125, |
|
"rewards/accuracies": 0.28125, |
|
"rewards/chosen": -0.34235304594039917, |
|
"rewards/margins": 0.19017408788204193, |
|
"rewards/rejected": -0.5325270891189575, |
|
"step": 740, |
|
"use_label": 12403.875 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 6.70334495204884e-07, |
|
"logits/chosen": -0.5357509851455688, |
|
"logits/rejected": -0.594279408454895, |
|
"logps/chosen": -119.76139831542969, |
|
"logps/rejected": -145.1709747314453, |
|
"loss": 0.6905, |
|
"pred_label": 2929.22509765625, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": -0.4223107397556305, |
|
"rewards/margins": 0.18705633282661438, |
|
"rewards/rejected": -0.6093670725822449, |
|
"step": 750, |
|
"use_label": 12520.775390625 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1.1640625, |
|
"learning_rate": 6.092659210462232e-07, |
|
"logits/chosen": -0.6737512350082397, |
|
"logits/rejected": -0.6523575186729431, |
|
"logps/chosen": -86.640625, |
|
"logps/rejected": -124.01812744140625, |
|
"loss": 0.6899, |
|
"pred_label": 2976.050048828125, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.32672789692878723, |
|
"rewards/margins": 0.1930442750453949, |
|
"rewards/rejected": -0.5197721719741821, |
|
"step": 760, |
|
"use_label": 12633.9501953125 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 1.4375, |
|
"learning_rate": 5.507260361320738e-07, |
|
"logits/chosen": -0.6238114833831787, |
|
"logits/rejected": -0.6686199307441711, |
|
"logps/chosen": -127.0525131225586, |
|
"logps/rejected": -142.44747924804688, |
|
"loss": 0.689, |
|
"pred_label": 3021.85009765625, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": -0.43505221605300903, |
|
"rewards/margins": 0.25210094451904297, |
|
"rewards/rejected": -0.687153160572052, |
|
"step": 770, |
|
"use_label": 12748.150390625 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 1.7578125, |
|
"learning_rate": 4.947931323697983e-07, |
|
"logits/chosen": -0.6369722485542297, |
|
"logits/rejected": -0.7722553014755249, |
|
"logps/chosen": -112.76126861572266, |
|
"logps/rejected": -133.56796264648438, |
|
"loss": 0.6915, |
|
"pred_label": 3075.72509765625, |
|
"rewards/accuracies": 0.36250001192092896, |
|
"rewards/chosen": -0.3996170461177826, |
|
"rewards/margins": 0.22261002659797668, |
|
"rewards/rejected": -0.6222270727157593, |
|
"step": 780, |
|
"use_label": 12854.275390625 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 1.421875, |
|
"learning_rate": 4.4154201506053985e-07, |
|
"logits/chosen": -0.5256940126419067, |
|
"logits/rejected": -0.467402845621109, |
|
"logps/chosen": -95.73258209228516, |
|
"logps/rejected": -103.3360366821289, |
|
"loss": 0.6917, |
|
"pred_label": 3123.85009765625, |
|
"rewards/accuracies": 0.32499998807907104, |
|
"rewards/chosen": -0.30898317694664, |
|
"rewards/margins": 0.2029590606689453, |
|
"rewards/rejected": -0.5119422674179077, |
|
"step": 790, |
|
"use_label": 12966.150390625 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 1.359375, |
|
"learning_rate": 3.910439028537638e-07, |
|
"logits/chosen": -0.6677756905555725, |
|
"logits/rejected": -0.607046902179718, |
|
"logps/chosen": -92.61612701416016, |
|
"logps/rejected": -115.20296478271484, |
|
"loss": 0.6893, |
|
"pred_label": 3166.449951171875, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": -0.3256850242614746, |
|
"rewards/margins": 0.20536477863788605, |
|
"rewards/rejected": -0.5310498476028442, |
|
"step": 800, |
|
"use_label": 13083.5498046875 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": -0.23666124045848846, |
|
"eval_logits/rejected": -0.1293245106935501, |
|
"eval_logps/chosen": -103.33552551269531, |
|
"eval_logps/rejected": -132.24159240722656, |
|
"eval_loss": 0.6903889179229736, |
|
"eval_pred_label": 3252.09521484375, |
|
"eval_rewards/accuracies": 0.363095223903656, |
|
"eval_rewards/chosen": -0.34434974193573, |
|
"eval_rewards/margins": 0.22408555448055267, |
|
"eval_rewards/rejected": -0.5684353113174438, |
|
"eval_runtime": 248.2839, |
|
"eval_samples_per_second": 8.055, |
|
"eval_steps_per_second": 0.254, |
|
"eval_use_label": 13331.904296875, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 1.3828125, |
|
"learning_rate": 3.4336633249862084e-07, |
|
"logits/chosen": -0.6630854606628418, |
|
"logits/rejected": -0.6445407867431641, |
|
"logps/chosen": -108.18148040771484, |
|
"logps/rejected": -135.99142456054688, |
|
"loss": 0.6901, |
|
"pred_label": 3350.35009765625, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": -0.3832666873931885, |
|
"rewards/margins": 0.1908622682094574, |
|
"rewards/rejected": -0.5741289258003235, |
|
"step": 810, |
|
"use_label": 13563.650390625 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 1.3359375, |
|
"learning_rate": 2.98573068519539e-07, |
|
"logits/chosen": -0.6042599081993103, |
|
"logits/rejected": -0.6371781826019287, |
|
"logps/chosen": -94.31297302246094, |
|
"logps/rejected": -101.22802734375, |
|
"loss": 0.689, |
|
"pred_label": 3393.47509765625, |
|
"rewards/accuracies": 0.29374998807907104, |
|
"rewards/chosen": -0.3432285487651825, |
|
"rewards/margins": 0.13310988247394562, |
|
"rewards/rejected": -0.4763384461402893, |
|
"step": 820, |
|
"use_label": 13680.525390625 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 1.484375, |
|
"learning_rate": 2.5672401793681854e-07, |
|
"logits/chosen": -0.5476540923118591, |
|
"logits/rejected": -0.43125781416893005, |
|
"logps/chosen": -86.91058349609375, |
|
"logps/rejected": -110.5887222290039, |
|
"loss": 0.6923, |
|
"pred_label": 3435.074951171875, |
|
"rewards/accuracies": 0.36250001192092896, |
|
"rewards/chosen": -0.2886909246444702, |
|
"rewards/margins": 0.25071993470191956, |
|
"rewards/rejected": -0.5394108295440674, |
|
"step": 830, |
|
"use_label": 13798.9248046875 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 1.9296875, |
|
"learning_rate": 2.178751501463036e-07, |
|
"logits/chosen": -0.5565081834793091, |
|
"logits/rejected": -0.6612057685852051, |
|
"logps/chosen": -89.98490142822266, |
|
"logps/rejected": -93.48139953613281, |
|
"loss": 0.6915, |
|
"pred_label": 3471.35009765625, |
|
"rewards/accuracies": 0.24375000596046448, |
|
"rewards/chosen": -0.306854248046875, |
|
"rewards/margins": 0.09164027869701385, |
|
"rewards/rejected": -0.39849454164505005, |
|
"step": 840, |
|
"use_label": 13922.650390625 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 1.359375, |
|
"learning_rate": 1.820784220652766e-07, |
|
"logits/chosen": -0.6778563261032104, |
|
"logits/rejected": -0.73534095287323, |
|
"logps/chosen": -120.2663345336914, |
|
"logps/rejected": -149.02294921875, |
|
"loss": 0.6854, |
|
"pred_label": 3509.0, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -0.36049091815948486, |
|
"rewards/margins": 0.2984590530395508, |
|
"rewards/rejected": -0.6589499711990356, |
|
"step": 850, |
|
"use_label": 14045.0 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 1.796875, |
|
"learning_rate": 1.4938170864468636e-07, |
|
"logits/chosen": -0.5929479002952576, |
|
"logits/rejected": -0.48117414116859436, |
|
"logps/chosen": -115.10990142822266, |
|
"logps/rejected": -133.1912841796875, |
|
"loss": 0.6892, |
|
"pred_label": 3556.324951171875, |
|
"rewards/accuracies": 0.3812499940395355, |
|
"rewards/chosen": -0.33908045291900635, |
|
"rewards/margins": 0.23609444499015808, |
|
"rewards/rejected": -0.5751749277114868, |
|
"step": 860, |
|
"use_label": 14157.6748046875 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 1.7578125, |
|
"learning_rate": 1.1982873884064466e-07, |
|
"logits/chosen": -0.6633087992668152, |
|
"logits/rejected": -0.6678288578987122, |
|
"logps/chosen": -117.92154693603516, |
|
"logps/rejected": -145.3701171875, |
|
"loss": 0.6893, |
|
"pred_label": 3603.75, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.3660942316055298, |
|
"rewards/margins": 0.2644110918045044, |
|
"rewards/rejected": -0.6305053234100342, |
|
"step": 870, |
|
"use_label": 14270.25 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.87890625, |
|
"learning_rate": 9.345903713082305e-08, |
|
"logits/chosen": -0.5895944237709045, |
|
"logits/rejected": -0.5510295629501343, |
|
"logps/chosen": -96.94719696044922, |
|
"logps/rejected": -141.16554260253906, |
|
"loss": 0.6891, |
|
"pred_label": 3651.0, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -0.3419613242149353, |
|
"rewards/margins": 0.32287630438804626, |
|
"rewards/rejected": -0.6648377180099487, |
|
"step": 880, |
|
"use_label": 14383.0 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 1.6484375, |
|
"learning_rate": 7.030787065396866e-08, |
|
"logits/chosen": -0.5159703493118286, |
|
"logits/rejected": -0.5519541501998901, |
|
"logps/chosen": -96.9026107788086, |
|
"logps/rejected": -120.7626724243164, |
|
"loss": 0.693, |
|
"pred_label": 3690.675048828125, |
|
"rewards/accuracies": 0.28125, |
|
"rewards/chosen": -0.3307461142539978, |
|
"rewards/margins": 0.1426464170217514, |
|
"rewards/rejected": -0.4733925461769104, |
|
"step": 890, |
|
"use_label": 14503.3251953125 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 1.9609375, |
|
"learning_rate": 5.0406202043228604e-08, |
|
"logits/chosen": -0.2721698582172394, |
|
"logits/rejected": -0.407818466424942, |
|
"logps/chosen": -104.2662582397461, |
|
"logps/rejected": -149.70314025878906, |
|
"loss": 0.689, |
|
"pred_label": 3732.824951171875, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": -0.3485477864742279, |
|
"rewards/margins": 0.2633667290210724, |
|
"rewards/rejected": -0.6119145154953003, |
|
"step": 900, |
|
"use_label": 14621.1748046875 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_logits/chosen": -0.2437347173690796, |
|
"eval_logits/rejected": -0.13671822845935822, |
|
"eval_logps/chosen": -103.0300521850586, |
|
"eval_logps/rejected": -131.91110229492188, |
|
"eval_loss": 0.6907457709312439, |
|
"eval_pred_label": 3821.52392578125, |
|
"eval_rewards/accuracies": 0.363095223903656, |
|
"eval_rewards/chosen": -0.3412950336933136, |
|
"eval_rewards/margins": 0.22383520007133484, |
|
"eval_rewards/rejected": -0.5651301741600037, |
|
"eval_runtime": 248.2504, |
|
"eval_samples_per_second": 8.056, |
|
"eval_steps_per_second": 0.254, |
|
"eval_use_label": 14866.4765625, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 3.378064801637687e-08, |
|
"logits/chosen": -0.5370496511459351, |
|
"logits/rejected": -0.5028234720230103, |
|
"logps/chosen": -89.67744445800781, |
|
"logps/rejected": -113.96895599365234, |
|
"loss": 0.6882, |
|
"pred_label": 3916.52490234375, |
|
"rewards/accuracies": 0.3187499940395355, |
|
"rewards/chosen": -0.2901899218559265, |
|
"rewards/margins": 0.2133828103542328, |
|
"rewards/rejected": -0.5035727024078369, |
|
"step": 910, |
|
"use_label": 15101.474609375 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 2.0453443778310766e-08, |
|
"logits/chosen": -0.43033066391944885, |
|
"logits/rejected": -0.4173038899898529, |
|
"logps/chosen": -80.09765625, |
|
"logps/rejected": -120.93513488769531, |
|
"loss": 0.6934, |
|
"pred_label": 3958.0, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.26141807436943054, |
|
"rewards/margins": 0.23344416916370392, |
|
"rewards/rejected": -0.49486222863197327, |
|
"step": 920, |
|
"use_label": 15220.0 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 2.109375, |
|
"learning_rate": 1.0442413283435759e-08, |
|
"logits/chosen": -0.4513850212097168, |
|
"logits/rejected": -0.5099025964736938, |
|
"logps/chosen": -92.44239807128906, |
|
"logps/rejected": -119.61177062988281, |
|
"loss": 0.6878, |
|
"pred_label": 3998.60009765625, |
|
"rewards/accuracies": 0.33125001192092896, |
|
"rewards/chosen": -0.29288578033447266, |
|
"rewards/margins": 0.20934204757213593, |
|
"rewards/rejected": -0.502227783203125, |
|
"step": 930, |
|
"use_label": 15339.400390625 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 1.25, |
|
"learning_rate": 3.760945397705828e-09, |
|
"logits/chosen": -0.3625331521034241, |
|
"logits/rejected": -0.5358187556266785, |
|
"logps/chosen": -103.41780090332031, |
|
"logps/rejected": -130.23828125, |
|
"loss": 0.691, |
|
"pred_label": 4038.60009765625, |
|
"rewards/accuracies": 0.3187499940395355, |
|
"rewards/chosen": -0.34467238187789917, |
|
"rewards/margins": 0.18087737262248993, |
|
"rewards/rejected": -0.5255497694015503, |
|
"step": 940, |
|
"use_label": 15459.400390625 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 1.59375, |
|
"learning_rate": 4.1797599220405605e-10, |
|
"logits/chosen": -0.674268901348114, |
|
"logits/rejected": -0.7018919587135315, |
|
"logps/chosen": -114.91938781738281, |
|
"logps/rejected": -133.3175506591797, |
|
"loss": 0.6895, |
|
"pred_label": 4082.625, |
|
"rewards/accuracies": 0.33125001192092896, |
|
"rewards/chosen": -0.3830910325050354, |
|
"rewards/margins": 0.1591145098209381, |
|
"rewards/rejected": -0.5422054529190063, |
|
"step": 950, |
|
"use_label": 15575.375 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 955, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6880922077838039, |
|
"train_runtime": 20023.3666, |
|
"train_samples_per_second": 3.053, |
|
"train_steps_per_second": 0.048 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 955, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|