|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 3821, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.6109660574412532e-09, |
|
"logits/chosen": -1.3665199279785156, |
|
"logits/rejected": -1.22934889793396, |
|
"logps/chosen": -4618.75, |
|
"logps/rejected": -2311.76708984375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.610966057441253e-08, |
|
"logits/chosen": -1.2836003303527832, |
|
"logits/rejected": -1.2451469898223877, |
|
"logps/chosen": -2991.450439453125, |
|
"logps/rejected": -2506.193603515625, |
|
"loss": 0.7002, |
|
"rewards/accuracies": 0.3611111044883728, |
|
"rewards/chosen": -0.003100518137216568, |
|
"rewards/margins": -0.0017652130918577313, |
|
"rewards/rejected": -0.0013353050453588367, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.221932114882506e-08, |
|
"logits/chosen": -1.289717435836792, |
|
"logits/rejected": -1.2764991521835327, |
|
"logps/chosen": -2514.9619140625, |
|
"logps/rejected": -2609.55224609375, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.004010486416518688, |
|
"rewards/margins": 0.0017830505967140198, |
|
"rewards/rejected": 0.0022274362854659557, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 7.83289817232376e-08, |
|
"logits/chosen": -1.2992498874664307, |
|
"logits/rejected": -1.2743134498596191, |
|
"logps/chosen": -2825.876220703125, |
|
"logps/rejected": -2155.24853515625, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.008475597016513348, |
|
"rewards/margins": -0.002866474213078618, |
|
"rewards/rejected": 0.011342070996761322, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.0443864229765012e-07, |
|
"logits/chosen": -1.2443135976791382, |
|
"logits/rejected": -1.2479488849639893, |
|
"logps/chosen": -2863.024169921875, |
|
"logps/rejected": -2658.76806640625, |
|
"loss": 0.688, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.04540400952100754, |
|
"rewards/margins": 0.002538739936426282, |
|
"rewards/rejected": 0.042865268886089325, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.3054830287206266e-07, |
|
"logits/chosen": -1.2135803699493408, |
|
"logits/rejected": -1.2339200973510742, |
|
"logps/chosen": -2587.803466796875, |
|
"logps/rejected": -2387.272705078125, |
|
"loss": 0.7147, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.11740453541278839, |
|
"rewards/margins": 0.009207578375935555, |
|
"rewards/rejected": 0.10819695144891739, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.566579634464752e-07, |
|
"logits/chosen": -1.1599149703979492, |
|
"logits/rejected": -1.2010291814804077, |
|
"logps/chosen": -2478.61767578125, |
|
"logps/rejected": -2504.965087890625, |
|
"loss": 0.687, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.17490772902965546, |
|
"rewards/margins": 0.014275921508669853, |
|
"rewards/rejected": 0.16063180565834045, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.8276762402088773e-07, |
|
"logits/chosen": -1.2249014377593994, |
|
"logits/rejected": -1.1395671367645264, |
|
"logps/chosen": -1990.302734375, |
|
"logps/rejected": -1826.503662109375, |
|
"loss": 0.6887, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.20387442409992218, |
|
"rewards/margins": 0.01711631938815117, |
|
"rewards/rejected": 0.18675807118415833, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.0887728459530023e-07, |
|
"logits/chosen": -1.1765159368515015, |
|
"logits/rejected": -1.061958909034729, |
|
"logps/chosen": -2488.86083984375, |
|
"logps/rejected": -2211.66748046875, |
|
"loss": 0.6906, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.27337607741355896, |
|
"rewards/margins": 0.05351179838180542, |
|
"rewards/rejected": 0.21986432373523712, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.349869451697128e-07, |
|
"logits/chosen": -1.2903188467025757, |
|
"logits/rejected": -1.1929465532302856, |
|
"logps/chosen": -2553.20556640625, |
|
"logps/rejected": -1782.565185546875, |
|
"loss": 0.6618, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.3111152946949005, |
|
"rewards/margins": 0.10488128662109375, |
|
"rewards/rejected": 0.20623397827148438, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.610966057441253e-07, |
|
"logits/chosen": -1.2507898807525635, |
|
"logits/rejected": -1.149072289466858, |
|
"logps/chosen": -2592.64892578125, |
|
"logps/rejected": -2194.07470703125, |
|
"loss": 0.6694, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.3974040448665619, |
|
"rewards/margins": 0.13847002387046814, |
|
"rewards/rejected": 0.25893402099609375, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_logits/chosen": -1.246272087097168, |
|
"eval_logits/rejected": -1.2047406435012817, |
|
"eval_logps/chosen": -2626.098388671875, |
|
"eval_logps/rejected": -2193.6435546875, |
|
"eval_loss": 0.6733300685882568, |
|
"eval_rewards/accuracies": 0.550000011920929, |
|
"eval_rewards/chosen": 0.4667675495147705, |
|
"eval_rewards/margins": 0.09803615510463715, |
|
"eval_rewards/rejected": 0.36873137950897217, |
|
"eval_runtime": 271.9101, |
|
"eval_samples_per_second": 7.355, |
|
"eval_steps_per_second": 0.46, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.8720626631853785e-07, |
|
"logits/chosen": -1.2401742935180664, |
|
"logits/rejected": -1.270200490951538, |
|
"logps/chosen": -2110.267822265625, |
|
"logps/rejected": -2461.110595703125, |
|
"loss": 0.7004, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.35654813051223755, |
|
"rewards/margins": -0.08482155203819275, |
|
"rewards/rejected": 0.4413697123527527, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.133159268929504e-07, |
|
"logits/chosen": -1.2979562282562256, |
|
"logits/rejected": -1.2625576257705688, |
|
"logps/chosen": -2472.685302734375, |
|
"logps/rejected": -2142.302490234375, |
|
"loss": 0.6585, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.37470149993896484, |
|
"rewards/margins": 0.10966293513774872, |
|
"rewards/rejected": 0.26503854990005493, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.3942558746736286e-07, |
|
"logits/chosen": -1.2801696062088013, |
|
"logits/rejected": -1.18109130859375, |
|
"logps/chosen": -2593.06103515625, |
|
"logps/rejected": -2058.24267578125, |
|
"loss": 0.6809, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.453033447265625, |
|
"rewards/margins": 0.13208410143852234, |
|
"rewards/rejected": 0.3209493160247803, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.6553524804177545e-07, |
|
"logits/chosen": -1.230254054069519, |
|
"logits/rejected": -1.23989737033844, |
|
"logps/chosen": -2341.89990234375, |
|
"logps/rejected": -2458.319580078125, |
|
"loss": 0.6742, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": 0.476696252822876, |
|
"rewards/margins": 0.016019124537706375, |
|
"rewards/rejected": 0.4606771469116211, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.91644908616188e-07, |
|
"logits/chosen": -1.2208526134490967, |
|
"logits/rejected": -1.2657601833343506, |
|
"logps/chosen": -2545.86474609375, |
|
"logps/rejected": -2762.395751953125, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.4996720850467682, |
|
"rewards/margins": -0.10669504106044769, |
|
"rewards/rejected": 0.6063671112060547, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.1775456919060046e-07, |
|
"logits/chosen": -1.2546348571777344, |
|
"logits/rejected": -1.2102447748184204, |
|
"logps/chosen": -2498.013671875, |
|
"logps/rejected": -1981.0, |
|
"loss": 0.6611, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.5435835123062134, |
|
"rewards/margins": 0.1602788269519806, |
|
"rewards/rejected": 0.3833047151565552, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.4386422976501305e-07, |
|
"logits/chosen": -1.1585718393325806, |
|
"logits/rejected": -1.1300784349441528, |
|
"logps/chosen": -2143.332763671875, |
|
"logps/rejected": -1890.7896728515625, |
|
"loss": 0.6694, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.3146580457687378, |
|
"rewards/margins": 0.0533723309636116, |
|
"rewards/rejected": 0.2612857222557068, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.699738903394256e-07, |
|
"logits/chosen": -1.2575573921203613, |
|
"logits/rejected": -1.2878539562225342, |
|
"logps/chosen": -2350.91015625, |
|
"logps/rejected": -2002.0416259765625, |
|
"loss": 0.6549, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.4910794794559479, |
|
"rewards/margins": 0.04307179898023605, |
|
"rewards/rejected": 0.4480076730251312, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.960835509138381e-07, |
|
"logits/chosen": -1.255906105041504, |
|
"logits/rejected": -1.2742435932159424, |
|
"logps/chosen": -1905.1875, |
|
"logps/rejected": -1874.7750244140625, |
|
"loss": 0.6564, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.6333888173103333, |
|
"rewards/margins": 0.05971439927816391, |
|
"rewards/rejected": 0.5736743211746216, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5.221932114882506e-07, |
|
"logits/chosen": -1.2323532104492188, |
|
"logits/rejected": -1.2203179597854614, |
|
"logps/chosen": -2919.02880859375, |
|
"logps/rejected": -2549.182373046875, |
|
"loss": 0.6496, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.9707019925117493, |
|
"rewards/margins": 0.10560061037540436, |
|
"rewards/rejected": 0.8651013374328613, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_logits/chosen": -1.2029674053192139, |
|
"eval_logits/rejected": -1.162118911743164, |
|
"eval_logps/chosen": -2583.427001953125, |
|
"eval_logps/rejected": -2164.738525390625, |
|
"eval_loss": 0.6496742367744446, |
|
"eval_rewards/accuracies": 0.6039999723434448, |
|
"eval_rewards/chosen": 0.893484890460968, |
|
"eval_rewards/margins": 0.23570162057876587, |
|
"eval_rewards/rejected": 0.6577833294868469, |
|
"eval_runtime": 276.0725, |
|
"eval_samples_per_second": 7.244, |
|
"eval_steps_per_second": 0.453, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5.483028720626631e-07, |
|
"logits/chosen": -1.2817461490631104, |
|
"logits/rejected": -1.21957266330719, |
|
"logps/chosen": -2260.506591796875, |
|
"logps/rejected": -2005.905029296875, |
|
"loss": 0.7114, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.6090816259384155, |
|
"rewards/margins": 0.037415795028209686, |
|
"rewards/rejected": 0.5716658234596252, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 5.744125326370757e-07, |
|
"logits/chosen": -1.24350106716156, |
|
"logits/rejected": -1.1972558498382568, |
|
"logps/chosen": -2426.041748046875, |
|
"logps/rejected": -2175.470458984375, |
|
"loss": 0.6441, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.5010141134262085, |
|
"rewards/margins": 0.11551054567098618, |
|
"rewards/rejected": 0.3855035901069641, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 6.005221932114882e-07, |
|
"logits/chosen": -1.1981332302093506, |
|
"logits/rejected": -1.1106714010238647, |
|
"logps/chosen": -2660.1396484375, |
|
"logps/rejected": -2036.3939208984375, |
|
"loss": 0.6577, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.5493336915969849, |
|
"rewards/margins": 0.2223406583070755, |
|
"rewards/rejected": 0.3269929885864258, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 6.266318537859008e-07, |
|
"logits/chosen": -1.2315361499786377, |
|
"logits/rejected": -1.2117688655853271, |
|
"logps/chosen": -2486.411865234375, |
|
"logps/rejected": -2081.799072265625, |
|
"loss": 0.6941, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.795777440071106, |
|
"rewards/margins": 0.1994515359401703, |
|
"rewards/rejected": 0.5963259339332581, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 6.527415143603134e-07, |
|
"logits/chosen": -1.2543888092041016, |
|
"logits/rejected": -1.2065962553024292, |
|
"logps/chosen": -2412.635986328125, |
|
"logps/rejected": -2150.365234375, |
|
"loss": 0.6646, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.5733842849731445, |
|
"rewards/margins": 0.214861199259758, |
|
"rewards/rejected": 0.3585231602191925, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 6.788511749347257e-07, |
|
"logits/chosen": -1.3239953517913818, |
|
"logits/rejected": -1.237247109413147, |
|
"logps/chosen": -3097.52001953125, |
|
"logps/rejected": -2403.084716796875, |
|
"loss": 0.6408, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.7425528168678284, |
|
"rewards/margins": 0.21805015206336975, |
|
"rewards/rejected": 0.5245026350021362, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 7.049608355091383e-07, |
|
"logits/chosen": -1.3785080909729004, |
|
"logits/rejected": -1.310682773590088, |
|
"logps/chosen": -2970.88916015625, |
|
"logps/rejected": -2465.4521484375, |
|
"loss": 0.6973, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.8264948725700378, |
|
"rewards/margins": 0.1802043616771698, |
|
"rewards/rejected": 0.6462904214859009, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 7.310704960835509e-07, |
|
"logits/chosen": -1.3364454507827759, |
|
"logits/rejected": -1.2810570001602173, |
|
"logps/chosen": -1845.6207275390625, |
|
"logps/rejected": -1552.9630126953125, |
|
"loss": 0.648, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.5993987321853638, |
|
"rewards/margins": 0.21035853028297424, |
|
"rewards/rejected": 0.38904014229774475, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 7.571801566579634e-07, |
|
"logits/chosen": -1.350436806678772, |
|
"logits/rejected": -1.248679757118225, |
|
"logps/chosen": -2682.272705078125, |
|
"logps/rejected": -2016.766845703125, |
|
"loss": 0.6367, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.896868109703064, |
|
"rewards/margins": 0.3260273337364197, |
|
"rewards/rejected": 0.5708408951759338, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 7.83289817232376e-07, |
|
"logits/chosen": -1.371492862701416, |
|
"logits/rejected": -1.3663508892059326, |
|
"logps/chosen": -2637.647705078125, |
|
"logps/rejected": -2172.9091796875, |
|
"loss": 0.6358, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.6898142099380493, |
|
"rewards/margins": 0.30571404099464417, |
|
"rewards/rejected": 0.38410019874572754, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_logits/chosen": -1.261673092842102, |
|
"eval_logits/rejected": -1.220168948173523, |
|
"eval_logps/chosen": -2605.7470703125, |
|
"eval_logps/rejected": -2186.15283203125, |
|
"eval_loss": 0.6671658158302307, |
|
"eval_rewards/accuracies": 0.5899999737739563, |
|
"eval_rewards/chosen": 0.6702810525894165, |
|
"eval_rewards/margins": 0.22664184868335724, |
|
"eval_rewards/rejected": 0.44363921880722046, |
|
"eval_runtime": 276.9466, |
|
"eval_samples_per_second": 7.222, |
|
"eval_steps_per_second": 0.451, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 8.093994778067885e-07, |
|
"logits/chosen": -1.2656062841415405, |
|
"logits/rejected": -1.1738948822021484, |
|
"logps/chosen": -2773.645751953125, |
|
"logps/rejected": -2556.039306640625, |
|
"loss": 0.6351, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.5250165462493896, |
|
"rewards/margins": 0.3903385102748871, |
|
"rewards/rejected": 0.13467800617218018, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 8.355091383812009e-07, |
|
"logits/chosen": -1.2407519817352295, |
|
"logits/rejected": -1.159961462020874, |
|
"logps/chosen": -2324.83154296875, |
|
"logps/rejected": -2033.7972412109375, |
|
"loss": 0.6704, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.46215081214904785, |
|
"rewards/margins": 0.13348433375358582, |
|
"rewards/rejected": 0.32866644859313965, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.616187989556135e-07, |
|
"logits/chosen": -1.1832590103149414, |
|
"logits/rejected": -1.2228165864944458, |
|
"logps/chosen": -2444.755859375, |
|
"logps/rejected": -2273.108154296875, |
|
"loss": 0.7001, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.41592496633529663, |
|
"rewards/margins": -0.03532214090228081, |
|
"rewards/rejected": 0.45124712586402893, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.877284595300261e-07, |
|
"logits/chosen": -1.3150399923324585, |
|
"logits/rejected": -1.2690991163253784, |
|
"logps/chosen": -2488.77099609375, |
|
"logps/rejected": -2145.29248046875, |
|
"loss": 0.6355, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.3536873757839203, |
|
"rewards/margins": 0.19547489285469055, |
|
"rewards/rejected": 0.15821249783039093, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.138381201044386e-07, |
|
"logits/chosen": -1.3865059614181519, |
|
"logits/rejected": -1.3378633260726929, |
|
"logps/chosen": -2283.524169921875, |
|
"logps/rejected": -2091.947265625, |
|
"loss": 0.6695, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.8051117658615112, |
|
"rewards/margins": 0.2420201301574707, |
|
"rewards/rejected": 0.5630916357040405, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.399477806788512e-07, |
|
"logits/chosen": -1.414058804512024, |
|
"logits/rejected": -1.3964297771453857, |
|
"logps/chosen": -2432.849365234375, |
|
"logps/rejected": -2076.60107421875, |
|
"loss": 0.6743, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 1.3647016286849976, |
|
"rewards/margins": 0.44374021887779236, |
|
"rewards/rejected": 0.9209613800048828, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.660574412532637e-07, |
|
"logits/chosen": -1.3959238529205322, |
|
"logits/rejected": -1.378598928451538, |
|
"logps/chosen": -2425.37548828125, |
|
"logps/rejected": -1807.110107421875, |
|
"loss": 0.6755, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.5852380990982056, |
|
"rewards/margins": 0.19205673038959503, |
|
"rewards/rejected": 0.39318135380744934, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.921671018276761e-07, |
|
"logits/chosen": -1.3709585666656494, |
|
"logits/rejected": -1.2900068759918213, |
|
"logps/chosen": -2094.26220703125, |
|
"logps/rejected": -2028.6328125, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.4213894009590149, |
|
"rewards/margins": 0.14318980276584625, |
|
"rewards/rejected": 0.27819958329200745, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.999897712489534e-07, |
|
"logits/chosen": -1.5162689685821533, |
|
"logits/rejected": -1.4461922645568848, |
|
"logps/chosen": -2573.37890625, |
|
"logps/rejected": -2211.701171875, |
|
"loss": 0.656, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.8437172174453735, |
|
"rewards/margins": 0.25329241156578064, |
|
"rewards/rejected": 0.5904248356819153, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.999396722513154e-07, |
|
"logits/chosen": -1.3767322301864624, |
|
"logits/rejected": -1.356715440750122, |
|
"logps/chosen": -2870.7294921875, |
|
"logps/rejected": -2530.47119140625, |
|
"loss": 0.6783, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.4322354197502136, |
|
"rewards/margins": 0.2268580198287964, |
|
"rewards/rejected": 0.20537741482257843, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_logits/chosen": -1.4016751050949097, |
|
"eval_logits/rejected": -1.3598042726516724, |
|
"eval_logps/chosen": -2644.43896484375, |
|
"eval_logps/rejected": -2221.267578125, |
|
"eval_loss": 0.7143814563751221, |
|
"eval_rewards/accuracies": 0.5680000185966492, |
|
"eval_rewards/chosen": 0.2833646237850189, |
|
"eval_rewards/margins": 0.190872922539711, |
|
"eval_rewards/rejected": 0.09249173104763031, |
|
"eval_runtime": 272.9743, |
|
"eval_samples_per_second": 7.327, |
|
"eval_steps_per_second": 0.458, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.99847828434916e-07, |
|
"logits/chosen": -1.4666458368301392, |
|
"logits/rejected": -1.4085910320281982, |
|
"logps/chosen": -2439.627197265625, |
|
"logps/rejected": -1939.951904296875, |
|
"loss": 0.6728, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.6307787895202637, |
|
"rewards/margins": 0.3261147141456604, |
|
"rewards/rejected": 0.3046640455722809, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.99714247468688e-07, |
|
"logits/chosen": -1.6231820583343506, |
|
"logits/rejected": -1.5343637466430664, |
|
"logps/chosen": -2322.893798828125, |
|
"logps/rejected": -1743.1722412109375, |
|
"loss": 0.6625, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 1.083807110786438, |
|
"rewards/margins": 0.2774105966091156, |
|
"rewards/rejected": 0.8063966035842896, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.995389405066031e-07, |
|
"logits/chosen": -1.5276827812194824, |
|
"logits/rejected": -1.5160127878189087, |
|
"logps/chosen": -2009.1494140625, |
|
"logps/rejected": -1984.708251953125, |
|
"loss": 0.6479, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.9781614542007446, |
|
"rewards/margins": 0.3814542889595032, |
|
"rewards/rejected": 0.5967071056365967, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.993219221867424e-07, |
|
"logits/chosen": -1.6460065841674805, |
|
"logits/rejected": -1.618194818496704, |
|
"logps/chosen": -3029.74072265625, |
|
"logps/rejected": -2566.05126953125, |
|
"loss": 0.6847, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.7843648195266724, |
|
"rewards/margins": 0.4065024256706238, |
|
"rewards/rejected": 0.3778623044490814, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.990632106300731e-07, |
|
"logits/chosen": -1.6142499446868896, |
|
"logits/rejected": -1.526350975036621, |
|
"logps/chosen": -2914.735107421875, |
|
"logps/rejected": -2487.213623046875, |
|
"loss": 0.7076, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.6956815123558044, |
|
"rewards/margins": 0.29298099875450134, |
|
"rewards/rejected": 0.4027004837989807, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.98762827438936e-07, |
|
"logits/chosen": -1.636639952659607, |
|
"logits/rejected": -1.5618512630462646, |
|
"logps/chosen": -2808.490478515625, |
|
"logps/rejected": -2267.61962890625, |
|
"loss": 0.755, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.7195907831192017, |
|
"rewards/margins": 0.28500932455062866, |
|
"rewards/rejected": 0.434581458568573, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.98420797695241e-07, |
|
"logits/chosen": -1.7694892883300781, |
|
"logits/rejected": -1.6265687942504883, |
|
"logps/chosen": -2040.9447021484375, |
|
"logps/rejected": -1787.8951416015625, |
|
"loss": 0.6847, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.03115728497505188, |
|
"rewards/margins": 0.17865832149982452, |
|
"rewards/rejected": -0.14750102162361145, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.980371499583729e-07, |
|
"logits/chosen": -1.6505523920059204, |
|
"logits/rejected": -1.5521347522735596, |
|
"logps/chosen": -2404.302490234375, |
|
"logps/rejected": -1731.2249755859375, |
|
"loss": 0.7161, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.9065849184989929, |
|
"rewards/margins": 0.35126957297325134, |
|
"rewards/rejected": 0.555315375328064, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.976119162628079e-07, |
|
"logits/chosen": -1.6144253015518188, |
|
"logits/rejected": -1.5651835203170776, |
|
"logps/chosen": -2368.7080078125, |
|
"logps/rejected": -2117.01611328125, |
|
"loss": 0.6833, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.7247565388679504, |
|
"rewards/margins": 0.07497567683458328, |
|
"rewards/rejected": 0.6497808694839478, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.971451321154368e-07, |
|
"logits/chosen": -1.65665602684021, |
|
"logits/rejected": -1.6473827362060547, |
|
"logps/chosen": -2541.382568359375, |
|
"logps/rejected": -2192.03369140625, |
|
"loss": 0.751, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 1.047736406326294, |
|
"rewards/margins": 0.14227107167243958, |
|
"rewards/rejected": 0.9054654240608215, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_logits/chosen": -1.5418598651885986, |
|
"eval_logits/rejected": -1.4749985933303833, |
|
"eval_logps/chosen": -2538.240478515625, |
|
"eval_logps/rejected": -2132.940185546875, |
|
"eval_loss": 0.6888701319694519, |
|
"eval_rewards/accuracies": 0.6019999980926514, |
|
"eval_rewards/chosen": 1.3453459739685059, |
|
"eval_rewards/margins": 0.3695811927318573, |
|
"eval_rewards/rejected": 0.9757645726203918, |
|
"eval_runtime": 276.6017, |
|
"eval_samples_per_second": 7.231, |
|
"eval_steps_per_second": 0.452, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.966368364926017e-07, |
|
"logits/chosen": -1.6798099279403687, |
|
"logits/rejected": -1.6286773681640625, |
|
"logps/chosen": -2536.083984375, |
|
"logps/rejected": -2251.199951171875, |
|
"loss": 0.6877, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.9014034271240234, |
|
"rewards/margins": 0.2098127156496048, |
|
"rewards/rejected": 0.6915906667709351, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.960870718368407e-07, |
|
"logits/chosen": -1.5539346933364868, |
|
"logits/rejected": -1.4379912614822388, |
|
"logps/chosen": -3408.21044921875, |
|
"logps/rejected": -2854.2998046875, |
|
"loss": 0.7279, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.7003393769264221, |
|
"rewards/margins": 0.01242439728230238, |
|
"rewards/rejected": 0.6879148483276367, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.954958840533446e-07, |
|
"logits/chosen": -1.5695605278015137, |
|
"logits/rejected": -1.5862843990325928, |
|
"logps/chosen": -2218.409423828125, |
|
"logps/rejected": -2106.035888671875, |
|
"loss": 0.6549, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.5118662118911743, |
|
"rewards/margins": 0.12286017835140228, |
|
"rewards/rejected": 0.3890060782432556, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.948633225061229e-07, |
|
"logits/chosen": -1.6302436590194702, |
|
"logits/rejected": -1.5741255283355713, |
|
"logps/chosen": -3048.370361328125, |
|
"logps/rejected": -2719.57666015625, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.4593297839164734, |
|
"rewards/margins": 0.29948073625564575, |
|
"rewards/rejected": 0.15984904766082764, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.94189440013883e-07, |
|
"logits/chosen": -1.4814682006835938, |
|
"logits/rejected": -1.423117995262146, |
|
"logps/chosen": -2182.054931640625, |
|
"logps/rejected": -2035.183837890625, |
|
"loss": 0.6641, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.7267901301383972, |
|
"rewards/margins": 0.27085989713668823, |
|
"rewards/rejected": 0.45593029260635376, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.93474292845619e-07, |
|
"logits/chosen": -1.4905208349227905, |
|
"logits/rejected": -1.429957628250122, |
|
"logps/chosen": -2842.08740234375, |
|
"logps/rejected": -2393.443115234375, |
|
"loss": 0.6643, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 1.1947301626205444, |
|
"rewards/margins": 0.3226833939552307, |
|
"rewards/rejected": 0.8720466494560242, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.927179407159138e-07, |
|
"logits/chosen": -1.5065213441848755, |
|
"logits/rejected": -1.4873453378677368, |
|
"logps/chosen": -2420.403076171875, |
|
"logps/rejected": -2188.166015625, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.7575017213821411, |
|
"rewards/margins": 0.20866632461547852, |
|
"rewards/rejected": 0.5488353967666626, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.919204467799522e-07, |
|
"logits/chosen": -1.4751510620117188, |
|
"logits/rejected": -1.531491994857788, |
|
"logps/chosen": -1874.2708740234375, |
|
"logps/rejected": -2106.25830078125, |
|
"loss": 0.7035, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.7245445251464844, |
|
"rewards/margins": 0.1843484789133072, |
|
"rewards/rejected": 0.5401960611343384, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.910818776282485e-07, |
|
"logits/chosen": -1.467524528503418, |
|
"logits/rejected": -1.3987579345703125, |
|
"logps/chosen": -2417.3818359375, |
|
"logps/rejected": -1983.3590087890625, |
|
"loss": 0.6849, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.8371850848197937, |
|
"rewards/margins": 0.24946501851081848, |
|
"rewards/rejected": 0.5877200365066528, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.902023032810858e-07, |
|
"logits/chosen": -1.2535181045532227, |
|
"logits/rejected": -1.306420087814331, |
|
"logps/chosen": -2412.548095703125, |
|
"logps/rejected": -2069.903564453125, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.7285448312759399, |
|
"rewards/margins": 0.16802072525024414, |
|
"rewards/rejected": 0.5605241060256958, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_logits/chosen": -1.3380622863769531, |
|
"eval_logits/rejected": -1.2841229438781738, |
|
"eval_logps/chosen": -2588.1318359375, |
|
"eval_logps/rejected": -2176.009033203125, |
|
"eval_loss": 0.6643623113632202, |
|
"eval_rewards/accuracies": 0.621999979019165, |
|
"eval_rewards/chosen": 0.8464368581771851, |
|
"eval_rewards/margins": 0.30136004090309143, |
|
"eval_rewards/rejected": 0.5450767874717712, |
|
"eval_runtime": 271.1854, |
|
"eval_samples_per_second": 7.375, |
|
"eval_steps_per_second": 0.461, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.892817971826687e-07, |
|
"logits/chosen": -1.4630873203277588, |
|
"logits/rejected": -1.3637323379516602, |
|
"logps/chosen": -3049.18017578125, |
|
"logps/rejected": -2472.287353515625, |
|
"loss": 0.6826, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.8837827444076538, |
|
"rewards/margins": 0.23291194438934326, |
|
"rewards/rejected": 0.6508709192276001, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.883204361949916e-07, |
|
"logits/chosen": -1.4127538204193115, |
|
"logits/rejected": -1.3411035537719727, |
|
"logps/chosen": -2637.56494140625, |
|
"logps/rejected": -2205.36328125, |
|
"loss": 0.7038, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.8256186246871948, |
|
"rewards/margins": 0.11079633235931396, |
|
"rewards/rejected": 0.7148222923278809, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.873183005914202e-07, |
|
"logits/chosen": -1.4629735946655273, |
|
"logits/rejected": -1.4857399463653564, |
|
"logps/chosen": -2502.25732421875, |
|
"logps/rejected": -2328.2666015625, |
|
"loss": 0.6947, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.7244993448257446, |
|
"rewards/margins": 0.07214044034481049, |
|
"rewards/rejected": 0.652358889579773, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.86275474049989e-07, |
|
"logits/chosen": -1.4567070007324219, |
|
"logits/rejected": -1.3931400775909424, |
|
"logps/chosen": -2890.63671875, |
|
"logps/rejected": -2213.41455078125, |
|
"loss": 0.7202, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 1.1594771146774292, |
|
"rewards/margins": 0.5113081932067871, |
|
"rewards/rejected": 0.6481689214706421, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.851920436464144e-07, |
|
"logits/chosen": -1.3868637084960938, |
|
"logits/rejected": -1.364241361618042, |
|
"logps/chosen": -2136.292236328125, |
|
"logps/rejected": -1974.369384765625, |
|
"loss": 0.729, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.8055019378662109, |
|
"rewards/margins": 0.1073065996170044, |
|
"rewards/rejected": 0.6981953978538513, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.840680998468231e-07, |
|
"logits/chosen": -1.3056769371032715, |
|
"logits/rejected": -1.2759480476379395, |
|
"logps/chosen": -2620.52880859375, |
|
"logps/rejected": -2473.78369140625, |
|
"loss": 0.777, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.027517640963196754, |
|
"rewards/margins": 0.15573057532310486, |
|
"rewards/rejected": -0.12821291387081146, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.82903736500199e-07, |
|
"logits/chosen": -1.4665504693984985, |
|
"logits/rejected": -1.394852638244629, |
|
"logps/chosen": -2662.813720703125, |
|
"logps/rejected": -2131.2236328125, |
|
"loss": 0.6739, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.3333565294742584, |
|
"rewards/margins": 0.1078319326043129, |
|
"rewards/rejected": 0.22552458941936493, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.81699050830546e-07, |
|
"logits/chosen": -1.7163680791854858, |
|
"logits/rejected": -1.6032377481460571, |
|
"logps/chosen": -2856.134033203125, |
|
"logps/rejected": -2047.651611328125, |
|
"loss": 0.6547, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 1.5594689846038818, |
|
"rewards/margins": 0.5581027865409851, |
|
"rewards/rejected": 1.001366138458252, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.804541434287716e-07, |
|
"logits/chosen": -1.684930443763733, |
|
"logits/rejected": -1.6162173748016357, |
|
"logps/chosen": -2177.42919921875, |
|
"logps/rejected": -2031.7994384765625, |
|
"loss": 0.6615, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.99462890625, |
|
"rewards/margins": 0.3446377217769623, |
|
"rewards/rejected": 0.6499910950660706, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.791691182442852e-07, |
|
"logits/chosen": -1.5711638927459717, |
|
"logits/rejected": -1.599442958831787, |
|
"logps/chosen": -2674.912353515625, |
|
"logps/rejected": -2773.044189453125, |
|
"loss": 0.6437, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.8802645802497864, |
|
"rewards/margins": 0.1743774712085724, |
|
"rewards/rejected": 0.7058870792388916, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_logits/chosen": -1.4816925525665283, |
|
"eval_logits/rejected": -1.4526340961456299, |
|
"eval_logps/chosen": -2590.2763671875, |
|
"eval_logps/rejected": -2182.556640625, |
|
"eval_loss": 0.6724444627761841, |
|
"eval_rewards/accuracies": 0.6420000195503235, |
|
"eval_rewards/chosen": 0.8249886631965637, |
|
"eval_rewards/margins": 0.34538939595222473, |
|
"eval_rewards/rejected": 0.4795991778373718, |
|
"eval_runtime": 276.6062, |
|
"eval_samples_per_second": 7.23, |
|
"eval_steps_per_second": 0.452, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.7784408257632e-07, |
|
"logits/chosen": -1.5237572193145752, |
|
"logits/rejected": -1.545637845993042, |
|
"logps/chosen": -1991.960693359375, |
|
"logps/rejected": -2176.53271484375, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.6580592393875122, |
|
"rewards/margins": 0.20243236422538757, |
|
"rewards/rejected": 0.455626904964447, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.764791470649727e-07, |
|
"logits/chosen": -1.5924136638641357, |
|
"logits/rejected": -1.5551161766052246, |
|
"logps/chosen": -1903.432373046875, |
|
"logps/rejected": -1853.7086181640625, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.7805501222610474, |
|
"rewards/margins": 0.07067215442657471, |
|
"rewards/rejected": 0.7098779678344727, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.750744256819658e-07, |
|
"logits/chosen": -1.620234727859497, |
|
"logits/rejected": -1.572274088859558, |
|
"logps/chosen": -2286.07470703125, |
|
"logps/rejected": -2140.6611328125, |
|
"loss": 0.7207, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.7918757200241089, |
|
"rewards/margins": -0.18175740540027618, |
|
"rewards/rejected": 0.9736331701278687, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.736300357211307e-07, |
|
"logits/chosen": -1.7339751720428467, |
|
"logits/rejected": -1.6665589809417725, |
|
"logps/chosen": -2919.00537109375, |
|
"logps/rejected": -2452.6064453125, |
|
"loss": 0.6461, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.843177318572998, |
|
"rewards/margins": 0.30998367071151733, |
|
"rewards/rejected": 0.5331936478614807, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.721460977886135e-07, |
|
"logits/chosen": -1.6689131259918213, |
|
"logits/rejected": -1.630812644958496, |
|
"logps/chosen": -2635.65673828125, |
|
"logps/rejected": -2379.21826171875, |
|
"loss": 0.6245, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.748379647731781, |
|
"rewards/margins": 0.33323729038238525, |
|
"rewards/rejected": 0.4151424467563629, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.706227357928043e-07, |
|
"logits/chosen": -1.6714589595794678, |
|
"logits/rejected": -1.5527595281600952, |
|
"logps/chosen": -2750.80908203125, |
|
"logps/rejected": -2120.307373046875, |
|
"loss": 0.6817, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.7476462125778198, |
|
"rewards/margins": 0.1452573537826538, |
|
"rewards/rejected": 0.6023889183998108, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.690600769339914e-07, |
|
"logits/chosen": -1.5319817066192627, |
|
"logits/rejected": -1.5909537076950073, |
|
"logps/chosen": -2279.741943359375, |
|
"logps/rejected": -2419.86572265625, |
|
"loss": 0.7147, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 1.177549958229065, |
|
"rewards/margins": 0.08217627555131912, |
|
"rewards/rejected": 1.0953737497329712, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.6745825169374e-07, |
|
"logits/chosen": -1.6878665685653687, |
|
"logits/rejected": -1.612125039100647, |
|
"logps/chosen": -2866.05712890625, |
|
"logps/rejected": -2377.9013671875, |
|
"loss": 0.6833, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.9150659441947937, |
|
"rewards/margins": 0.4293249547481537, |
|
"rewards/rejected": 0.4857410788536072, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.658173938239966e-07, |
|
"logits/chosen": -1.6150919198989868, |
|
"logits/rejected": -1.576047420501709, |
|
"logps/chosen": -2437.158935546875, |
|
"logps/rejected": -1969.7994384765625, |
|
"loss": 0.6852, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.29879194498062134, |
|
"rewards/margins": 0.09543965756893158, |
|
"rewards/rejected": 0.20335224270820618, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.64137640335921e-07, |
|
"logits/chosen": -1.7476589679718018, |
|
"logits/rejected": -1.7255117893218994, |
|
"logps/chosen": -2327.221923828125, |
|
"logps/rejected": -2246.793701171875, |
|
"loss": 0.8109, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.8766447901725769, |
|
"rewards/margins": 0.11921733617782593, |
|
"rewards/rejected": 0.7574275732040405, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": -1.576106309890747, |
|
"eval_logits/rejected": -1.5267276763916016, |
|
"eval_logps/chosen": -2557.870849609375, |
|
"eval_logps/rejected": -2155.783203125, |
|
"eval_loss": 0.6654534339904785, |
|
"eval_rewards/accuracies": 0.6380000114440918, |
|
"eval_rewards/chosen": 1.1490436792373657, |
|
"eval_rewards/margins": 0.40170982480049133, |
|
"eval_rewards/rejected": 0.7473338842391968, |
|
"eval_runtime": 276.556, |
|
"eval_samples_per_second": 7.232, |
|
"eval_steps_per_second": 0.452, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.624191314884461e-07, |
|
"logits/chosen": -1.592280626296997, |
|
"logits/rejected": -1.648185133934021, |
|
"logps/chosen": -2357.55517578125, |
|
"logps/rejected": -2246.37744140625, |
|
"loss": 0.8166, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.5510924458503723, |
|
"rewards/margins": -0.10927625000476837, |
|
"rewards/rejected": 0.6603686809539795, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.606620107765662e-07, |
|
"logits/chosen": -1.6358171701431274, |
|
"logits/rejected": -1.5666896104812622, |
|
"logps/chosen": -2290.347900390625, |
|
"logps/rejected": -2105.61474609375, |
|
"loss": 0.73, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.5445422530174255, |
|
"rewards/margins": 0.03851698711514473, |
|
"rewards/rejected": 0.5060251951217651, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.58866424919355e-07, |
|
"logits/chosen": -1.5668513774871826, |
|
"logits/rejected": -1.510426640510559, |
|
"logps/chosen": -2239.77783203125, |
|
"logps/rejected": -1951.4498291015625, |
|
"loss": 0.5974, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.8219590187072754, |
|
"rewards/margins": 0.4823435842990875, |
|
"rewards/rejected": 0.3396154046058655, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.570325238477148e-07, |
|
"logits/chosen": -1.5893141031265259, |
|
"logits/rejected": -1.4693152904510498, |
|
"logps/chosen": -2884.8115234375, |
|
"logps/rejected": -2526.07763671875, |
|
"loss": 0.6583, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.786440372467041, |
|
"rewards/margins": 0.3468918800354004, |
|
"rewards/rejected": 0.4395485520362854, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.551604606918575e-07, |
|
"logits/chosen": -1.6071548461914062, |
|
"logits/rejected": -1.5999706983566284, |
|
"logps/chosen": -2922.397705078125, |
|
"logps/rejected": -2410.974365234375, |
|
"loss": 0.6552, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.24767303466796875, |
|
"rewards/margins": 0.08863957971334457, |
|
"rewards/rejected": 0.1590333878993988, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.532503917685178e-07, |
|
"logits/chosen": -1.5351760387420654, |
|
"logits/rejected": -1.479089379310608, |
|
"logps/chosen": -2803.19140625, |
|
"logps/rejected": -2390.5615234375, |
|
"loss": 0.6987, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 1.1132092475891113, |
|
"rewards/margins": 0.5485633611679077, |
|
"rewards/rejected": 0.5646459460258484, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.513024765679012e-07, |
|
"logits/chosen": -1.6146430969238281, |
|
"logits/rejected": -1.5676028728485107, |
|
"logps/chosen": -2616.34716796875, |
|
"logps/rejected": -2277.76611328125, |
|
"loss": 0.644, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 1.0030187368392944, |
|
"rewards/margins": 0.23817987740039825, |
|
"rewards/rejected": 0.7648389339447021, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.493168777403662e-07, |
|
"logits/chosen": -1.543965220451355, |
|
"logits/rejected": -1.5248345136642456, |
|
"logps/chosen": -2174.06982421875, |
|
"logps/rejected": -2065.94140625, |
|
"loss": 0.663, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.7886601686477661, |
|
"rewards/margins": 0.3570699691772461, |
|
"rewards/rejected": 0.4315902590751648, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.472937610828436e-07, |
|
"logits/chosen": -1.489497423171997, |
|
"logits/rejected": -1.5204612016677856, |
|
"logps/chosen": -2214.174560546875, |
|
"logps/rejected": -2092.203125, |
|
"loss": 0.6128, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.5309593081474304, |
|
"rewards/margins": 0.28473734855651855, |
|
"rewards/rejected": 0.24622204899787903, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.452332955249919e-07, |
|
"logits/chosen": -1.4669028520584106, |
|
"logits/rejected": -1.3905723094940186, |
|
"logps/chosen": -2331.98046875, |
|
"logps/rejected": -2148.8232421875, |
|
"loss": 0.6725, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.7474310994148254, |
|
"rewards/margins": 0.1214209794998169, |
|
"rewards/rejected": 0.6260100603103638, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_logits/chosen": -1.4909840822219849, |
|
"eval_logits/rejected": -1.4486072063446045, |
|
"eval_logps/chosen": -2530.19140625, |
|
"eval_logps/rejected": -2130.6240234375, |
|
"eval_loss": 0.6835622787475586, |
|
"eval_rewards/accuracies": 0.6159999966621399, |
|
"eval_rewards/chosen": 1.4258359670639038, |
|
"eval_rewards/margins": 0.4269082844257355, |
|
"eval_rewards/rejected": 0.998927652835846, |
|
"eval_runtime": 274.4345, |
|
"eval_samples_per_second": 7.288, |
|
"eval_steps_per_second": 0.455, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.431356531150925e-07, |
|
"logits/chosen": -1.6749414205551147, |
|
"logits/rejected": -1.6013925075531006, |
|
"logps/chosen": -2510.5146484375, |
|
"logps/rejected": -2046.6390380859375, |
|
"loss": 0.6202, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 1.2002885341644287, |
|
"rewards/margins": 0.3314119875431061, |
|
"rewards/rejected": 0.8688764572143555, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.410010090056828e-07, |
|
"logits/chosen": -1.4817672967910767, |
|
"logits/rejected": -1.4551513195037842, |
|
"logps/chosen": -2566.467041015625, |
|
"logps/rejected": -2493.96630859375, |
|
"loss": 0.8313, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.86517333984375, |
|
"rewards/margins": 0.3244161009788513, |
|
"rewards/rejected": 0.5407571792602539, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.388295414389318e-07, |
|
"logits/chosen": -1.3311041593551636, |
|
"logits/rejected": -1.3033561706542969, |
|
"logps/chosen": -2344.6435546875, |
|
"logps/rejected": -1979.4986572265625, |
|
"loss": 0.63, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.541279673576355, |
|
"rewards/margins": 0.37267619371414185, |
|
"rewards/rejected": 0.16860340535640717, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.366214317317562e-07, |
|
"logits/chosen": -1.4732763767242432, |
|
"logits/rejected": -1.4141404628753662, |
|
"logps/chosen": -2585.295166015625, |
|
"logps/rejected": -2203.73681640625, |
|
"loss": 0.6414, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.7276701927185059, |
|
"rewards/margins": 0.4391177296638489, |
|
"rewards/rejected": 0.28855252265930176, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.343768642606813e-07, |
|
"logits/chosen": -1.6381938457489014, |
|
"logits/rejected": -1.5806336402893066, |
|
"logps/chosen": -2596.53662109375, |
|
"logps/rejected": -2025.1402587890625, |
|
"loss": 0.6646, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 1.1860891580581665, |
|
"rewards/margins": 0.20563539862632751, |
|
"rewards/rejected": 0.9804538488388062, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.320960264464448e-07, |
|
"logits/chosen": -1.6365054845809937, |
|
"logits/rejected": -1.6070477962493896, |
|
"logps/chosen": -1896.517822265625, |
|
"logps/rejected": -1744.9036865234375, |
|
"loss": 0.6713, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.994465172290802, |
|
"rewards/margins": 0.32086846232414246, |
|
"rewards/rejected": 0.6735965013504028, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.29779108738348e-07, |
|
"logits/chosen": -1.7710649967193604, |
|
"logits/rejected": -1.715428113937378, |
|
"logps/chosen": -2846.1181640625, |
|
"logps/rejected": -2703.55029296875, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.7387205362319946, |
|
"rewards/margins": 0.45319658517837524, |
|
"rewards/rejected": 0.2855239510536194, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.274263045983528e-07, |
|
"logits/chosen": -1.702950119972229, |
|
"logits/rejected": -1.728872299194336, |
|
"logps/chosen": -2756.061767578125, |
|
"logps/rejected": -2911.74658203125, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.11702828109264374, |
|
"rewards/margins": 0.2605035901069641, |
|
"rewards/rejected": -0.14347527921199799, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.250378104849275e-07, |
|
"logits/chosen": -1.6368192434310913, |
|
"logits/rejected": -1.5715930461883545, |
|
"logps/chosen": -2486.092041015625, |
|
"logps/rejected": -2241.58984375, |
|
"loss": 0.6305, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 1.089656114578247, |
|
"rewards/margins": 0.5296434164047241, |
|
"rewards/rejected": 0.560012698173523, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.226138258366436e-07, |
|
"logits/chosen": -1.5675978660583496, |
|
"logits/rejected": -1.5653481483459473, |
|
"logps/chosen": -2241.11865234375, |
|
"logps/rejected": -2163.8515625, |
|
"loss": 0.7027, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.998638927936554, |
|
"rewards/margins": 0.19615396857261658, |
|
"rewards/rejected": 0.8024848699569702, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": -1.5565454959869385, |
|
"eval_logits/rejected": -1.5094586610794067, |
|
"eval_logps/chosen": -2591.25048828125, |
|
"eval_logps/rejected": -2183.227783203125, |
|
"eval_loss": 0.6689639091491699, |
|
"eval_rewards/accuracies": 0.6259999871253967, |
|
"eval_rewards/chosen": 0.8152462244033813, |
|
"eval_rewards/margins": 0.34235623478889465, |
|
"eval_rewards/rejected": 0.47289004921913147, |
|
"eval_runtime": 273.7847, |
|
"eval_samples_per_second": 7.305, |
|
"eval_steps_per_second": 0.457, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.201545530555213e-07, |
|
"logits/chosen": -1.6780484914779663, |
|
"logits/rejected": -1.7151432037353516, |
|
"logps/chosen": -2156.362060546875, |
|
"logps/rejected": -2084.3037109375, |
|
"loss": 0.6435, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.6366477012634277, |
|
"rewards/margins": 0.6162145137786865, |
|
"rewards/rejected": 0.020433183759450912, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.176601974901304e-07, |
|
"logits/chosen": -1.624211072921753, |
|
"logits/rejected": -1.633967638015747, |
|
"logps/chosen": -2648.594970703125, |
|
"logps/rejected": -2590.938720703125, |
|
"loss": 0.6784, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.47067689895629883, |
|
"rewards/margins": 0.21159331500530243, |
|
"rewards/rejected": 0.2590835690498352, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.151309674184427e-07, |
|
"logits/chosen": -1.6630769968032837, |
|
"logits/rejected": -1.5527112483978271, |
|
"logps/chosen": -2740.18212890625, |
|
"logps/rejected": -1866.758544921875, |
|
"loss": 0.6681, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.5968760848045349, |
|
"rewards/margins": 0.35547563433647156, |
|
"rewards/rejected": 0.24140043556690216, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.125670740304409e-07, |
|
"logits/chosen": -1.5219511985778809, |
|
"logits/rejected": -1.4647961854934692, |
|
"logps/chosen": -2756.6201171875, |
|
"logps/rejected": -2311.59423828125, |
|
"loss": 0.6871, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 1.1456257104873657, |
|
"rewards/margins": 0.25537100434303284, |
|
"rewards/rejected": 0.8902546763420105, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.099687314104858e-07, |
|
"logits/chosen": -1.6184484958648682, |
|
"logits/rejected": -1.5983049869537354, |
|
"logps/chosen": -2442.0712890625, |
|
"logps/rejected": -2581.9248046875, |
|
"loss": 0.6509, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.6723107099533081, |
|
"rewards/margins": 0.27203884720802307, |
|
"rewards/rejected": 0.40027180314064026, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.073361565194381e-07, |
|
"logits/chosen": -1.5782119035720825, |
|
"logits/rejected": -1.6148672103881836, |
|
"logps/chosen": -2074.51611328125, |
|
"logps/rejected": -2046.3336181640625, |
|
"loss": 0.6968, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.2032502144575119, |
|
"rewards/margins": 0.15900087356567383, |
|
"rewards/rejected": 0.04424933344125748, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.046695691765435e-07, |
|
"logits/chosen": -1.7396224737167358, |
|
"logits/rejected": -1.7009761333465576, |
|
"logps/chosen": -2369.361328125, |
|
"logps/rejected": -2038.233642578125, |
|
"loss": 0.633, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.712352991104126, |
|
"rewards/margins": 0.39164695143699646, |
|
"rewards/rejected": 0.32070595026016235, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.019691920410778e-07, |
|
"logits/chosen": -1.8201916217803955, |
|
"logits/rejected": -1.8124058246612549, |
|
"logps/chosen": -2379.390869140625, |
|
"logps/rejected": -2190.876708984375, |
|
"loss": 0.7336, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 1.1395015716552734, |
|
"rewards/margins": 0.2840576171875, |
|
"rewards/rejected": 0.855444073677063, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.992352505937547e-07, |
|
"logits/chosen": -1.7162584066390991, |
|
"logits/rejected": -1.6304266452789307, |
|
"logps/chosen": -2344.68701171875, |
|
"logps/rejected": -1602.481201171875, |
|
"loss": 0.7, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.6249530911445618, |
|
"rewards/margins": 0.44733700156211853, |
|
"rewards/rejected": 0.17761602997779846, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.964679731178984e-07, |
|
"logits/chosen": -1.625732183456421, |
|
"logits/rejected": -1.5899415016174316, |
|
"logps/chosen": -2034.600341796875, |
|
"logps/rejected": -1786.969482421875, |
|
"loss": 0.6421, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.20644518733024597, |
|
"rewards/margins": 0.36025696992874146, |
|
"rewards/rejected": -0.15381178259849548, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_logits/chosen": -1.5785434246063232, |
|
"eval_logits/rejected": -1.5381590127944946, |
|
"eval_logps/chosen": -2619.966064453125, |
|
"eval_logps/rejected": -2211.10400390625, |
|
"eval_loss": 0.6512665748596191, |
|
"eval_rewards/accuracies": 0.6639999747276306, |
|
"eval_rewards/chosen": 0.5280923843383789, |
|
"eval_rewards/margins": 0.33396461606025696, |
|
"eval_rewards/rejected": 0.19412773847579956, |
|
"eval_runtime": 273.9824, |
|
"eval_samples_per_second": 7.3, |
|
"eval_steps_per_second": 0.456, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.936675906803815e-07, |
|
"logits/chosen": -1.623098373413086, |
|
"logits/rejected": -1.5492380857467651, |
|
"logps/chosen": -3145.0009765625, |
|
"logps/rejected": -2464.873046875, |
|
"loss": 0.6753, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.9549194574356079, |
|
"rewards/margins": 0.5628315210342407, |
|
"rewards/rejected": 0.3920879065990448, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.908343371123319e-07, |
|
"logits/chosen": -1.7886734008789062, |
|
"logits/rejected": -1.7371702194213867, |
|
"logps/chosen": -2213.9833984375, |
|
"logps/rejected": -2165.156982421875, |
|
"loss": 0.6319, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.4289758801460266, |
|
"rewards/margins": -0.01336582936346531, |
|
"rewards/rejected": 0.4423416554927826, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 8.879684489896071e-07, |
|
"logits/chosen": -1.9691905975341797, |
|
"logits/rejected": -1.9757953882217407, |
|
"logps/chosen": -2335.632568359375, |
|
"logps/rejected": -2261.8828125, |
|
"loss": 0.7045, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.7513018846511841, |
|
"rewards/margins": 0.29577815532684326, |
|
"rewards/rejected": 0.4555237889289856, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 8.850701656130407e-07, |
|
"logits/chosen": -1.9100147485733032, |
|
"logits/rejected": -1.8400824069976807, |
|
"logps/chosen": -2170.811279296875, |
|
"logps/rejected": -1717.5797119140625, |
|
"loss": 0.6164, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.8315251469612122, |
|
"rewards/margins": 0.4699254631996155, |
|
"rewards/rejected": 0.3615996241569519, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 8.821397289884605e-07, |
|
"logits/chosen": -1.7558555603027344, |
|
"logits/rejected": -1.709307074546814, |
|
"logps/chosen": -2761.39501953125, |
|
"logps/rejected": -2207.11669921875, |
|
"loss": 0.6138, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 1.443568468093872, |
|
"rewards/margins": 0.5788078308105469, |
|
"rewards/rejected": 0.8647607564926147, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 8.791773838064811e-07, |
|
"logits/chosen": -1.928342580795288, |
|
"logits/rejected": -1.8991355895996094, |
|
"logps/chosen": -2210.54248046875, |
|
"logps/rejected": -2129.330078125, |
|
"loss": 0.6609, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.5884748697280884, |
|
"rewards/margins": 0.13398051261901855, |
|
"rewards/rejected": 0.45449432730674744, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.76183377422073e-07, |
|
"logits/chosen": -1.8995704650878906, |
|
"logits/rejected": -1.920475721359253, |
|
"logps/chosen": -2605.925048828125, |
|
"logps/rejected": -2576.642578125, |
|
"loss": 0.6536, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.7022437453269958, |
|
"rewards/margins": 0.12484090030193329, |
|
"rewards/rejected": 0.5774028897285461, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.731579598339079e-07, |
|
"logits/chosen": -1.8158420324325562, |
|
"logits/rejected": -1.8012596368789673, |
|
"logps/chosen": -2187.33447265625, |
|
"logps/rejected": -1711.1923828125, |
|
"loss": 0.6688, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.6456224918365479, |
|
"rewards/margins": 0.084513820707798, |
|
"rewards/rejected": 0.5611085891723633, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.701013836634832e-07, |
|
"logits/chosen": -1.7981412410736084, |
|
"logits/rejected": -1.6974513530731201, |
|
"logps/chosen": -2500.97412109375, |
|
"logps/rejected": -2135.02880859375, |
|
"loss": 0.6633, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 1.2788394689559937, |
|
"rewards/margins": 0.4392651915550232, |
|
"rewards/rejected": 0.8395741581916809, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.670139041340298e-07, |
|
"logits/chosen": -1.8434431552886963, |
|
"logits/rejected": -1.789004921913147, |
|
"logps/chosen": -2898.79150390625, |
|
"logps/rejected": -2346.39208984375, |
|
"loss": 0.6217, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 1.0067036151885986, |
|
"rewards/margins": 0.2750965654850006, |
|
"rewards/rejected": 0.7316070795059204, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_logits/chosen": -1.6764802932739258, |
|
"eval_logits/rejected": -1.6345183849334717, |
|
"eval_logps/chosen": -2599.054443359375, |
|
"eval_logps/rejected": -2196.55810546875, |
|
"eval_loss": 0.6436493396759033, |
|
"eval_rewards/accuracies": 0.6460000276565552, |
|
"eval_rewards/chosen": 0.7372069954872131, |
|
"eval_rewards/margins": 0.3976210653781891, |
|
"eval_rewards/rejected": 0.33958590030670166, |
|
"eval_runtime": 274.6861, |
|
"eval_samples_per_second": 7.281, |
|
"eval_steps_per_second": 0.455, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 8.638957790491998e-07, |
|
"logits/chosen": -1.8298311233520508, |
|
"logits/rejected": -1.7556016445159912, |
|
"logps/chosen": -2440.884765625, |
|
"logps/rejected": -2195.47705078125, |
|
"loss": 0.6617, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.4565182626247406, |
|
"rewards/margins": 0.3322807252407074, |
|
"rewards/rejected": 0.1242375373840332, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 8.607472687715407e-07, |
|
"logits/chosen": -1.7025740146636963, |
|
"logits/rejected": -1.6857258081436157, |
|
"logps/chosen": -2362.98388671875, |
|
"logps/rejected": -2594.92822265625, |
|
"loss": 0.6842, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.03539573401212692, |
|
"rewards/margins": 0.22561678290367126, |
|
"rewards/rejected": -0.19022107124328613, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 8.575686362007543e-07, |
|
"logits/chosen": -1.9242517948150635, |
|
"logits/rejected": -1.82456374168396, |
|
"logps/chosen": -2176.520751953125, |
|
"logps/rejected": -1710.506591796875, |
|
"loss": 0.6089, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.9000527262687683, |
|
"rewards/margins": 0.34358882904052734, |
|
"rewards/rejected": 0.5564638376235962, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 8.543601467517459e-07, |
|
"logits/chosen": -1.850541114807129, |
|
"logits/rejected": -1.7285171747207642, |
|
"logps/chosen": -2746.185302734375, |
|
"logps/rejected": -2296.318359375, |
|
"loss": 0.6566, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 1.3377187252044678, |
|
"rewards/margins": 0.5021657347679138, |
|
"rewards/rejected": 0.835552990436554, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 8.511220683324607e-07, |
|
"logits/chosen": -1.6543235778808594, |
|
"logits/rejected": -1.5612728595733643, |
|
"logps/chosen": -2371.774658203125, |
|
"logps/rejected": -1993.567626953125, |
|
"loss": 0.6201, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 1.1099433898925781, |
|
"rewards/margins": 0.45349279046058655, |
|
"rewards/rejected": 0.656450629234314, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 8.478546713215151e-07, |
|
"logits/chosen": -1.630977988243103, |
|
"logits/rejected": -1.6222995519638062, |
|
"logps/chosen": -2651.537353515625, |
|
"logps/rejected": -2243.670166015625, |
|
"loss": 0.6546, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.9021153450012207, |
|
"rewards/margins": 0.5950330495834351, |
|
"rewards/rejected": 0.3070824146270752, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 8.445582285456195e-07, |
|
"logits/chosen": -1.564117193222046, |
|
"logits/rejected": -1.4608399868011475, |
|
"logps/chosen": -2718.778076171875, |
|
"logps/rejected": -2309.786376953125, |
|
"loss": 0.6459, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.9259850382804871, |
|
"rewards/margins": 0.6184819340705872, |
|
"rewards/rejected": 0.3075031638145447, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 8.412330152567964e-07, |
|
"logits/chosen": -1.4873427152633667, |
|
"logits/rejected": -1.4943621158599854, |
|
"logps/chosen": -2755.173828125, |
|
"logps/rejected": -2068.41455078125, |
|
"loss": 0.6562, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.9072205424308777, |
|
"rewards/margins": 0.3468925952911377, |
|
"rewards/rejected": 0.56032794713974, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 8.378793091093989e-07, |
|
"logits/chosen": -1.7745788097381592, |
|
"logits/rejected": -1.6726099252700806, |
|
"logps/chosen": -2075.09521484375, |
|
"logps/rejected": -1817.7054443359375, |
|
"loss": 0.7009, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.997990608215332, |
|
"rewards/margins": 0.39851805567741394, |
|
"rewards/rejected": 0.5994727611541748, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 8.344973901369252e-07, |
|
"logits/chosen": -1.7127208709716797, |
|
"logits/rejected": -1.6906402111053467, |
|
"logps/chosen": -1995.653564453125, |
|
"logps/rejected": -1904.506103515625, |
|
"loss": 0.7365, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.5294217467308044, |
|
"rewards/margins": -0.03213152289390564, |
|
"rewards/rejected": 0.5615532398223877, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_logits/chosen": -1.6008652448654175, |
|
"eval_logits/rejected": -1.5596896409988403, |
|
"eval_logps/chosen": -2580.944580078125, |
|
"eval_logps/rejected": -2178.24365234375, |
|
"eval_loss": 0.6399799585342407, |
|
"eval_rewards/accuracies": 0.6240000128746033, |
|
"eval_rewards/chosen": 0.9183096885681152, |
|
"eval_rewards/margins": 0.395578533411026, |
|
"eval_rewards/rejected": 0.5227311849594116, |
|
"eval_runtime": 279.2878, |
|
"eval_samples_per_second": 7.161, |
|
"eval_steps_per_second": 0.448, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 8.310875407286363e-07, |
|
"logits/chosen": -1.5962960720062256, |
|
"logits/rejected": -1.6336969137191772, |
|
"logps/chosen": -2335.583984375, |
|
"logps/rejected": -2098.15625, |
|
"loss": 0.646, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.8231102228164673, |
|
"rewards/margins": 0.4199337363243103, |
|
"rewards/rejected": 0.4031763970851898, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 8.276500456059762e-07, |
|
"logits/chosen": -1.6020572185516357, |
|
"logits/rejected": -1.524186611175537, |
|
"logps/chosen": -2809.671142578125, |
|
"logps/rejected": -1945.8929443359375, |
|
"loss": 0.6434, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.7115261554718018, |
|
"rewards/margins": 0.7415082454681396, |
|
"rewards/rejected": -0.02998208999633789, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 8.241851917987987e-07, |
|
"logits/chosen": -1.5716346502304077, |
|
"logits/rejected": -1.5255610942840576, |
|
"logps/chosen": -2458.597900390625, |
|
"logps/rejected": -2043.8544921875, |
|
"loss": 0.698, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.652920663356781, |
|
"rewards/margins": 0.3171829283237457, |
|
"rewards/rejected": 0.3357377350330353, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 8.206932686213996e-07, |
|
"logits/chosen": -1.6627728939056396, |
|
"logits/rejected": -1.5985891819000244, |
|
"logps/chosen": -2899.52587890625, |
|
"logps/rejected": -1978.255859375, |
|
"loss": 0.6587, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.4774368703365326, |
|
"rewards/margins": 0.4220924973487854, |
|
"rewards/rejected": 0.055344413965940475, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 8.171745676483592e-07, |
|
"logits/chosen": -1.6537669897079468, |
|
"logits/rejected": -1.6453883647918701, |
|
"logps/chosen": -2317.5439453125, |
|
"logps/rejected": -2154.13525390625, |
|
"loss": 0.734, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.5170091986656189, |
|
"rewards/margins": 0.14945654571056366, |
|
"rewards/rejected": 0.3675526976585388, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 8.13629382690196e-07, |
|
"logits/chosen": -1.796841025352478, |
|
"logits/rejected": -1.765019416809082, |
|
"logps/chosen": -2391.85205078125, |
|
"logps/rejected": -2199.541015625, |
|
"loss": 0.6489, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.6095671057701111, |
|
"rewards/margins": 0.15716035664081573, |
|
"rewards/rejected": 0.45240673422813416, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 8.100580097688341e-07, |
|
"logits/chosen": -1.688180923461914, |
|
"logits/rejected": -1.69215989112854, |
|
"logps/chosen": -3190.982177734375, |
|
"logps/rejected": -2641.860595703125, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 1.130110502243042, |
|
"rewards/margins": 0.5017646551132202, |
|
"rewards/rejected": 0.6283458471298218, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 8.064607470928844e-07, |
|
"logits/chosen": -1.8125782012939453, |
|
"logits/rejected": -1.709834337234497, |
|
"logps/chosen": -2791.87841796875, |
|
"logps/rejected": -1950.5830078125, |
|
"loss": 0.6634, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.7538470029830933, |
|
"rewards/margins": 0.3855181336402893, |
|
"rewards/rejected": 0.36832886934280396, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 8.028378950327452e-07, |
|
"logits/chosen": -1.8029365539550781, |
|
"logits/rejected": -1.725874900817871, |
|
"logps/chosen": -2617.264892578125, |
|
"logps/rejected": -2055.29296875, |
|
"loss": 0.6525, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.6537911295890808, |
|
"rewards/margins": 0.3229553699493408, |
|
"rewards/rejected": 0.33083575963974, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 7.99189756095521e-07, |
|
"logits/chosen": -1.794398546218872, |
|
"logits/rejected": -1.8133939504623413, |
|
"logps/chosen": -2457.388671875, |
|
"logps/rejected": -2300.2109375, |
|
"loss": 0.7057, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.7108521461486816, |
|
"rewards/margins": 0.028808236122131348, |
|
"rewards/rejected": 0.6820439100265503, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_logits/chosen": -1.7116947174072266, |
|
"eval_logits/rejected": -1.671615719795227, |
|
"eval_logps/chosen": -2577.6376953125, |
|
"eval_logps/rejected": -2174.325439453125, |
|
"eval_loss": 0.6468178033828735, |
|
"eval_rewards/accuracies": 0.6140000224113464, |
|
"eval_rewards/chosen": 0.9513765573501587, |
|
"eval_rewards/margins": 0.38946446776390076, |
|
"eval_rewards/rejected": 0.5619121193885803, |
|
"eval_runtime": 276.3765, |
|
"eval_samples_per_second": 7.237, |
|
"eval_steps_per_second": 0.452, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 7.955166348997632e-07, |
|
"logits/chosen": -1.8387196063995361, |
|
"logits/rejected": -1.8379266262054443, |
|
"logps/chosen": -2603.5302734375, |
|
"logps/rejected": -2606.268798828125, |
|
"loss": 0.7209, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.6424747109413147, |
|
"rewards/margins": -0.08023136854171753, |
|
"rewards/rejected": 0.7227060198783875, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 7.918188381500343e-07, |
|
"logits/chosen": -1.763655424118042, |
|
"logits/rejected": -1.834721326828003, |
|
"logps/chosen": -2435.61669921875, |
|
"logps/rejected": -2525.558349609375, |
|
"loss": 0.6479, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.5239884853363037, |
|
"rewards/margins": 0.1298888623714447, |
|
"rewards/rejected": 0.3940996527671814, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 7.880966746112995e-07, |
|
"logits/chosen": -1.7425800561904907, |
|
"logits/rejected": -1.7341902256011963, |
|
"logps/chosen": -2153.57861328125, |
|
"logps/rejected": -2187.4521484375, |
|
"loss": 0.6584, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.2758369743824005, |
|
"rewards/margins": 0.08724844455718994, |
|
"rewards/rejected": 0.18858852982521057, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 7.843504550831423e-07, |
|
"logits/chosen": -1.8196338415145874, |
|
"logits/rejected": -1.8200260400772095, |
|
"logps/chosen": -2192.01513671875, |
|
"logps/rejected": -2054.28369140625, |
|
"loss": 0.7045, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.20235542953014374, |
|
"rewards/margins": 0.17298252880573273, |
|
"rewards/rejected": 0.029372822493314743, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 7.805804923738157e-07, |
|
"logits/chosen": -1.7764650583267212, |
|
"logits/rejected": -1.7625634670257568, |
|
"logps/chosen": -2062.028564453125, |
|
"logps/rejected": -2072.051025390625, |
|
"loss": 0.6557, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.46689948439598083, |
|
"rewards/margins": 0.057696618139743805, |
|
"rewards/rejected": 0.4092028737068176, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 7.76787101274121e-07, |
|
"logits/chosen": -1.5717532634735107, |
|
"logits/rejected": -1.531313180923462, |
|
"logps/chosen": -2799.623779296875, |
|
"logps/rejected": -2392.739501953125, |
|
"loss": 0.6483, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.6438823938369751, |
|
"rewards/margins": 0.3187286853790283, |
|
"rewards/rejected": 0.32515376806259155, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 7.729705985311232e-07, |
|
"logits/chosen": -1.8854055404663086, |
|
"logits/rejected": -1.782091736793518, |
|
"logps/chosen": -2639.52099609375, |
|
"logps/rejected": -2122.951416015625, |
|
"loss": 0.642, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.5428428053855896, |
|
"rewards/margins": 0.35903045535087585, |
|
"rewards/rejected": 0.18381235003471375, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 7.69131302821703e-07, |
|
"logits/chosen": -1.6489614248275757, |
|
"logits/rejected": -1.6407349109649658, |
|
"logps/chosen": -2218.22607421875, |
|
"logps/rejected": -2045.851318359375, |
|
"loss": 0.6138, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.8161298036575317, |
|
"rewards/margins": 0.3229297399520874, |
|
"rewards/rejected": 0.49320006370544434, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 7.652695347259475e-07, |
|
"logits/chosen": -1.7384567260742188, |
|
"logits/rejected": -1.7179895639419556, |
|
"logps/chosen": -2210.828369140625, |
|
"logps/rejected": -1787.0751953125, |
|
"loss": 0.714, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.6207581758499146, |
|
"rewards/margins": 0.25422900915145874, |
|
"rewards/rejected": 0.36652907729148865, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 7.613856167003811e-07, |
|
"logits/chosen": -1.7387768030166626, |
|
"logits/rejected": -1.7393105030059814, |
|
"logps/chosen": -2618.2705078125, |
|
"logps/rejected": -2462.711669921875, |
|
"loss": 0.6396, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.7113048434257507, |
|
"rewards/margins": 0.039934493601322174, |
|
"rewards/rejected": 0.6713703870773315, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_logits/chosen": -1.6599823236465454, |
|
"eval_logits/rejected": -1.6244251728057861, |
|
"eval_logps/chosen": -2577.3193359375, |
|
"eval_logps/rejected": -2176.467529296875, |
|
"eval_loss": 0.6498265862464905, |
|
"eval_rewards/accuracies": 0.6399999856948853, |
|
"eval_rewards/chosen": 0.9545619487762451, |
|
"eval_rewards/margins": 0.41406965255737305, |
|
"eval_rewards/rejected": 0.5404923558235168, |
|
"eval_runtime": 277.7488, |
|
"eval_samples_per_second": 7.201, |
|
"eval_steps_per_second": 0.45, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 7.574798730510415e-07, |
|
"logits/chosen": -1.7479991912841797, |
|
"logits/rejected": -1.766013503074646, |
|
"logps/chosen": -2737.697265625, |
|
"logps/rejected": -2326.7646484375, |
|
"loss": 0.6725, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.8921098709106445, |
|
"rewards/margins": 0.41540461778640747, |
|
"rewards/rejected": 0.47670525312423706, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 7.53552629906399e-07, |
|
"logits/chosen": -1.7563549280166626, |
|
"logits/rejected": -1.6970514059066772, |
|
"logps/chosen": -2282.693603515625, |
|
"logps/rejected": -1962.1783447265625, |
|
"loss": 0.6439, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.7120363712310791, |
|
"rewards/margins": 0.4519389271736145, |
|
"rewards/rejected": 0.260097473859787, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 7.496042151901265e-07, |
|
"logits/chosen": -1.545775055885315, |
|
"logits/rejected": -1.5133075714111328, |
|
"logps/chosen": -2213.603271484375, |
|
"logps/rejected": -1864.095458984375, |
|
"loss": 0.6068, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.40503087639808655, |
|
"rewards/margins": 0.2904754877090454, |
|
"rewards/rejected": 0.11455540359020233, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 7.456349585937164e-07, |
|
"logits/chosen": -1.5178546905517578, |
|
"logits/rejected": -1.4629215002059937, |
|
"logps/chosen": -2501.837890625, |
|
"logps/rejected": -2465.253173828125, |
|
"loss": 0.6604, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.42861613631248474, |
|
"rewards/margins": 0.19511394202709198, |
|
"rewards/rejected": 0.23350219428539276, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 7.41645191548952e-07, |
|
"logits/chosen": -1.482337474822998, |
|
"logits/rejected": -1.4365692138671875, |
|
"logps/chosen": -2804.274169921875, |
|
"logps/rejected": -2157.90234375, |
|
"loss": 0.6541, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.6451601982116699, |
|
"rewards/margins": 0.35519418120384216, |
|
"rewards/rejected": 0.28996604681015015, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 7.376352472002336e-07, |
|
"logits/chosen": -1.6978435516357422, |
|
"logits/rejected": -1.6573905944824219, |
|
"logps/chosen": -2645.374267578125, |
|
"logps/rejected": -2618.08837890625, |
|
"loss": 0.6607, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.7978582382202148, |
|
"rewards/margins": 0.40063905715942383, |
|
"rewards/rejected": 0.397219181060791, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 7.336054603767603e-07, |
|
"logits/chosen": -1.7187271118164062, |
|
"logits/rejected": -1.630066156387329, |
|
"logps/chosen": -2555.4833984375, |
|
"logps/rejected": -2423.76904296875, |
|
"loss": 0.6165, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.568882405757904, |
|
"rewards/margins": 0.4214504361152649, |
|
"rewards/rejected": 0.14743201434612274, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 7.295561675645719e-07, |
|
"logits/chosen": -1.852007269859314, |
|
"logits/rejected": -1.7471548318862915, |
|
"logps/chosen": -3065.30908203125, |
|
"logps/rejected": -2331.70068359375, |
|
"loss": 0.5974, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.9147247076034546, |
|
"rewards/margins": 0.5064901113510132, |
|
"rewards/rejected": 0.40823444724082947, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 7.254877068784535e-07, |
|
"logits/chosen": -1.764828085899353, |
|
"logits/rejected": -1.7764110565185547, |
|
"logps/chosen": -2410.434326171875, |
|
"logps/rejected": -2364.50927734375, |
|
"loss": 0.6653, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.9937675595283508, |
|
"rewards/margins": 0.3099968731403351, |
|
"rewards/rejected": 0.6837707161903381, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 7.214004180337011e-07, |
|
"logits/chosen": -1.7532641887664795, |
|
"logits/rejected": -1.7427523136138916, |
|
"logps/chosen": -2716.545654296875, |
|
"logps/rejected": -2454.630859375, |
|
"loss": 0.5835, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.9549525380134583, |
|
"rewards/margins": 0.41002315282821655, |
|
"rewards/rejected": 0.5449293851852417, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": -1.6706230640411377, |
|
"eval_logits/rejected": -1.6255484819412231, |
|
"eval_logps/chosen": -2577.740234375, |
|
"eval_logps/rejected": -2176.956787109375, |
|
"eval_loss": 0.6487711668014526, |
|
"eval_rewards/accuracies": 0.6480000019073486, |
|
"eval_rewards/chosen": 0.9503532648086548, |
|
"eval_rewards/margins": 0.41475310921669006, |
|
"eval_rewards/rejected": 0.5356001853942871, |
|
"eval_runtime": 272.6994, |
|
"eval_samples_per_second": 7.334, |
|
"eval_steps_per_second": 0.458, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 7.172946423177573e-07, |
|
"logits/chosen": -1.7609493732452393, |
|
"logits/rejected": -1.705553412437439, |
|
"logps/chosen": -2359.901611328125, |
|
"logps/rejected": -1943.8248291015625, |
|
"loss": 0.6541, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.7265006303787231, |
|
"rewards/margins": 0.3340596854686737, |
|
"rewards/rejected": 0.3924410045146942, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 7.131707225617124e-07, |
|
"logits/chosen": -1.7774537801742554, |
|
"logits/rejected": -1.7147903442382812, |
|
"logps/chosen": -2557.030029296875, |
|
"logps/rejected": -1920.765380859375, |
|
"loss": 0.6345, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.7465502619743347, |
|
"rewards/margins": 0.5866314172744751, |
|
"rewards/rejected": 0.15991875529289246, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 7.090290031116797e-07, |
|
"logits/chosen": -1.598940134048462, |
|
"logits/rejected": -1.5846506357192993, |
|
"logps/chosen": -2371.542236328125, |
|
"logps/rejected": -2360.19091796875, |
|
"loss": 0.6786, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.7226258516311646, |
|
"rewards/margins": 0.45876359939575195, |
|
"rewards/rejected": 0.2638623118400574, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 7.048698298000411e-07, |
|
"logits/chosen": -1.5689928531646729, |
|
"logits/rejected": -1.5398110151290894, |
|
"logps/chosen": -2748.099609375, |
|
"logps/rejected": -2357.985107421875, |
|
"loss": 0.683, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.8013742566108704, |
|
"rewards/margins": 0.36650413274765015, |
|
"rewards/rejected": 0.4348701536655426, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 7.006935499165714e-07, |
|
"logits/chosen": -1.4957153797149658, |
|
"logits/rejected": -1.4456464052200317, |
|
"logps/chosen": -2492.004638671875, |
|
"logps/rejected": -2111.281982421875, |
|
"loss": 0.628, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.48487788438796997, |
|
"rewards/margins": 0.27980294823646545, |
|
"rewards/rejected": 0.20507490634918213, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 6.965005121794388e-07, |
|
"logits/chosen": -1.7797536849975586, |
|
"logits/rejected": -1.652152419090271, |
|
"logps/chosen": -2582.0791015625, |
|
"logps/rejected": -1727.4976806640625, |
|
"loss": 0.6113, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 1.2586898803710938, |
|
"rewards/margins": 0.8147012591362, |
|
"rewards/rejected": 0.4439886212348938, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 6.922910667060881e-07, |
|
"logits/chosen": -1.6934763193130493, |
|
"logits/rejected": -1.7006620168685913, |
|
"logps/chosen": -2171.2431640625, |
|
"logps/rejected": -2124.131103515625, |
|
"loss": 0.6424, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 1.1348068714141846, |
|
"rewards/margins": 0.2678903043270111, |
|
"rewards/rejected": 0.8669164776802063, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 6.880655649840044e-07, |
|
"logits/chosen": -1.7043119668960571, |
|
"logits/rejected": -1.6158339977264404, |
|
"logps/chosen": -2277.177001953125, |
|
"logps/rejected": -1852.617919921875, |
|
"loss": 0.7029, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.9791079759597778, |
|
"rewards/margins": 0.4544892907142639, |
|
"rewards/rejected": 0.5246187448501587, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 6.838243598413657e-07, |
|
"logits/chosen": -1.6774076223373413, |
|
"logits/rejected": -1.5916404724121094, |
|
"logps/chosen": -2854.013671875, |
|
"logps/rejected": -2340.158203125, |
|
"loss": 0.6618, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.8015478253364563, |
|
"rewards/margins": 0.3010616898536682, |
|
"rewards/rejected": 0.5004860162734985, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 6.795678054175811e-07, |
|
"logits/chosen": -1.676679253578186, |
|
"logits/rejected": -1.6284263134002686, |
|
"logps/chosen": -2829.0634765625, |
|
"logps/rejected": -2389.222900390625, |
|
"loss": 0.629, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 1.1989120244979858, |
|
"rewards/margins": 0.28385791182518005, |
|
"rewards/rejected": 0.9150541424751282, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_logits/chosen": -1.6191504001617432, |
|
"eval_logits/rejected": -1.5736533403396606, |
|
"eval_logps/chosen": -2547.931640625, |
|
"eval_logps/rejected": -2149.956787109375, |
|
"eval_loss": 0.6500846147537231, |
|
"eval_rewards/accuracies": 0.6100000143051147, |
|
"eval_rewards/chosen": 1.248434066772461, |
|
"eval_rewards/margins": 0.4428330659866333, |
|
"eval_rewards/rejected": 0.8056011199951172, |
|
"eval_runtime": 273.8345, |
|
"eval_samples_per_second": 7.304, |
|
"eval_steps_per_second": 0.456, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 6.752962571337198e-07, |
|
"logits/chosen": -1.6186796426773071, |
|
"logits/rejected": -1.561805009841919, |
|
"logps/chosen": -2682.119384765625, |
|
"logps/rejected": -2285.04345703125, |
|
"loss": 0.6952, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 1.003474235534668, |
|
"rewards/margins": 0.2986948490142822, |
|
"rewards/rejected": 0.7047793865203857, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 6.710100716628344e-07, |
|
"logits/chosen": -1.6599292755126953, |
|
"logits/rejected": -1.6332380771636963, |
|
"logps/chosen": -2627.91357421875, |
|
"logps/rejected": -2327.94580078125, |
|
"loss": 0.6704, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.7611712217330933, |
|
"rewards/margins": 0.35671621561050415, |
|
"rewards/rejected": 0.4044550061225891, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 6.66709606900178e-07, |
|
"logits/chosen": -1.7462646961212158, |
|
"logits/rejected": -1.6820160150527954, |
|
"logps/chosen": -2839.33154296875, |
|
"logps/rejected": -2203.18603515625, |
|
"loss": 0.6479, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.9174792170524597, |
|
"rewards/margins": 0.4399174749851227, |
|
"rewards/rejected": 0.4775618612766266, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 6.62395221933321e-07, |
|
"logits/chosen": -1.5487401485443115, |
|
"logits/rejected": -1.5394811630249023, |
|
"logps/chosen": -2549.23876953125, |
|
"logps/rejected": -2237.34228515625, |
|
"loss": 0.7309, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.9733200073242188, |
|
"rewards/margins": 0.22530770301818848, |
|
"rewards/rejected": 0.7480123043060303, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 6.580672770121663e-07, |
|
"logits/chosen": -1.641426682472229, |
|
"logits/rejected": -1.553511381149292, |
|
"logps/chosen": -2529.59033203125, |
|
"logps/rejected": -2177.71142578125, |
|
"loss": 0.6188, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 1.0179182291030884, |
|
"rewards/margins": 0.17651347815990448, |
|
"rewards/rejected": 0.8414047956466675, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 6.537261335188695e-07, |
|
"logits/chosen": -1.5916087627410889, |
|
"logits/rejected": -1.5262328386306763, |
|
"logps/chosen": -2461.32666015625, |
|
"logps/rejected": -1872.6234130859375, |
|
"loss": 0.6819, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.8468859791755676, |
|
"rewards/margins": 0.3362599015235901, |
|
"rewards/rejected": 0.5106260776519775, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 6.493721539376629e-07, |
|
"logits/chosen": -1.460197925567627, |
|
"logits/rejected": -1.4017575979232788, |
|
"logps/chosen": -2258.1845703125, |
|
"logps/rejected": -2031.2353515625, |
|
"loss": 0.6579, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.39380067586898804, |
|
"rewards/margins": 0.16991008818149567, |
|
"rewards/rejected": 0.22389057278633118, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 6.450057018245887e-07, |
|
"logits/chosen": -1.478092908859253, |
|
"logits/rejected": -1.413187026977539, |
|
"logps/chosen": -2277.25927734375, |
|
"logps/rejected": -2329.82421875, |
|
"loss": 0.665, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.27560552954673767, |
|
"rewards/margins": 0.3579794764518738, |
|
"rewards/rejected": -0.0823739543557167, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 6.406271417771417e-07, |
|
"logits/chosen": -1.4325132369995117, |
|
"logits/rejected": -1.4178228378295898, |
|
"logps/chosen": -2349.21337890625, |
|
"logps/rejected": -2154.297119140625, |
|
"loss": 0.6451, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.3952658474445343, |
|
"rewards/margins": 0.16848233342170715, |
|
"rewards/rejected": 0.22678343951702118, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 6.362368394038253e-07, |
|
"logits/chosen": -1.5897849798202515, |
|
"logits/rejected": -1.5390173196792603, |
|
"logps/chosen": -1875.8529052734375, |
|
"logps/rejected": -1862.1568603515625, |
|
"loss": 0.6495, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.7089827656745911, |
|
"rewards/margins": 0.22261781990528107, |
|
"rewards/rejected": 0.4863649904727936, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_logits/chosen": -1.4973212480545044, |
|
"eval_logits/rejected": -1.4589377641677856, |
|
"eval_logps/chosen": -2552.484619140625, |
|
"eval_logps/rejected": -2154.230712890625, |
|
"eval_loss": 0.6439757347106934, |
|
"eval_rewards/accuracies": 0.628000020980835, |
|
"eval_rewards/chosen": 1.2029086351394653, |
|
"eval_rewards/margins": 0.44004881381988525, |
|
"eval_rewards/rejected": 0.7628598809242249, |
|
"eval_runtime": 274.3283, |
|
"eval_samples_per_second": 7.291, |
|
"eval_steps_per_second": 0.456, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 6.318351612936251e-07, |
|
"logits/chosen": -1.540020227432251, |
|
"logits/rejected": -1.5377864837646484, |
|
"logps/chosen": -2579.018798828125, |
|
"logps/rejected": -2040.8626708984375, |
|
"loss": 0.5342, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 1.3657995462417603, |
|
"rewards/margins": 0.5092751383781433, |
|
"rewards/rejected": 0.8565242886543274, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 6.27422474985396e-07, |
|
"logits/chosen": -1.6203733682632446, |
|
"logits/rejected": -1.5685895681381226, |
|
"logps/chosen": -2994.5986328125, |
|
"logps/rejected": -2297.98779296875, |
|
"loss": 0.6322, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 1.1853954792022705, |
|
"rewards/margins": 0.3969642221927643, |
|
"rewards/rejected": 0.7884311079978943, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 6.229991489371753e-07, |
|
"logits/chosen": -1.4962559938430786, |
|
"logits/rejected": -1.5039407014846802, |
|
"logps/chosen": -2847.88330078125, |
|
"logps/rejected": -2582.642333984375, |
|
"loss": 0.6686, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 1.1997880935668945, |
|
"rewards/margins": 0.3008013367652893, |
|
"rewards/rejected": 0.8989866375923157, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 6.185655524954147e-07, |
|
"logits/chosen": -1.4752075672149658, |
|
"logits/rejected": -1.4018795490264893, |
|
"logps/chosen": -2723.45263671875, |
|
"logps/rejected": -2207.67626953125, |
|
"loss": 0.6545, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.8859116435050964, |
|
"rewards/margins": 0.42819732427597046, |
|
"rewards/rejected": 0.4577142596244812, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 6.141220558641415e-07, |
|
"logits/chosen": -1.6355358362197876, |
|
"logits/rejected": -1.5703387260437012, |
|
"logps/chosen": -2515.22509765625, |
|
"logps/rejected": -2019.4664306640625, |
|
"loss": 0.6205, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.9024883508682251, |
|
"rewards/margins": 0.5706073045730591, |
|
"rewards/rejected": 0.33188116550445557, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 6.096690300740452e-07, |
|
"logits/chosen": -1.5263564586639404, |
|
"logits/rejected": -1.5084933042526245, |
|
"logps/chosen": -2439.4453125, |
|
"logps/rejected": -2368.387939453125, |
|
"loss": 0.6958, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.8908926248550415, |
|
"rewards/margins": 0.32425767183303833, |
|
"rewards/rejected": 0.5666350722312927, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 6.052068469514983e-07, |
|
"logits/chosen": -1.4977385997772217, |
|
"logits/rejected": -1.4453041553497314, |
|
"logps/chosen": -2486.1298828125, |
|
"logps/rejected": -2206.44384765625, |
|
"loss": 0.7091, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.45466309785842896, |
|
"rewards/margins": 0.2726563811302185, |
|
"rewards/rejected": 0.18200668692588806, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 6.007358790875071e-07, |
|
"logits/chosen": -1.523725986480713, |
|
"logits/rejected": -1.3718494176864624, |
|
"logps/chosen": -3135.372314453125, |
|
"logps/rejected": -2062.346435546875, |
|
"loss": 0.5787, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.8700541257858276, |
|
"rewards/margins": 0.534584105014801, |
|
"rewards/rejected": 0.335470050573349, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.962564998066017e-07, |
|
"logits/chosen": -1.5558342933654785, |
|
"logits/rejected": -1.530552625656128, |
|
"logps/chosen": -2413.99951171875, |
|
"logps/rejected": -2231.44580078125, |
|
"loss": 0.7787, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.8536021113395691, |
|
"rewards/margins": 0.16090592741966248, |
|
"rewards/rejected": 0.692696213722229, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5.917690831356632e-07, |
|
"logits/chosen": -1.5734992027282715, |
|
"logits/rejected": -1.5821874141693115, |
|
"logps/chosen": -1971.658447265625, |
|
"logps/rejected": -2253.21533203125, |
|
"loss": 0.6465, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.37103039026260376, |
|
"rewards/margins": 0.36184656620025635, |
|
"rewards/rejected": 0.00918380357325077, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_logits/chosen": -1.5323480367660522, |
|
"eval_logits/rejected": -1.4961434602737427, |
|
"eval_logps/chosen": -2651.6640625, |
|
"eval_logps/rejected": -2239.925537109375, |
|
"eval_loss": 0.6641379594802856, |
|
"eval_rewards/accuracies": 0.628000020980835, |
|
"eval_rewards/chosen": 0.21111172437667847, |
|
"eval_rewards/margins": 0.3052009046077728, |
|
"eval_rewards/rejected": -0.09408915787935257, |
|
"eval_runtime": 272.1389, |
|
"eval_samples_per_second": 7.349, |
|
"eval_steps_per_second": 0.459, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5.872740037726918e-07, |
|
"logits/chosen": -1.6534467935562134, |
|
"logits/rejected": -1.586725115776062, |
|
"logps/chosen": -2391.630615234375, |
|
"logps/rejected": -2272.478271484375, |
|
"loss": 0.6567, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.07434698194265366, |
|
"rewards/margins": 0.33248382806777954, |
|
"rewards/rejected": -0.2581368684768677, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5.82771637055521e-07, |
|
"logits/chosen": -1.5501420497894287, |
|
"logits/rejected": -1.5288193225860596, |
|
"logps/chosen": -2284.060302734375, |
|
"logps/rejected": -1815.4986572265625, |
|
"loss": 0.6759, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.5421526432037354, |
|
"rewards/margins": 0.34266138076782227, |
|
"rewards/rejected": 0.1994912326335907, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 5.78262358930476e-07, |
|
"logits/chosen": -1.6008961200714111, |
|
"logits/rejected": -1.6260267496109009, |
|
"logps/chosen": -2463.80615234375, |
|
"logps/rejected": -2245.742919921875, |
|
"loss": 0.6011, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 1.3033373355865479, |
|
"rewards/margins": 0.4004366993904114, |
|
"rewards/rejected": 0.9029006958007812, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 5.737465459209825e-07, |
|
"logits/chosen": -1.4986151456832886, |
|
"logits/rejected": -1.5256952047348022, |
|
"logps/chosen": -2494.918701171875, |
|
"logps/rejected": -2140.651123046875, |
|
"loss": 0.6964, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.9287387728691101, |
|
"rewards/margins": 0.3198220729827881, |
|
"rewards/rejected": 0.6089166402816772, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 5.692245750961274e-07, |
|
"logits/chosen": -1.6182562112808228, |
|
"logits/rejected": -1.5028297901153564, |
|
"logps/chosen": -3051.378173828125, |
|
"logps/rejected": -2346.170654296875, |
|
"loss": 0.6231, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 1.0541870594024658, |
|
"rewards/margins": 0.3164740204811096, |
|
"rewards/rejected": 0.7377129793167114, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 5.646968240391729e-07, |
|
"logits/chosen": -1.649444580078125, |
|
"logits/rejected": -1.6214931011199951, |
|
"logps/chosen": -2746.20849609375, |
|
"logps/rejected": -2309.657958984375, |
|
"loss": 0.6202, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 1.2585784196853638, |
|
"rewards/margins": 0.8424956202507019, |
|
"rewards/rejected": 0.41608279943466187, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 5.601636708160296e-07, |
|
"logits/chosen": -1.547055959701538, |
|
"logits/rejected": -1.4317224025726318, |
|
"logps/chosen": -2590.408447265625, |
|
"logps/rejected": -2097.73779296875, |
|
"loss": 0.6244, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.8106128573417664, |
|
"rewards/margins": 0.47321099042892456, |
|
"rewards/rejected": 0.3374018669128418, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 5.55625493943687e-07, |
|
"logits/chosen": -1.6556923389434814, |
|
"logits/rejected": -1.5846012830734253, |
|
"logps/chosen": -2086.85400390625, |
|
"logps/rejected": -1811.0687255859375, |
|
"loss": 0.6727, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.6724368929862976, |
|
"rewards/margins": 0.2351864129304886, |
|
"rewards/rejected": 0.437250554561615, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 5.510826723586078e-07, |
|
"logits/chosen": -1.568554401397705, |
|
"logits/rejected": -1.5848562717437744, |
|
"logps/chosen": -2322.924560546875, |
|
"logps/rejected": -2143.58740234375, |
|
"loss": 0.6265, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.8154586553573608, |
|
"rewards/margins": 0.22640132904052734, |
|
"rewards/rejected": 0.5890573263168335, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 5.465355853850871e-07, |
|
"logits/chosen": -1.5256285667419434, |
|
"logits/rejected": -1.4730937480926514, |
|
"logps/chosen": -2586.426025390625, |
|
"logps/rejected": -2263.304443359375, |
|
"loss": 0.6866, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.8933171033859253, |
|
"rewards/margins": 0.33347776532173157, |
|
"rewards/rejected": 0.5598393678665161, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -1.4934203624725342, |
|
"eval_logits/rejected": -1.450918436050415, |
|
"eval_logps/chosen": -2615.305419921875, |
|
"eval_logps/rejected": -2210.75, |
|
"eval_loss": 0.648023784160614, |
|
"eval_rewards/accuracies": 0.6600000262260437, |
|
"eval_rewards/chosen": 0.5746970772743225, |
|
"eval_rewards/margins": 0.3770293593406677, |
|
"eval_rewards/rejected": 0.1976676881313324, |
|
"eval_runtime": 273.6196, |
|
"eval_samples_per_second": 7.309, |
|
"eval_steps_per_second": 0.457, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 5.41984612703579e-07, |
|
"logits/chosen": -1.4832929372787476, |
|
"logits/rejected": -1.4308890104293823, |
|
"logps/chosen": -2649.85986328125, |
|
"logps/rejected": -2068.37451171875, |
|
"loss": 0.6655, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.21885940432548523, |
|
"rewards/margins": 0.2792533338069916, |
|
"rewards/rejected": -0.06039392948150635, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 5.37430134318992e-07, |
|
"logits/chosen": -1.62113356590271, |
|
"logits/rejected": -1.5398887395858765, |
|
"logps/chosen": -2633.84912109375, |
|
"logps/rejected": -2066.56005859375, |
|
"loss": 0.6041, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.4892839789390564, |
|
"rewards/margins": 0.35047203302383423, |
|
"rewards/rejected": 0.13881191611289978, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 5.328725305289612e-07, |
|
"logits/chosen": -1.4779340028762817, |
|
"logits/rejected": -1.5425903797149658, |
|
"logps/chosen": -2540.42724609375, |
|
"logps/rejected": -2326.0771484375, |
|
"loss": 0.6508, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.8963820338249207, |
|
"rewards/margins": 0.08198239654302597, |
|
"rewards/rejected": 0.8143996000289917, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 5.283121818920911e-07, |
|
"logits/chosen": -1.6571362018585205, |
|
"logits/rejected": -1.5648419857025146, |
|
"logps/chosen": -2485.504150390625, |
|
"logps/rejected": -1707.0404052734375, |
|
"loss": 0.7148, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 1.0816528797149658, |
|
"rewards/margins": 0.5686277151107788, |
|
"rewards/rejected": 0.5130252838134766, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 5.237494691961808e-07, |
|
"logits/chosen": -1.6447166204452515, |
|
"logits/rejected": -1.5862451791763306, |
|
"logps/chosen": -2401.858642578125, |
|
"logps/rejected": -1855.913818359375, |
|
"loss": 0.6581, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.7710812091827393, |
|
"rewards/margins": 0.4302866458892822, |
|
"rewards/rejected": 0.3407946228981018, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 5.191847734264272e-07, |
|
"logits/chosen": -1.4784064292907715, |
|
"logits/rejected": -1.3932642936706543, |
|
"logps/chosen": -2951.359619140625, |
|
"logps/rejected": -1941.2406005859375, |
|
"loss": 0.6879, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.6071017384529114, |
|
"rewards/margins": 0.42749419808387756, |
|
"rewards/rejected": 0.17960752546787262, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 5.146184757336133e-07, |
|
"logits/chosen": -1.5501275062561035, |
|
"logits/rejected": -1.4930084943771362, |
|
"logps/chosen": -2447.392578125, |
|
"logps/rejected": -2330.06298828125, |
|
"loss": 0.6315, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.750685453414917, |
|
"rewards/margins": 0.24931129813194275, |
|
"rewards/rejected": 0.5013741254806519, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 5.100509574022827e-07, |
|
"logits/chosen": -1.5865153074264526, |
|
"logits/rejected": -1.5072886943817139, |
|
"logps/chosen": -2638.115966796875, |
|
"logps/rejected": -2270.362060546875, |
|
"loss": 0.6252, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.5812515020370483, |
|
"rewards/margins": 0.3027082085609436, |
|
"rewards/rejected": 0.27854329347610474, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 5.054825998189012e-07, |
|
"logits/chosen": -1.5698521137237549, |
|
"logits/rejected": -1.5683820247650146, |
|
"logps/chosen": -3126.69384765625, |
|
"logps/rejected": -2817.0693359375, |
|
"loss": 0.6094, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 1.212766170501709, |
|
"rewards/margins": 0.6158873438835144, |
|
"rewards/rejected": 0.5968788266181946, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 5.009137844400127e-07, |
|
"logits/chosen": -1.5645443201065063, |
|
"logits/rejected": -1.5424262285232544, |
|
"logps/chosen": -2719.923583984375, |
|
"logps/rejected": -2182.44140625, |
|
"loss": 0.6441, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 1.0640016794204712, |
|
"rewards/margins": 0.3684343695640564, |
|
"rewards/rejected": 0.69556725025177, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_logits/chosen": -1.484187364578247, |
|
"eval_logits/rejected": -1.441811203956604, |
|
"eval_logps/chosen": -2584.68408203125, |
|
"eval_logps/rejected": -2185.49853515625, |
|
"eval_loss": 0.6357956528663635, |
|
"eval_rewards/accuracies": 0.6480000019073486, |
|
"eval_rewards/chosen": 0.8809126019477844, |
|
"eval_rewards/margins": 0.4307316839694977, |
|
"eval_rewards/rejected": 0.45018094778060913, |
|
"eval_runtime": 264.5545, |
|
"eval_samples_per_second": 7.56, |
|
"eval_steps_per_second": 0.472, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.963448927603866e-07, |
|
"logits/chosen": -1.485181212425232, |
|
"logits/rejected": -1.448392629623413, |
|
"logps/chosen": -2547.41015625, |
|
"logps/rejected": -2374.857421875, |
|
"loss": 0.6224, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.6609773635864258, |
|
"rewards/margins": 0.6512421369552612, |
|
"rewards/rejected": 0.00973515771329403, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.917763062811631e-07, |
|
"logits/chosen": -1.5620416402816772, |
|
"logits/rejected": -1.5052043199539185, |
|
"logps/chosen": -2927.76953125, |
|
"logps/rejected": -2448.58154296875, |
|
"loss": 0.6657, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 1.1013530492782593, |
|
"rewards/margins": 0.3367983400821686, |
|
"rewards/rejected": 0.7645547389984131, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.872084064779983e-07, |
|
"logits/chosen": -1.538593053817749, |
|
"logits/rejected": -1.4424632787704468, |
|
"logps/chosen": -2763.087646484375, |
|
"logps/rejected": -2043.810546875, |
|
"loss": 0.6184, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 1.2892661094665527, |
|
"rewards/margins": 0.6896950006484985, |
|
"rewards/rejected": 0.5995711088180542, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.826415747692117e-07, |
|
"logits/chosen": -1.4685488939285278, |
|
"logits/rejected": -1.3683886528015137, |
|
"logps/chosen": -2700.881591796875, |
|
"logps/rejected": -1875.2572021484375, |
|
"loss": 0.6501, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.738589882850647, |
|
"rewards/margins": 0.4977818429470062, |
|
"rewards/rejected": 0.24080801010131836, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.780761924839365e-07, |
|
"logits/chosen": -1.310004472732544, |
|
"logits/rejected": -1.1984180212020874, |
|
"logps/chosen": -2606.06982421875, |
|
"logps/rejected": -2154.67529296875, |
|
"loss": 0.6671, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.3540424108505249, |
|
"rewards/margins": 0.13220936059951782, |
|
"rewards/rejected": 0.22183306515216827, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.7351264083027954e-07, |
|
"logits/chosen": -1.4272174835205078, |
|
"logits/rejected": -1.3643287420272827, |
|
"logps/chosen": -2741.249755859375, |
|
"logps/rejected": -2210.83203125, |
|
"loss": 0.5928, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 1.0889028310775757, |
|
"rewards/margins": 0.5112255215644836, |
|
"rewards/rejected": 0.5776773691177368, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.689513008634906e-07, |
|
"logits/chosen": -1.4570413827896118, |
|
"logits/rejected": -1.4573755264282227, |
|
"logps/chosen": -2330.010986328125, |
|
"logps/rejected": -2262.497802734375, |
|
"loss": 0.6381, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.9347332119941711, |
|
"rewards/margins": 0.4426857829093933, |
|
"rewards/rejected": 0.4920472204685211, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.6439255345414475e-07, |
|
"logits/chosen": -1.3844455480575562, |
|
"logits/rejected": -1.3740614652633667, |
|
"logps/chosen": -2633.23876953125, |
|
"logps/rejected": -2337.79296875, |
|
"loss": 0.6539, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.6188936829566956, |
|
"rewards/margins": 0.3281751871109009, |
|
"rewards/rejected": 0.29071852564811707, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.5983677925633836e-07, |
|
"logits/chosen": -1.4414739608764648, |
|
"logits/rejected": -1.4345848560333252, |
|
"logps/chosen": -2682.71337890625, |
|
"logps/rejected": -2551.15185546875, |
|
"loss": 0.6541, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.5205819010734558, |
|
"rewards/margins": 0.21859097480773926, |
|
"rewards/rejected": 0.30199089646339417, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.5528435867590595e-07, |
|
"logits/chosen": -1.4516639709472656, |
|
"logits/rejected": -1.437811255455017, |
|
"logps/chosen": -2479.385498046875, |
|
"logps/rejected": -2412.54052734375, |
|
"loss": 0.6752, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.7011703252792358, |
|
"rewards/margins": 0.2219802588224411, |
|
"rewards/rejected": 0.4791901111602783, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_logits/chosen": -1.3656065464019775, |
|
"eval_logits/rejected": -1.3192769289016724, |
|
"eval_logps/chosen": -2579.66357421875, |
|
"eval_logps/rejected": -2179.766845703125, |
|
"eval_loss": 0.6346109509468079, |
|
"eval_rewards/accuracies": 0.656000018119812, |
|
"eval_rewards/chosen": 0.9311181306838989, |
|
"eval_rewards/margins": 0.42361852526664734, |
|
"eval_rewards/rejected": 0.5074995160102844, |
|
"eval_runtime": 268.8417, |
|
"eval_samples_per_second": 7.439, |
|
"eval_steps_per_second": 0.465, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.507356718386556e-07, |
|
"logits/chosen": -1.4297059774398804, |
|
"logits/rejected": -1.306510329246521, |
|
"logps/chosen": -2648.14892578125, |
|
"logps/rejected": -1472.99169921875, |
|
"loss": 0.6332, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.7275451421737671, |
|
"rewards/margins": 0.39894041419029236, |
|
"rewards/rejected": 0.32860463857650757, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.461910985586298e-07, |
|
"logits/chosen": -1.5129501819610596, |
|
"logits/rejected": -1.4305498600006104, |
|
"logps/chosen": -2532.091796875, |
|
"logps/rejected": -1929.642333984375, |
|
"loss": 0.6767, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 1.124761939048767, |
|
"rewards/margins": 0.4813266694545746, |
|
"rewards/rejected": 0.6434352993965149, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.4165101830638937e-07, |
|
"logits/chosen": -1.3523738384246826, |
|
"logits/rejected": -1.2613658905029297, |
|
"logps/chosen": -2724.252197265625, |
|
"logps/rejected": -2269.352783203125, |
|
"loss": 0.6623, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 1.0175944566726685, |
|
"rewards/margins": 0.4952741265296936, |
|
"rewards/rejected": 0.5223202705383301, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.3711581017732866e-07, |
|
"logits/chosen": -1.3893333673477173, |
|
"logits/rejected": -1.390366554260254, |
|
"logps/chosen": -2303.6611328125, |
|
"logps/rejected": -2041.0625, |
|
"loss": 0.5847, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.8912409543991089, |
|
"rewards/margins": 0.2850377857685089, |
|
"rewards/rejected": 0.6062031388282776, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.325858528600214e-07, |
|
"logits/chosen": -1.3108584880828857, |
|
"logits/rejected": -1.2193920612335205, |
|
"logps/chosen": -2475.795654296875, |
|
"logps/rejected": -2092.73583984375, |
|
"loss": 0.595, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 1.1036341190338135, |
|
"rewards/margins": 0.3697579503059387, |
|
"rewards/rejected": 0.7338761687278748, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.280615246046001e-07, |
|
"logits/chosen": -1.4235751628875732, |
|
"logits/rejected": -1.3617385625839233, |
|
"logps/chosen": -2633.86083984375, |
|
"logps/rejected": -2408.10009765625, |
|
"loss": 0.6118, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 1.153663992881775, |
|
"rewards/margins": 0.3248019814491272, |
|
"rewards/rejected": 0.8288620710372925, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.235432031911719e-07, |
|
"logits/chosen": -1.4218213558197021, |
|
"logits/rejected": -1.4208636283874512, |
|
"logps/chosen": -2631.169189453125, |
|
"logps/rejected": -2223.19970703125, |
|
"loss": 0.639, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.8666805028915405, |
|
"rewards/margins": 0.2339131385087967, |
|
"rewards/rejected": 0.632767379283905, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.190312658982747e-07, |
|
"logits/chosen": -1.4493274688720703, |
|
"logits/rejected": -1.3931918144226074, |
|
"logps/chosen": -2808.74853515625, |
|
"logps/rejected": -2093.283203125, |
|
"loss": 0.6821, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 1.029400110244751, |
|
"rewards/margins": 0.31582584977149963, |
|
"rewards/rejected": 0.7135743498802185, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.145260894713738e-07, |
|
"logits/chosen": -1.3703809976577759, |
|
"logits/rejected": -1.3452428579330444, |
|
"logps/chosen": -2364.276123046875, |
|
"logps/rejected": -2126.041015625, |
|
"loss": 0.6331, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.6234472990036011, |
|
"rewards/margins": 0.22590875625610352, |
|
"rewards/rejected": 0.39753851294517517, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.1002805009140464e-07, |
|
"logits/chosen": -1.2794725894927979, |
|
"logits/rejected": -1.265462875366211, |
|
"logps/chosen": -3121.4658203125, |
|
"logps/rejected": -2392.01123046875, |
|
"loss": 0.5646, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 1.061316728591919, |
|
"rewards/margins": 0.7338230013847351, |
|
"rewards/rejected": 0.32749372720718384, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_logits/chosen": -1.3116086721420288, |
|
"eval_logits/rejected": -1.2831621170043945, |
|
"eval_logps/chosen": -2606.788330078125, |
|
"eval_logps/rejected": -2201.394775390625, |
|
"eval_loss": 0.6396492719650269, |
|
"eval_rewards/accuracies": 0.6480000019073486, |
|
"eval_rewards/chosen": 0.6598689556121826, |
|
"eval_rewards/margins": 0.368648886680603, |
|
"eval_rewards/rejected": 0.2912200391292572, |
|
"eval_runtime": 275.4174, |
|
"eval_samples_per_second": 7.262, |
|
"eval_steps_per_second": 0.454, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.055375233433608e-07, |
|
"logits/chosen": -1.4271332025527954, |
|
"logits/rejected": -1.423020601272583, |
|
"logps/chosen": -2071.07080078125, |
|
"logps/rejected": -1836.254638671875, |
|
"loss": 0.6856, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.4686586856842041, |
|
"rewards/margins": 0.19411948323249817, |
|
"rewards/rejected": 0.27453920245170593, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.010548841849336e-07, |
|
"logits/chosen": -1.3506792783737183, |
|
"logits/rejected": -1.3817119598388672, |
|
"logps/chosen": -2158.24267578125, |
|
"logps/rejected": -2183.30029296875, |
|
"loss": 0.6816, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.4484243392944336, |
|
"rewards/margins": 0.16151314973831177, |
|
"rewards/rejected": 0.2869111895561218, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.9658050691520243e-07, |
|
"logits/chosen": -1.2983678579330444, |
|
"logits/rejected": -1.291818380355835, |
|
"logps/chosen": -1904.927978515625, |
|
"logps/rejected": -2080.394287109375, |
|
"loss": 0.626, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.5332204699516296, |
|
"rewards/margins": 0.13143374025821686, |
|
"rewards/rejected": 0.401786744594574, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.921147651433822e-07, |
|
"logits/chosen": -1.2738254070281982, |
|
"logits/rejected": -1.3125003576278687, |
|
"logps/chosen": -2600.62841796875, |
|
"logps/rejected": -2347.55517578125, |
|
"loss": 0.6109, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.9114497900009155, |
|
"rewards/margins": 0.5179422497749329, |
|
"rewards/rejected": 0.3935074806213379, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.8765803175762547e-07, |
|
"logits/chosen": -1.4664332866668701, |
|
"logits/rejected": -1.4184716939926147, |
|
"logps/chosen": -3116.819580078125, |
|
"logps/rejected": -2370.01806640625, |
|
"loss": 0.6761, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.5260448455810547, |
|
"rewards/margins": 0.2121487557888031, |
|
"rewards/rejected": 0.3138960897922516, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.832106788938873e-07, |
|
"logits/chosen": -1.3046257495880127, |
|
"logits/rejected": -1.2335705757141113, |
|
"logps/chosen": -2104.3447265625, |
|
"logps/rejected": -1580.002685546875, |
|
"loss": 0.6046, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.5160833597183228, |
|
"rewards/margins": 0.3431095778942108, |
|
"rewards/rejected": 0.17297373712062836, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.7877307790485204e-07, |
|
"logits/chosen": -1.4190560579299927, |
|
"logits/rejected": -1.4400821924209595, |
|
"logps/chosen": -2375.158935546875, |
|
"logps/rejected": -2192.509765625, |
|
"loss": 0.6432, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 1.026429295539856, |
|
"rewards/margins": 0.4570732116699219, |
|
"rewards/rejected": 0.5693560838699341, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.7434559932892527e-07, |
|
"logits/chosen": -1.4493039846420288, |
|
"logits/rejected": -1.4444842338562012, |
|
"logps/chosen": -2466.854248046875, |
|
"logps/rejected": -2544.655517578125, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 1.071681022644043, |
|
"rewards/margins": 0.44979220628738403, |
|
"rewards/rejected": 0.6218888163566589, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.699286128592939e-07, |
|
"logits/chosen": -1.5252349376678467, |
|
"logits/rejected": -1.5242667198181152, |
|
"logps/chosen": -2524.96923828125, |
|
"logps/rejected": -2338.88916015625, |
|
"loss": 0.6446, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.6778838634490967, |
|
"rewards/margins": 0.30297914147377014, |
|
"rewards/rejected": 0.37490472197532654, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.655224873130571e-07, |
|
"logits/chosen": -1.5355488061904907, |
|
"logits/rejected": -1.5058531761169434, |
|
"logps/chosen": -2609.71728515625, |
|
"logps/rejected": -2428.035888671875, |
|
"loss": 0.6519, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.44195812940597534, |
|
"rewards/margins": 0.31578582525253296, |
|
"rewards/rejected": 0.12617230415344238, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": -1.477705478668213, |
|
"eval_logits/rejected": -1.4460340738296509, |
|
"eval_logps/chosen": -2630.405029296875, |
|
"eval_logps/rejected": -2221.14599609375, |
|
"eval_loss": 0.6450788378715515, |
|
"eval_rewards/accuracies": 0.6399999856948853, |
|
"eval_rewards/chosen": 0.4237046539783478, |
|
"eval_rewards/margins": 0.3299960792064667, |
|
"eval_rewards/rejected": 0.09370850026607513, |
|
"eval_runtime": 267.0847, |
|
"eval_samples_per_second": 7.488, |
|
"eval_steps_per_second": 0.468, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.611275906004298e-07, |
|
"logits/chosen": -1.3755953311920166, |
|
"logits/rejected": -1.3369966745376587, |
|
"logps/chosen": -2507.03515625, |
|
"logps/rejected": -2109.78173828125, |
|
"loss": 0.618, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.5401030778884888, |
|
"rewards/margins": 0.40320855379104614, |
|
"rewards/rejected": 0.13689449429512024, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.5674428969402306e-07, |
|
"logits/chosen": -1.5944547653198242, |
|
"logits/rejected": -1.5777031183242798, |
|
"logps/chosen": -2441.38525390625, |
|
"logps/rejected": -2047.470947265625, |
|
"loss": 0.6396, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.8870500326156616, |
|
"rewards/margins": 0.2639123797416687, |
|
"rewards/rejected": 0.6231377720832825, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.523729505982008e-07, |
|
"logits/chosen": -1.5361446142196655, |
|
"logits/rejected": -1.5061471462249756, |
|
"logps/chosen": -2847.749267578125, |
|
"logps/rejected": -2523.47705078125, |
|
"loss": 0.668, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 1.2106680870056152, |
|
"rewards/margins": 0.3830196261405945, |
|
"rewards/rejected": 0.8276484608650208, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.480139383185199e-07, |
|
"logits/chosen": -1.4447945356369019, |
|
"logits/rejected": -1.446575403213501, |
|
"logps/chosen": -2404.881591796875, |
|
"logps/rejected": -1928.1968994140625, |
|
"loss": 0.6213, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.8293660879135132, |
|
"rewards/margins": 0.44402575492858887, |
|
"rewards/rejected": 0.38534030318260193, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.436676168312508e-07, |
|
"logits/chosen": -1.5186269283294678, |
|
"logits/rejected": -1.4943647384643555, |
|
"logps/chosen": -2593.47021484375, |
|
"logps/rejected": -2183.07861328125, |
|
"loss": 0.6634, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.6045433878898621, |
|
"rewards/margins": 0.43318018317222595, |
|
"rewards/rejected": 0.17136314511299133, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.393343490529874e-07, |
|
"logits/chosen": -1.4893147945404053, |
|
"logits/rejected": -1.487684965133667, |
|
"logps/chosen": -2017.423583984375, |
|
"logps/rejected": -1761.2838134765625, |
|
"loss": 0.6279, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.5850464105606079, |
|
"rewards/margins": 0.3719884753227234, |
|
"rewards/rejected": 0.21305795013904572, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.35014496810342e-07, |
|
"logits/chosen": -1.4742892980575562, |
|
"logits/rejected": -1.4468326568603516, |
|
"logps/chosen": -2771.84423828125, |
|
"logps/rejected": -2527.495849609375, |
|
"loss": 0.6262, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.9822020530700684, |
|
"rewards/margins": 0.38336580991744995, |
|
"rewards/rejected": 0.5988362431526184, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.3070842080973365e-07, |
|
"logits/chosen": -1.3868352174758911, |
|
"logits/rejected": -1.2980562448501587, |
|
"logps/chosen": -2637.708984375, |
|
"logps/rejected": -2321.34423828125, |
|
"loss": 0.5936, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.7674530744552612, |
|
"rewards/margins": 0.3672861158847809, |
|
"rewards/rejected": 0.40016698837280273, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.264164806072691e-07, |
|
"logits/chosen": -1.4247747659683228, |
|
"logits/rejected": -1.4318523406982422, |
|
"logps/chosen": -1995.621337890625, |
|
"logps/rejected": -1889.0433349609375, |
|
"loss": 0.5861, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.4019368290901184, |
|
"rewards/margins": 0.3562806248664856, |
|
"rewards/rejected": 0.04565621167421341, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.221390345787205e-07, |
|
"logits/chosen": -1.4784921407699585, |
|
"logits/rejected": -1.4326039552688599, |
|
"logps/chosen": -2378.757080078125, |
|
"logps/rejected": -2247.66650390625, |
|
"loss": 0.6292, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.8328452110290527, |
|
"rewards/margins": 0.35527855157852173, |
|
"rewards/rejected": 0.47756657004356384, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_logits/chosen": -1.4396635293960571, |
|
"eval_logits/rejected": -1.4040294885635376, |
|
"eval_logps/chosen": -2585.951171875, |
|
"eval_logps/rejected": -2188.20947265625, |
|
"eval_loss": 0.6313052177429199, |
|
"eval_rewards/accuracies": 0.6460000276565552, |
|
"eval_rewards/chosen": 0.8682412505149841, |
|
"eval_rewards/margins": 0.44516855478286743, |
|
"eval_rewards/rejected": 0.4230727553367615, |
|
"eval_runtime": 273.6779, |
|
"eval_samples_per_second": 7.308, |
|
"eval_steps_per_second": 0.457, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.178764398895999e-07, |
|
"logits/chosen": -1.5333797931671143, |
|
"logits/rejected": -1.4616153240203857, |
|
"logps/chosen": -2828.387451171875, |
|
"logps/rejected": -2186.489501953125, |
|
"loss": 0.6816, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 1.0755081176757812, |
|
"rewards/margins": 0.3677830100059509, |
|
"rewards/rejected": 0.7077249884605408, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.1362905246533733e-07, |
|
"logits/chosen": -1.5495270490646362, |
|
"logits/rejected": -1.5117230415344238, |
|
"logps/chosen": -2374.715087890625, |
|
"logps/rejected": -2174.619384765625, |
|
"loss": 0.6468, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.9045315980911255, |
|
"rewards/margins": 0.290399968624115, |
|
"rewards/rejected": 0.6141316890716553, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.093972269615602e-07, |
|
"logits/chosen": -1.5428906679153442, |
|
"logits/rejected": -1.4694817066192627, |
|
"logps/chosen": -2695.100341796875, |
|
"logps/rejected": -2482.61328125, |
|
"loss": 0.6343, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.9704828262329102, |
|
"rewards/margins": 0.4584183096885681, |
|
"rewards/rejected": 0.5120643377304077, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.051813167344807e-07, |
|
"logits/chosen": -1.493492841720581, |
|
"logits/rejected": -1.4127018451690674, |
|
"logps/chosen": -2684.31298828125, |
|
"logps/rejected": -2033.2591552734375, |
|
"loss": 0.6365, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.5565832853317261, |
|
"rewards/margins": 0.16704833507537842, |
|
"rewards/rejected": 0.38953498005867004, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.009816738113891e-07, |
|
"logits/chosen": -1.4053213596343994, |
|
"logits/rejected": -1.3655506372451782, |
|
"logps/chosen": -2278.53955078125, |
|
"logps/rejected": -2113.007568359375, |
|
"loss": 0.6118, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.7199645042419434, |
|
"rewards/margins": 0.3212481141090393, |
|
"rewards/rejected": 0.39871641993522644, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.967986488612611e-07, |
|
"logits/chosen": -1.4535365104675293, |
|
"logits/rejected": -1.4404280185699463, |
|
"logps/chosen": -1694.073486328125, |
|
"logps/rejected": -1605.5189208984375, |
|
"loss": 0.5899, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.42730236053466797, |
|
"rewards/margins": 0.42983976006507874, |
|
"rewards/rejected": -0.0025373927783221006, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.92632591165476e-07, |
|
"logits/chosen": -1.4190573692321777, |
|
"logits/rejected": -1.398150086402893, |
|
"logps/chosen": -2259.364013671875, |
|
"logps/rejected": -2268.209228515625, |
|
"loss": 0.6192, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.7744486927986145, |
|
"rewards/margins": 0.38980168104171753, |
|
"rewards/rejected": 0.38464704155921936, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.884838485886531e-07, |
|
"logits/chosen": -1.0902016162872314, |
|
"logits/rejected": -1.1198498010635376, |
|
"logps/chosen": -2453.63427734375, |
|
"logps/rejected": -2333.11572265625, |
|
"loss": 0.6449, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.7615150809288025, |
|
"rewards/margins": 0.3345804810523987, |
|
"rewards/rejected": 0.4269346594810486, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.8435276754960316e-07, |
|
"logits/chosen": -1.4796117544174194, |
|
"logits/rejected": -1.3743705749511719, |
|
"logps/chosen": -2669.708984375, |
|
"logps/rejected": -2046.904296875, |
|
"loss": 0.6064, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 1.0122426748275757, |
|
"rewards/margins": 0.7491260766983032, |
|
"rewards/rejected": 0.2631165385246277, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.802396929924042e-07, |
|
"logits/chosen": -1.5238767862319946, |
|
"logits/rejected": -1.4813053607940674, |
|
"logps/chosen": -2612.95654296875, |
|
"logps/rejected": -2195.318115234375, |
|
"loss": 0.5985, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.7694698572158813, |
|
"rewards/margins": 0.3584301471710205, |
|
"rewards/rejected": 0.41103968024253845, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_logits/chosen": -1.3860489130020142, |
|
"eval_logits/rejected": -1.357996940612793, |
|
"eval_logps/chosen": -2588.8173828125, |
|
"eval_logps/rejected": -2194.014404296875, |
|
"eval_loss": 0.6273570656776428, |
|
"eval_rewards/accuracies": 0.6639999747276306, |
|
"eval_rewards/chosen": 0.8395788669586182, |
|
"eval_rewards/margins": 0.47455480694770813, |
|
"eval_rewards/rejected": 0.3650241196155548, |
|
"eval_runtime": 275.8616, |
|
"eval_samples_per_second": 7.25, |
|
"eval_steps_per_second": 0.453, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.761449683575979e-07, |
|
"logits/chosen": -1.394689917564392, |
|
"logits/rejected": -1.3897894620895386, |
|
"logps/chosen": -2253.80322265625, |
|
"logps/rejected": -1823.135986328125, |
|
"loss": 0.6579, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.6918545961380005, |
|
"rewards/margins": 0.3240147531032562, |
|
"rewards/rejected": 0.3678398132324219, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.720689355535133e-07, |
|
"logits/chosen": -1.5367738008499146, |
|
"logits/rejected": -1.4291003942489624, |
|
"logps/chosen": -3060.17431640625, |
|
"logps/rejected": -2661.6337890625, |
|
"loss": 0.67, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.8580164909362793, |
|
"rewards/margins": 0.25758737325668335, |
|
"rewards/rejected": 0.6004289984703064, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.680119349277163e-07, |
|
"logits/chosen": -1.4364140033721924, |
|
"logits/rejected": -1.3742132186889648, |
|
"logps/chosen": -2562.680908203125, |
|
"logps/rejected": -2340.421875, |
|
"loss": 0.5949, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.6592432260513306, |
|
"rewards/margins": 0.4979625642299652, |
|
"rewards/rejected": 0.16128072142601013, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.639743052385917e-07, |
|
"logits/chosen": -1.4134931564331055, |
|
"logits/rejected": -1.4067234992980957, |
|
"logps/chosen": -2398.591796875, |
|
"logps/rejected": -2171.51611328125, |
|
"loss": 0.5719, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.5373933911323547, |
|
"rewards/margins": 0.3850085139274597, |
|
"rewards/rejected": 0.15238483250141144, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.599563836270564e-07, |
|
"logits/chosen": -1.3736032247543335, |
|
"logits/rejected": -1.3715035915374756, |
|
"logps/chosen": -2091.04345703125, |
|
"logps/rejected": -1912.631591796875, |
|
"loss": 0.5962, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.41586917638778687, |
|
"rewards/margins": 0.2734389901161194, |
|
"rewards/rejected": 0.14243023097515106, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.55958505588409e-07, |
|
"logits/chosen": -1.2896828651428223, |
|
"logits/rejected": -1.27872896194458, |
|
"logps/chosen": -2430.079345703125, |
|
"logps/rejected": -1943.730224609375, |
|
"loss": 0.6424, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.5126298069953918, |
|
"rewards/margins": 0.3756958842277527, |
|
"rewards/rejected": 0.13693387806415558, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.519810049443152e-07, |
|
"logits/chosen": -1.4509341716766357, |
|
"logits/rejected": -1.3884532451629639, |
|
"logps/chosen": -3016.3662109375, |
|
"logps/rejected": -2212.178466796875, |
|
"loss": 0.657, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.7909504175186157, |
|
"rewards/margins": 0.5453279614448547, |
|
"rewards/rejected": 0.24562236666679382, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.4802421381493405e-07, |
|
"logits/chosen": -1.3750033378601074, |
|
"logits/rejected": -1.3092578649520874, |
|
"logps/chosen": -2432.66455078125, |
|
"logps/rejected": -1898.987548828125, |
|
"loss": 0.6082, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.6120018362998962, |
|
"rewards/margins": 0.7680531144142151, |
|
"rewards/rejected": -0.15605124831199646, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.440884625911861e-07, |
|
"logits/chosen": -1.446340560913086, |
|
"logits/rejected": -1.4364707469940186, |
|
"logps/chosen": -2399.11572265625, |
|
"logps/rejected": -2216.21240234375, |
|
"loss": 0.5942, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.7894426584243774, |
|
"rewards/margins": 0.5115488767623901, |
|
"rewards/rejected": 0.2778938412666321, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.4017407990716597e-07, |
|
"logits/chosen": -1.3791276216506958, |
|
"logits/rejected": -1.3035521507263184, |
|
"logps/chosen": -2603.680908203125, |
|
"logps/rejected": -2026.604248046875, |
|
"loss": 0.6323, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.9257089495658875, |
|
"rewards/margins": 0.778711199760437, |
|
"rewards/rejected": 0.14699774980545044, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_logits/chosen": -1.2938450574874878, |
|
"eval_logits/rejected": -1.2622296810150146, |
|
"eval_logps/chosen": -2606.926025390625, |
|
"eval_logps/rejected": -2210.395751953125, |
|
"eval_loss": 0.6327735781669617, |
|
"eval_rewards/accuracies": 0.6639999747276306, |
|
"eval_rewards/chosen": 0.6584945917129517, |
|
"eval_rewards/margins": 0.45728600025177, |
|
"eval_rewards/rejected": 0.20120853185653687, |
|
"eval_runtime": 277.1566, |
|
"eval_samples_per_second": 7.216, |
|
"eval_steps_per_second": 0.451, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.3628139261270135e-07, |
|
"logits/chosen": -1.3448095321655273, |
|
"logits/rejected": -1.2422099113464355, |
|
"logps/chosen": -3200.04736328125, |
|
"logps/rejected": -2413.40673828125, |
|
"loss": 0.6214, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.6920865774154663, |
|
"rewards/margins": 0.627841055393219, |
|
"rewards/rejected": 0.06424557417631149, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.3241072574606102e-07, |
|
"logits/chosen": -1.4279682636260986, |
|
"logits/rejected": -1.3650823831558228, |
|
"logps/chosen": -2858.18310546875, |
|
"logps/rejected": -2436.86474609375, |
|
"loss": 0.5988, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.9761995077133179, |
|
"rewards/margins": 0.4037070870399475, |
|
"rewards/rejected": 0.5724924802780151, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.285624025068143e-07, |
|
"logits/chosen": -1.4990990161895752, |
|
"logits/rejected": -1.4075102806091309, |
|
"logps/chosen": -2588.497802734375, |
|
"logps/rejected": -1992.470703125, |
|
"loss": 0.609, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.8248246312141418, |
|
"rewards/margins": 0.41641107201576233, |
|
"rewards/rejected": 0.40841349959373474, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.247367442288446e-07, |
|
"logits/chosen": -1.4395039081573486, |
|
"logits/rejected": -1.3425724506378174, |
|
"logps/chosen": -2676.036376953125, |
|
"logps/rejected": -1750.607177734375, |
|
"loss": 0.5769, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.0520992279052734, |
|
"rewards/margins": 0.9028045535087585, |
|
"rewards/rejected": 0.14929473400115967, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.209340703535169e-07, |
|
"logits/chosen": -1.4604949951171875, |
|
"logits/rejected": -1.349486231803894, |
|
"logps/chosen": -2208.95947265625, |
|
"logps/rejected": -1485.8690185546875, |
|
"loss": 0.681, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.6709630489349365, |
|
"rewards/margins": 0.3589937090873718, |
|
"rewards/rejected": 0.3119693398475647, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.171546984030056e-07, |
|
"logits/chosen": -1.4167903661727905, |
|
"logits/rejected": -1.3515651226043701, |
|
"logps/chosen": -2376.03857421875, |
|
"logps/rejected": -1771.710205078125, |
|
"loss": 0.6252, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.8238846063613892, |
|
"rewards/margins": 0.5126960277557373, |
|
"rewards/rejected": 0.31118854880332947, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.1339894395378067e-07, |
|
"logits/chosen": -1.4049344062805176, |
|
"logits/rejected": -1.3172833919525146, |
|
"logps/chosen": -2733.88330078125, |
|
"logps/rejected": -2381.346923828125, |
|
"loss": 0.6818, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.9343398213386536, |
|
"rewards/margins": 0.8638205528259277, |
|
"rewards/rejected": 0.0705193430185318, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.096671206102582e-07, |
|
"logits/chosen": -1.4337115287780762, |
|
"logits/rejected": -1.318379521369934, |
|
"logps/chosen": -2679.900390625, |
|
"logps/rejected": -1911.437255859375, |
|
"loss": 0.6114, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.6781307458877563, |
|
"rewards/margins": 0.644757866859436, |
|
"rewards/rejected": 0.03337289020419121, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.0595953997861326e-07, |
|
"logits/chosen": -1.4050424098968506, |
|
"logits/rejected": -1.3058971166610718, |
|
"logps/chosen": -2156.65185546875, |
|
"logps/rejected": -1914.407958984375, |
|
"loss": 0.5951, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.6217959523200989, |
|
"rewards/margins": 0.3280433416366577, |
|
"rewards/rejected": 0.2937525808811188, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.0227651164076153e-07, |
|
"logits/chosen": -1.4374796152114868, |
|
"logits/rejected": -1.432049036026001, |
|
"logps/chosen": -2434.140380859375, |
|
"logps/rejected": -1971.261474609375, |
|
"loss": 0.6174, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.6336767673492432, |
|
"rewards/margins": 0.5388184785842896, |
|
"rewards/rejected": 0.09485818445682526, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_logits/chosen": -1.3635029792785645, |
|
"eval_logits/rejected": -1.331161618232727, |
|
"eval_logps/chosen": -2587.720947265625, |
|
"eval_logps/rejected": -2192.89892578125, |
|
"eval_loss": 0.6305412650108337, |
|
"eval_rewards/accuracies": 0.6579999923706055, |
|
"eval_rewards/chosen": 0.8505436778068542, |
|
"eval_rewards/margins": 0.474366158246994, |
|
"eval_rewards/rejected": 0.376177579164505, |
|
"eval_runtime": 278.5163, |
|
"eval_samples_per_second": 7.181, |
|
"eval_steps_per_second": 0.449, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.986183431285095e-07, |
|
"logits/chosen": -1.4380724430084229, |
|
"logits/rejected": -1.3568377494812012, |
|
"logps/chosen": -2706.828857421875, |
|
"logps/rejected": -2156.448486328125, |
|
"loss": 0.6185, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 1.1412960290908813, |
|
"rewards/margins": 0.6656811833381653, |
|
"rewards/rejected": 0.4756149351596832, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.9498533989787508e-07, |
|
"logits/chosen": -1.4406335353851318, |
|
"logits/rejected": -1.4548825025558472, |
|
"logps/chosen": -2395.5869140625, |
|
"logps/rejected": -2094.87353515625, |
|
"loss": 0.6198, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.9250233769416809, |
|
"rewards/margins": 0.521003782749176, |
|
"rewards/rejected": 0.4040195345878601, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.9137780530358255e-07, |
|
"logits/chosen": -1.4243742227554321, |
|
"logits/rejected": -1.3397035598754883, |
|
"logps/chosen": -2837.8330078125, |
|
"logps/rejected": -2148.09423828125, |
|
"loss": 0.6453, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 1.0236155986785889, |
|
"rewards/margins": 0.5235460996627808, |
|
"rewards/rejected": 0.5000696182250977, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.8779604057373232e-07, |
|
"logits/chosen": -1.4576950073242188, |
|
"logits/rejected": -1.3922784328460693, |
|
"logps/chosen": -2905.541259765625, |
|
"logps/rejected": -2111.591064453125, |
|
"loss": 0.5378, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 1.0767219066619873, |
|
"rewards/margins": 0.7249119281768799, |
|
"rewards/rejected": 0.35180991888046265, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.842403447846485e-07, |
|
"logits/chosen": -1.3431110382080078, |
|
"logits/rejected": -1.3422303199768066, |
|
"logps/chosen": -2476.67431640625, |
|
"logps/rejected": -1822.1331787109375, |
|
"loss": 0.6316, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.6926653385162354, |
|
"rewards/margins": 0.35034242272377014, |
|
"rewards/rejected": 0.34232297539711, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.8071101483590657e-07, |
|
"logits/chosen": -1.4735561609268188, |
|
"logits/rejected": -1.4546959400177002, |
|
"logps/chosen": -2374.36376953125, |
|
"logps/rejected": -1975.539794921875, |
|
"loss": 0.6615, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.792915940284729, |
|
"rewards/margins": 0.5957690477371216, |
|
"rewards/rejected": 0.19714678823947906, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.772083454255413e-07, |
|
"logits/chosen": -1.4286420345306396, |
|
"logits/rejected": -1.3648045063018799, |
|
"logps/chosen": -3039.596435546875, |
|
"logps/rejected": -2663.343017578125, |
|
"loss": 0.6159, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.8077454566955566, |
|
"rewards/margins": 0.41170400381088257, |
|
"rewards/rejected": 0.3960413932800293, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.7373262902544057e-07, |
|
"logits/chosen": -1.3764533996582031, |
|
"logits/rejected": -1.3593838214874268, |
|
"logps/chosen": -1988.8544921875, |
|
"logps/rejected": -1976.740966796875, |
|
"loss": 0.6497, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.4973524212837219, |
|
"rewards/margins": 0.3927370011806488, |
|
"rewards/rejected": 0.10461540520191193, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.7028415585692335e-07, |
|
"logits/chosen": -1.4325544834136963, |
|
"logits/rejected": -1.3712177276611328, |
|
"logps/chosen": -2467.70849609375, |
|
"logps/rejected": -1956.5791015625, |
|
"loss": 0.6218, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.5883597135543823, |
|
"rewards/margins": 0.17649570107460022, |
|
"rewards/rejected": 0.41186395287513733, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.668632138665071e-07, |
|
"logits/chosen": -1.4749171733856201, |
|
"logits/rejected": -1.4637329578399658, |
|
"logps/chosen": -2227.57861328125, |
|
"logps/rejected": -2143.876220703125, |
|
"loss": 0.5972, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.6317578554153442, |
|
"rewards/margins": 0.4265865683555603, |
|
"rewards/rejected": 0.20517130196094513, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_logits/chosen": -1.3840327262878418, |
|
"eval_logits/rejected": -1.3491802215576172, |
|
"eval_logps/chosen": -2607.56591796875, |
|
"eval_logps/rejected": -2207.613037109375, |
|
"eval_loss": 0.6310118436813354, |
|
"eval_rewards/accuracies": 0.6600000262260437, |
|
"eval_rewards/chosen": 0.6520929932594299, |
|
"eval_rewards/margins": 0.423054039478302, |
|
"eval_rewards/rejected": 0.22903895378112793, |
|
"eval_runtime": 272.7578, |
|
"eval_samples_per_second": 7.333, |
|
"eval_steps_per_second": 0.458, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.6347008870186346e-07, |
|
"logits/chosen": -1.5404746532440186, |
|
"logits/rejected": -1.529778242111206, |
|
"logps/chosen": -1933.6298828125, |
|
"logps/rejected": -1536.895263671875, |
|
"loss": 0.5808, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.5152641534805298, |
|
"rewards/margins": 0.40403586626052856, |
|
"rewards/rejected": 0.11122828722000122, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.6010506368796718e-07, |
|
"logits/chosen": -1.4943128824234009, |
|
"logits/rejected": -1.456924319267273, |
|
"logps/chosen": -2417.9599609375, |
|
"logps/rejected": -2162.760009765625, |
|
"loss": 0.6541, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.7175670862197876, |
|
"rewards/margins": 0.277101993560791, |
|
"rewards/rejected": 0.4404650628566742, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.5676841980343852e-07, |
|
"logits/chosen": -1.3981895446777344, |
|
"logits/rejected": -1.4527111053466797, |
|
"logps/chosen": -2597.65966796875, |
|
"logps/rejected": -2179.39794921875, |
|
"loss": 0.5982, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.7483694553375244, |
|
"rewards/margins": 0.6122652292251587, |
|
"rewards/rejected": 0.13610415160655975, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.5346043565708167e-07, |
|
"logits/chosen": -1.3840751647949219, |
|
"logits/rejected": -1.3206273317337036, |
|
"logps/chosen": -3326.64208984375, |
|
"logps/rejected": -2419.403564453125, |
|
"loss": 0.6279, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.9665316343307495, |
|
"rewards/margins": 0.39615345001220703, |
|
"rewards/rejected": 0.570378303527832, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.5018138746462077e-07, |
|
"logits/chosen": -1.3687031269073486, |
|
"logits/rejected": -1.3737823963165283, |
|
"logps/chosen": -2446.22119140625, |
|
"logps/rejected": -2089.50146484375, |
|
"loss": 0.5952, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.6719570755958557, |
|
"rewards/margins": 0.43817195296287537, |
|
"rewards/rejected": 0.23378506302833557, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.4693154902563642e-07, |
|
"logits/chosen": -1.4316002130508423, |
|
"logits/rejected": -1.4517178535461426, |
|
"logps/chosen": -2900.650146484375, |
|
"logps/rejected": -2507.39111328125, |
|
"loss": 0.5741, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 1.0101929903030396, |
|
"rewards/margins": 0.6262551546096802, |
|
"rewards/rejected": 0.3839378356933594, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.4371119170070273e-07, |
|
"logits/chosen": -1.3841904401779175, |
|
"logits/rejected": -1.363883137702942, |
|
"logps/chosen": -1948.8519287109375, |
|
"logps/rejected": -1932.33984375, |
|
"loss": 0.6363, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.4831606447696686, |
|
"rewards/margins": 0.17371401190757751, |
|
"rewards/rejected": 0.3094465136528015, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.4052058438873004e-07, |
|
"logits/chosen": -1.416628122329712, |
|
"logits/rejected": -1.4233081340789795, |
|
"logps/chosen": -2243.0478515625, |
|
"logps/rejected": -1969.6051025390625, |
|
"loss": 0.6821, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.6370882987976074, |
|
"rewards/margins": 0.44900625944137573, |
|
"rewards/rejected": 0.18808197975158691, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.3735999350451043e-07, |
|
"logits/chosen": -1.4408037662506104, |
|
"logits/rejected": -1.404234528541565, |
|
"logps/chosen": -2907.764892578125, |
|
"logps/rejected": -2352.541259765625, |
|
"loss": 0.5851, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.8572597503662109, |
|
"rewards/margins": 0.5889304280281067, |
|
"rewards/rejected": 0.26832932233810425, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.3422968295647325e-07, |
|
"logits/chosen": -1.3623632192611694, |
|
"logits/rejected": -1.3807958364486694, |
|
"logps/chosen": -2706.734375, |
|
"logps/rejected": -2360.01220703125, |
|
"loss": 0.6645, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.5788125395774841, |
|
"rewards/margins": 0.5309603810310364, |
|
"rewards/rejected": 0.04785219207406044, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_logits/chosen": -1.3678345680236816, |
|
"eval_logits/rejected": -1.3329540491104126, |
|
"eval_logps/chosen": -2602.423828125, |
|
"eval_logps/rejected": -2204.72509765625, |
|
"eval_loss": 0.6291071176528931, |
|
"eval_rewards/accuracies": 0.6520000100135803, |
|
"eval_rewards/chosen": 0.7035152912139893, |
|
"eval_rewards/margins": 0.4455997347831726, |
|
"eval_rewards/rejected": 0.2579156160354614, |
|
"eval_runtime": 270.6778, |
|
"eval_samples_per_second": 7.389, |
|
"eval_steps_per_second": 0.462, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.3112991412464825e-07, |
|
"logits/chosen": -1.4341932535171509, |
|
"logits/rejected": -1.3131691217422485, |
|
"logps/chosen": -2790.000244140625, |
|
"logps/rejected": -2364.25634765625, |
|
"loss": 0.592, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.8394748568534851, |
|
"rewards/margins": 0.5635267496109009, |
|
"rewards/rejected": 0.2759481370449066, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.2806094583884114e-07, |
|
"logits/chosen": -1.4048922061920166, |
|
"logits/rejected": -1.338818907737732, |
|
"logps/chosen": -2550.039306640625, |
|
"logps/rejected": -2193.238037109375, |
|
"loss": 0.593, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.9174189567565918, |
|
"rewards/margins": 0.5785583257675171, |
|
"rewards/rejected": 0.3388606011867523, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.2502303435702043e-07, |
|
"logits/chosen": -1.365466594696045, |
|
"logits/rejected": -1.372934103012085, |
|
"logps/chosen": -2571.452880859375, |
|
"logps/rejected": -2407.875244140625, |
|
"loss": 0.6395, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.7644615173339844, |
|
"rewards/margins": 0.3260182738304138, |
|
"rewards/rejected": 0.43844324350357056, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.2201643334392082e-07, |
|
"logits/chosen": -1.4473758935928345, |
|
"logits/rejected": -1.4218379259109497, |
|
"logps/chosen": -2538.86669921875, |
|
"logps/rejected": -2070.79150390625, |
|
"loss": 0.6042, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.5745121836662292, |
|
"rewards/margins": 0.16391856968402863, |
|
"rewards/rejected": 0.41059359908103943, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.1904139384986123e-07, |
|
"logits/chosen": -1.490330696105957, |
|
"logits/rejected": -1.4432008266448975, |
|
"logps/chosen": -3188.41455078125, |
|
"logps/rejected": -2706.665283203125, |
|
"loss": 0.5808, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 1.0437430143356323, |
|
"rewards/margins": 0.7445145845413208, |
|
"rewards/rejected": 0.2992284893989563, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.1609816428978359e-07, |
|
"logits/chosen": -1.4559834003448486, |
|
"logits/rejected": -1.420280933380127, |
|
"logps/chosen": -2551.58740234375, |
|
"logps/rejected": -2584.430419921875, |
|
"loss": 0.6416, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.8050218820571899, |
|
"rewards/margins": 0.5264039635658264, |
|
"rewards/rejected": 0.2786179184913635, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.1318699042250918e-07, |
|
"logits/chosen": -1.3969361782073975, |
|
"logits/rejected": -1.3813354969024658, |
|
"logps/chosen": -2994.219970703125, |
|
"logps/rejected": -3177.122802734375, |
|
"loss": 0.7049, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.7557341456413269, |
|
"rewards/margins": 0.26294729113578796, |
|
"rewards/rejected": 0.4927869439125061, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.10308115330218e-07, |
|
"logits/chosen": -1.43484365940094, |
|
"logits/rejected": -1.3891115188598633, |
|
"logps/chosen": -3088.582275390625, |
|
"logps/rejected": -2204.107666015625, |
|
"loss": 0.6057, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.45172542333602905, |
|
"rewards/margins": 0.5553884506225586, |
|
"rewards/rejected": -0.10366306453943253, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0746177939815171e-07, |
|
"logits/chosen": -1.4643625020980835, |
|
"logits/rejected": -1.4422532320022583, |
|
"logps/chosen": -2440.419921875, |
|
"logps/rejected": -1975.225830078125, |
|
"loss": 0.582, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.4540809094905853, |
|
"rewards/margins": 0.652599573135376, |
|
"rewards/rejected": -0.19851863384246826, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0464822029454179e-07, |
|
"logits/chosen": -1.4160230159759521, |
|
"logits/rejected": -1.3704195022583008, |
|
"logps/chosen": -2510.39892578125, |
|
"logps/rejected": -2070.017333984375, |
|
"loss": 0.5786, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.3841037154197693, |
|
"rewards/margins": 0.3324592113494873, |
|
"rewards/rejected": 0.05164450407028198, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_logits/chosen": -1.3498371839523315, |
|
"eval_logits/rejected": -1.3173154592514038, |
|
"eval_logps/chosen": -2618.25341796875, |
|
"eval_logps/rejected": -2218.29443359375, |
|
"eval_loss": 0.6310141086578369, |
|
"eval_rewards/accuracies": 0.6579999923706055, |
|
"eval_rewards/chosen": 0.5452163815498352, |
|
"eval_rewards/margins": 0.42299649119377136, |
|
"eval_rewards/rejected": 0.12221993505954742, |
|
"eval_runtime": 280.1277, |
|
"eval_samples_per_second": 7.14, |
|
"eval_steps_per_second": 0.446, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0186767295076359e-07, |
|
"logits/chosen": -1.2851413488388062, |
|
"logits/rejected": -1.2941486835479736, |
|
"logps/chosen": -2658.156982421875, |
|
"logps/rejected": -2545.443359375, |
|
"loss": 0.6753, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.700161337852478, |
|
"rewards/margins": 0.09443531185388565, |
|
"rewards/rejected": 0.6057260632514954, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.91203695417201e-08, |
|
"logits/chosen": -1.4478733539581299, |
|
"logits/rejected": -1.4418970346450806, |
|
"logps/chosen": -2017.9547119140625, |
|
"logps/rejected": -1859.371826171875, |
|
"loss": 0.6449, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.4665658473968506, |
|
"rewards/margins": 0.1884872317314148, |
|
"rewards/rejected": 0.2780786454677582, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.640653946645527e-08, |
|
"logits/chosen": -1.4594916105270386, |
|
"logits/rejected": -1.3838837146759033, |
|
"logps/chosen": -2871.414794921875, |
|
"logps/rejected": -2458.6142578125, |
|
"loss": 0.6278, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.8108320236206055, |
|
"rewards/margins": 0.5832007527351379, |
|
"rewards/rejected": 0.22763130068778992, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.372640932899962e-08, |
|
"logits/chosen": -1.389960527420044, |
|
"logits/rejected": -1.3612440824508667, |
|
"logps/chosen": -1948.0992431640625, |
|
"logps/rejected": -1892.2113037109375, |
|
"loss": 0.7257, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.27821439504623413, |
|
"rewards/margins": 0.045331284403800964, |
|
"rewards/rejected": 0.23288312554359436, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.108020291944835e-08, |
|
"logits/chosen": -1.418404221534729, |
|
"logits/rejected": -1.3830190896987915, |
|
"logps/chosen": -3158.57080078125, |
|
"logps/rejected": -2620.3515625, |
|
"loss": 0.6405, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.5857509970664978, |
|
"rewards/margins": 0.29750341176986694, |
|
"rewards/rejected": 0.28824761509895325, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.84681411952749e-08, |
|
"logits/chosen": -1.4332993030548096, |
|
"logits/rejected": -1.414900302886963, |
|
"logps/chosen": -2372.030517578125, |
|
"logps/rejected": -2132.096435546875, |
|
"loss": 0.6393, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.41337814927101135, |
|
"rewards/margins": 0.3938220739364624, |
|
"rewards/rejected": 0.019556106999516487, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.589044226288156e-08, |
|
"logits/chosen": -1.4317436218261719, |
|
"logits/rejected": -1.347076177597046, |
|
"logps/chosen": -2397.37548828125, |
|
"logps/rejected": -2127.24951171875, |
|
"loss": 0.6105, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.4847371578216553, |
|
"rewards/margins": 0.4483153820037842, |
|
"rewards/rejected": 0.03642178699374199, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.334732135938761e-08, |
|
"logits/chosen": -1.2719449996948242, |
|
"logits/rejected": -1.2198649644851685, |
|
"logps/chosen": -2358.66259765625, |
|
"logps/rejected": -2044.496826171875, |
|
"loss": 0.6566, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.3985385596752167, |
|
"rewards/margins": 0.4965389370918274, |
|
"rewards/rejected": -0.09800038486719131, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.08389908346565e-08, |
|
"logits/chosen": -1.5145864486694336, |
|
"logits/rejected": -1.4807628393173218, |
|
"logps/chosen": -2540.138916015625, |
|
"logps/rejected": -2373.93701171875, |
|
"loss": 0.6552, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.4005827009677887, |
|
"rewards/margins": 0.17770811915397644, |
|
"rewards/rejected": 0.22287459671497345, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.836566013356521e-08, |
|
"logits/chosen": -1.2830257415771484, |
|
"logits/rejected": -1.309693694114685, |
|
"logps/chosen": -2139.562255859375, |
|
"logps/rejected": -1838.9498291015625, |
|
"loss": 0.604, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.2404891550540924, |
|
"rewards/margins": 0.255484402179718, |
|
"rewards/rejected": -0.014995294623076916, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": -1.3759506940841675, |
|
"eval_logits/rejected": -1.34440279006958, |
|
"eval_logps/chosen": -2639.503173828125, |
|
"eval_logps/rejected": -2235.78515625, |
|
"eval_loss": 0.637482225894928, |
|
"eval_rewards/accuracies": 0.6539999842643738, |
|
"eval_rewards/chosen": 0.3327209949493408, |
|
"eval_rewards/margins": 0.38540521264076233, |
|
"eval_rewards/rejected": -0.05268419533967972, |
|
"eval_runtime": 278.5127, |
|
"eval_samples_per_second": 7.181, |
|
"eval_steps_per_second": 0.449, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.59275357785154e-08, |
|
"logits/chosen": -1.4814903736114502, |
|
"logits/rejected": -1.3584920167922974, |
|
"logps/chosen": -2739.283935546875, |
|
"logps/rejected": -1941.9361572265625, |
|
"loss": 0.6488, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.32887548208236694, |
|
"rewards/margins": 0.31491416692733765, |
|
"rewards/rejected": 0.01396133191883564, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.352482135218929e-08, |
|
"logits/chosen": -1.497166633605957, |
|
"logits/rejected": -1.419870138168335, |
|
"logps/chosen": -2452.299072265625, |
|
"logps/rejected": -1719.0814208984375, |
|
"loss": 0.6135, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.4226195812225342, |
|
"rewards/margins": 0.5035466551780701, |
|
"rewards/rejected": -0.08092708140611649, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.115771748054994e-08, |
|
"logits/chosen": -1.4832055568695068, |
|
"logits/rejected": -1.4479320049285889, |
|
"logps/chosen": -2702.10791015625, |
|
"logps/rejected": -2256.0849609375, |
|
"loss": 0.6603, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.7271806001663208, |
|
"rewards/margins": 0.25195473432540894, |
|
"rewards/rejected": 0.47522586584091187, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 6.882642181608938e-08, |
|
"logits/chosen": -1.3810951709747314, |
|
"logits/rejected": -1.2905547618865967, |
|
"logps/chosen": -2894.007080078125, |
|
"logps/rejected": -2063.866943359375, |
|
"loss": 0.6606, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.7991534471511841, |
|
"rewards/margins": 0.7913631200790405, |
|
"rewards/rejected": 0.007790341041982174, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 6.653112902132468e-08, |
|
"logits/chosen": -1.3729079961776733, |
|
"logits/rejected": -1.3389288187026978, |
|
"logps/chosen": -2688.36376953125, |
|
"logps/rejected": -2399.32763671875, |
|
"loss": 0.6659, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.43051600456237793, |
|
"rewards/margins": 0.23889867961406708, |
|
"rewards/rejected": 0.19161732494831085, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 6.427203075254389e-08, |
|
"logits/chosen": -1.4705748558044434, |
|
"logits/rejected": -1.4179273843765259, |
|
"logps/chosen": -3436.260986328125, |
|
"logps/rejected": -2933.96923828125, |
|
"loss": 0.6552, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.7826542854309082, |
|
"rewards/margins": 1.2829375267028809, |
|
"rewards/rejected": -0.5002831220626831, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 6.204931564380212e-08, |
|
"logits/chosen": -1.477654218673706, |
|
"logits/rejected": -1.4244531393051147, |
|
"logps/chosen": -2670.315673828125, |
|
"logps/rejected": -2377.605712890625, |
|
"loss": 0.6777, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.8277036547660828, |
|
"rewards/margins": 0.34663817286491394, |
|
"rewards/rejected": 0.48106545209884644, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5.98631692911713e-08, |
|
"logits/chosen": -1.3127539157867432, |
|
"logits/rejected": -1.3314340114593506, |
|
"logps/chosen": -2312.9111328125, |
|
"logps/rejected": -2017.139404296875, |
|
"loss": 0.6084, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.5287872552871704, |
|
"rewards/margins": 0.13690263032913208, |
|
"rewards/rejected": 0.39188462495803833, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5.7713774237242716e-08, |
|
"logits/chosen": -1.291377305984497, |
|
"logits/rejected": -1.405045747756958, |
|
"logps/chosen": -1881.165283203125, |
|
"logps/rejected": -1862.961181640625, |
|
"loss": 0.582, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.5566512942314148, |
|
"rewards/margins": 0.4035774767398834, |
|
"rewards/rejected": 0.1530737578868866, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5.5601309955884965e-08, |
|
"logits/chosen": -1.4422776699066162, |
|
"logits/rejected": -1.4091880321502686, |
|
"logps/chosen": -2665.621337890625, |
|
"logps/rejected": -2591.151123046875, |
|
"loss": 0.6704, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.6587532758712769, |
|
"rewards/margins": 0.12101428210735321, |
|
"rewards/rejected": 0.5377389788627625, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_logits/chosen": -1.3585261106491089, |
|
"eval_logits/rejected": -1.3240634202957153, |
|
"eval_logps/chosen": -2599.5048828125, |
|
"eval_logps/rejected": -2201.557861328125, |
|
"eval_loss": 0.6269048452377319, |
|
"eval_rewards/accuracies": 0.6539999842643738, |
|
"eval_rewards/chosen": 0.7327041625976562, |
|
"eval_rewards/margins": 0.4431154131889343, |
|
"eval_rewards/rejected": 0.2895888090133667, |
|
"eval_runtime": 266.5396, |
|
"eval_samples_per_second": 7.504, |
|
"eval_steps_per_second": 0.469, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 5.352595283725758e-08, |
|
"logits/chosen": -1.3561054468154907, |
|
"logits/rejected": -1.364051103591919, |
|
"logps/chosen": -2751.002197265625, |
|
"logps/rejected": -2776.696044921875, |
|
"loss": 0.5839, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.9600070714950562, |
|
"rewards/margins": 0.5443626642227173, |
|
"rewards/rejected": 0.41564440727233887, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 5.1487876173082704e-08, |
|
"logits/chosen": -1.3982620239257812, |
|
"logits/rejected": -1.3981740474700928, |
|
"logps/chosen": -2878.243408203125, |
|
"logps/rejected": -2694.689208984375, |
|
"loss": 0.6258, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.723610520362854, |
|
"rewards/margins": 0.2514679431915283, |
|
"rewards/rejected": 0.4721425473690033, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.948725014217514e-08, |
|
"logits/chosen": -1.425713300704956, |
|
"logits/rejected": -1.3950655460357666, |
|
"logps/chosen": -3086.80078125, |
|
"logps/rejected": -2376.532958984375, |
|
"loss": 0.644, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.8816632032394409, |
|
"rewards/margins": 0.4414462447166443, |
|
"rewards/rejected": 0.44021695852279663, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.752424179623299e-08, |
|
"logits/chosen": -1.4097102880477905, |
|
"logits/rejected": -1.3888671398162842, |
|
"logps/chosen": -2709.783935546875, |
|
"logps/rejected": -2628.78759765625, |
|
"loss": 0.6948, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.5886452794075012, |
|
"rewards/margins": 0.1681179255247116, |
|
"rewards/rejected": 0.4205273687839508, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.559901504588809e-08, |
|
"logits/chosen": -1.210430383682251, |
|
"logits/rejected": -1.2701146602630615, |
|
"logps/chosen": -2500.774169921875, |
|
"logps/rejected": -2351.170166015625, |
|
"loss": 0.6319, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.28235870599746704, |
|
"rewards/margins": 0.1418120115995407, |
|
"rewards/rejected": 0.14054664969444275, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.371173064702011e-08, |
|
"logits/chosen": -1.3905829191207886, |
|
"logits/rejected": -1.3594163656234741, |
|
"logps/chosen": -1871.867919921875, |
|
"logps/rejected": -2297.736328125, |
|
"loss": 0.6397, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.32014450430870056, |
|
"rewards/margins": 0.10169104486703873, |
|
"rewards/rejected": 0.21845343708992004, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.1862546187333145e-08, |
|
"logits/chosen": -1.4513777494430542, |
|
"logits/rejected": -1.4358501434326172, |
|
"logps/chosen": -2513.241455078125, |
|
"logps/rejected": -2141.718994140625, |
|
"loss": 0.6454, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.5060147047042847, |
|
"rewards/margins": 0.21100831031799316, |
|
"rewards/rejected": 0.2950064539909363, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.005161607319746e-08, |
|
"logits/chosen": -1.4373111724853516, |
|
"logits/rejected": -1.399320363998413, |
|
"logps/chosen": -2743.34814453125, |
|
"logps/rejected": -2631.894775390625, |
|
"loss": 0.6441, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.6500831246376038, |
|
"rewards/margins": 0.30057162046432495, |
|
"rewards/rejected": 0.3495115339756012, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.827909151675651e-08, |
|
"logits/chosen": -1.281432867050171, |
|
"logits/rejected": -1.2471529245376587, |
|
"logps/chosen": -2523.722412109375, |
|
"logps/rejected": -2145.090087890625, |
|
"loss": 0.6569, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.4543123245239258, |
|
"rewards/margins": 0.2866598069667816, |
|
"rewards/rejected": 0.16765250265598297, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.6545120523300554e-08, |
|
"logits/chosen": -1.2581170797348022, |
|
"logits/rejected": -1.2861872911453247, |
|
"logps/chosen": -2385.53515625, |
|
"logps/rejected": -2062.32470703125, |
|
"loss": 0.6365, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.6952205300331116, |
|
"rewards/margins": 0.453753799200058, |
|
"rewards/rejected": 0.24146680533885956, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_logits/chosen": -1.3370972871780396, |
|
"eval_logits/rejected": -1.3038172721862793, |
|
"eval_logps/chosen": -2603.77392578125, |
|
"eval_logps/rejected": -2204.74365234375, |
|
"eval_loss": 0.6270672678947449, |
|
"eval_rewards/accuracies": 0.656000018119812, |
|
"eval_rewards/chosen": 0.6900160312652588, |
|
"eval_rewards/margins": 0.4322858154773712, |
|
"eval_rewards/rejected": 0.2577301859855652, |
|
"eval_runtime": 274.6396, |
|
"eval_samples_per_second": 7.282, |
|
"eval_steps_per_second": 0.455, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.484984787890854e-08, |
|
"logits/chosen": -1.440553903579712, |
|
"logits/rejected": -1.4239243268966675, |
|
"logps/chosen": -2027.2261962890625, |
|
"logps/rejected": -2211.42333984375, |
|
"loss": 0.6302, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.5019555687904358, |
|
"rewards/margins": 0.24998077750205994, |
|
"rewards/rejected": 0.25197476148605347, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.3193415138358605e-08, |
|
"logits/chosen": -1.3577989339828491, |
|
"logits/rejected": -1.3291637897491455, |
|
"logps/chosen": -2733.109375, |
|
"logps/rejected": -2187.62939453125, |
|
"loss": 0.6422, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.5953333377838135, |
|
"rewards/margins": 0.41831302642822266, |
|
"rewards/rejected": 0.1770203560590744, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.1575960613307697e-08, |
|
"logits/chosen": -1.3546245098114014, |
|
"logits/rejected": -1.3422510623931885, |
|
"logps/chosen": -3364.149169921875, |
|
"logps/rejected": -2881.01806640625, |
|
"loss": 0.6371, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.7074936628341675, |
|
"rewards/margins": 0.3585987389087677, |
|
"rewards/rejected": 0.348894864320755, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.99976193607433e-08, |
|
"logits/chosen": -1.4342143535614014, |
|
"logits/rejected": -1.3803553581237793, |
|
"logps/chosen": -2840.54345703125, |
|
"logps/rejected": -2618.3125, |
|
"loss": 0.604, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.7095770835876465, |
|
"rewards/margins": 0.4237367510795593, |
|
"rewards/rejected": 0.28584036231040955, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.8458523171705606e-08, |
|
"logits/chosen": -1.3679982423782349, |
|
"logits/rejected": -1.3952059745788574, |
|
"logps/chosen": -2528.96484375, |
|
"logps/rejected": -1968.1148681640625, |
|
"loss": 0.6092, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.5186874270439148, |
|
"rewards/margins": 0.28269147872924805, |
|
"rewards/rejected": 0.23599597811698914, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.6958800560283766e-08, |
|
"logits/chosen": -1.2991187572479248, |
|
"logits/rejected": -1.2987558841705322, |
|
"logps/chosen": -1767.0989990234375, |
|
"logps/rejected": -1917.181396484375, |
|
"loss": 0.6245, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.22372308373451233, |
|
"rewards/margins": 0.1914294809103012, |
|
"rewards/rejected": 0.032293595373630524, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.5498576752884083e-08, |
|
"logits/chosen": -1.2810406684875488, |
|
"logits/rejected": -1.2834056615829468, |
|
"logps/chosen": -2079.9150390625, |
|
"logps/rejected": -1688.689208984375, |
|
"loss": 0.5937, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.3866010308265686, |
|
"rewards/margins": 0.4304170608520508, |
|
"rewards/rejected": -0.04381602257490158, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.4077973677774255e-08, |
|
"logits/chosen": -1.3817551136016846, |
|
"logits/rejected": -1.3114452362060547, |
|
"logps/chosen": -2581.08251953125, |
|
"logps/rejected": -2052.685302734375, |
|
"loss": 0.6545, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.6486982107162476, |
|
"rewards/margins": 0.4967314600944519, |
|
"rewards/rejected": 0.15196672081947327, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.2697109954902262e-08, |
|
"logits/chosen": -1.3522804975509644, |
|
"logits/rejected": -1.3641847372055054, |
|
"logps/chosen": -2173.435302734375, |
|
"logps/rejected": -2246.341064453125, |
|
"loss": 0.6613, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.4131731390953064, |
|
"rewards/margins": 0.2463291585445404, |
|
"rewards/rejected": 0.1668439656496048, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.13561008859916e-08, |
|
"logits/chosen": -1.3085618019104004, |
|
"logits/rejected": -1.340914249420166, |
|
"logps/chosen": -2336.463134765625, |
|
"logps/rejected": -1888.4931640625, |
|
"loss": 0.6621, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.40662437677383423, |
|
"rewards/margins": 0.2938459515571594, |
|
"rewards/rejected": 0.11277846246957779, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_logits/chosen": -1.3320984840393066, |
|
"eval_logits/rejected": -1.299089789390564, |
|
"eval_logps/chosen": -2609.7431640625, |
|
"eval_logps/rejected": -2209.78271484375, |
|
"eval_loss": 0.6278749704360962, |
|
"eval_rewards/accuracies": 0.6579999923706055, |
|
"eval_rewards/chosen": 0.630323052406311, |
|
"eval_rewards/margins": 0.4229816198348999, |
|
"eval_rewards/rejected": 0.20734144747257233, |
|
"eval_runtime": 272.2354, |
|
"eval_samples_per_second": 7.347, |
|
"eval_steps_per_second": 0.459, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.0055058444913507e-08, |
|
"logits/chosen": -1.448785424232483, |
|
"logits/rejected": -1.4095611572265625, |
|
"logps/chosen": -2245.84423828125, |
|
"logps/rejected": -2347.550537109375, |
|
"loss": 0.6379, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.6017327904701233, |
|
"rewards/margins": 0.3268904685974121, |
|
"rewards/rejected": 0.2748422920703888, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.879409126833753e-08, |
|
"logits/chosen": -1.4214824438095093, |
|
"logits/rejected": -1.410632610321045, |
|
"logps/chosen": -2336.942626953125, |
|
"logps/rejected": -2101.439697265625, |
|
"loss": 0.6053, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.49574050307273865, |
|
"rewards/margins": 0.32954609394073486, |
|
"rewards/rejected": 0.16619448363780975, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.757330464665996e-08, |
|
"logits/chosen": -1.3709776401519775, |
|
"logits/rejected": -1.3891581296920776, |
|
"logps/chosen": -2405.004638671875, |
|
"logps/rejected": -2451.75537109375, |
|
"loss": 0.6052, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.5401250123977661, |
|
"rewards/margins": 0.3884989619255066, |
|
"rewards/rejected": 0.15162606537342072, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.639280051521241e-08, |
|
"logits/chosen": -1.5023291110992432, |
|
"logits/rejected": -1.4649862051010132, |
|
"logps/chosen": -2664.8759765625, |
|
"logps/rejected": -2538.817626953125, |
|
"loss": 0.6109, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.4001461863517761, |
|
"rewards/margins": 0.20141109824180603, |
|
"rewards/rejected": 0.19873513281345367, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.525267744575015e-08, |
|
"logits/chosen": -1.3901126384735107, |
|
"logits/rejected": -1.394431710243225, |
|
"logps/chosen": -2564.427490234375, |
|
"logps/rejected": -2594.514404296875, |
|
"loss": 0.6037, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.7964712381362915, |
|
"rewards/margins": 0.5120159387588501, |
|
"rewards/rejected": 0.28445538878440857, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.4153030638221375e-08, |
|
"logits/chosen": -1.4207435846328735, |
|
"logits/rejected": -1.4046471118927002, |
|
"logps/chosen": -2954.137939453125, |
|
"logps/rejected": -2511.393798828125, |
|
"loss": 0.61, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.6611908674240112, |
|
"rewards/margins": 0.4354327321052551, |
|
"rewards/rejected": 0.2257581651210785, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.309395191281798e-08, |
|
"logits/chosen": -1.330440878868103, |
|
"logits/rejected": -1.348436713218689, |
|
"logps/chosen": -2192.97412109375, |
|
"logps/rejected": -2151.583740234375, |
|
"loss": 0.6413, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.434548556804657, |
|
"rewards/margins": 0.1627674400806427, |
|
"rewards/rejected": 0.27178114652633667, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.207552970230885e-08, |
|
"logits/chosen": -1.4331698417663574, |
|
"logits/rejected": -1.414426326751709, |
|
"logps/chosen": -2461.943115234375, |
|
"logps/rejected": -2080.1328125, |
|
"loss": 0.6443, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.4848058819770813, |
|
"rewards/margins": 0.4457271099090576, |
|
"rewards/rejected": 0.0390787310898304, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.1097849044655494e-08, |
|
"logits/chosen": -1.362441897392273, |
|
"logits/rejected": -1.3546955585479736, |
|
"logps/chosen": -2819.646728515625, |
|
"logps/rejected": -2652.50439453125, |
|
"loss": 0.6953, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.5813437700271606, |
|
"rewards/margins": 0.29619497060775757, |
|
"rewards/rejected": 0.28514885902404785, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.0160991575911382e-08, |
|
"logits/chosen": -1.226768970489502, |
|
"logits/rejected": -1.2340877056121826, |
|
"logps/chosen": -2563.304931640625, |
|
"logps/rejected": -2343.0185546875, |
|
"loss": 0.6597, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.4969327449798584, |
|
"rewards/margins": 0.1737329661846161, |
|
"rewards/rejected": 0.3231998085975647, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_logits/chosen": -1.334847331047058, |
|
"eval_logits/rejected": -1.3028244972229004, |
|
"eval_logps/chosen": -2617.37744140625, |
|
"eval_logps/rejected": -2216.108154296875, |
|
"eval_loss": 0.6294077038764954, |
|
"eval_rewards/accuracies": 0.6579999923706055, |
|
"eval_rewards/chosen": 0.5539795756340027, |
|
"eval_rewards/margins": 0.40989428758621216, |
|
"eval_rewards/rejected": 0.14408528804779053, |
|
"eval_runtime": 270.7461, |
|
"eval_samples_per_second": 7.387, |
|
"eval_steps_per_second": 0.462, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 9.265035523405628e-09, |
|
"logits/chosen": -1.4237889051437378, |
|
"logits/rejected": -1.3327428102493286, |
|
"logps/chosen": -2901.97412109375, |
|
"logps/rejected": -2182.808349609375, |
|
"loss": 0.6106, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.6185927391052246, |
|
"rewards/margins": 0.4138878285884857, |
|
"rewards/rejected": 0.20470492541790009, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 8.410055699210716e-09, |
|
"logits/chosen": -1.3188023567199707, |
|
"logits/rejected": -1.3039883375167847, |
|
"logps/chosen": -2117.862060546875, |
|
"logps/rejected": -2145.378662109375, |
|
"loss": 0.6161, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.2947086691856384, |
|
"rewards/margins": 0.19353394210338593, |
|
"rewards/rejected": 0.1011747345328331, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 7.59612349389599e-09, |
|
"logits/chosen": -1.3702406883239746, |
|
"logits/rejected": -1.3394161462783813, |
|
"logps/chosen": -2715.30615234375, |
|
"logps/rejected": -2510.065185546875, |
|
"loss": 0.6207, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.7481818199157715, |
|
"rewards/margins": 0.404646635055542, |
|
"rewards/rejected": 0.3435351848602295, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 6.823306870566314e-09, |
|
"logits/chosen": -1.3175442218780518, |
|
"logits/rejected": -1.3016589879989624, |
|
"logps/chosen": -2922.791259765625, |
|
"logps/rejected": -2657.24951171875, |
|
"loss": 0.632, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.4680928587913513, |
|
"rewards/margins": 0.19783690571784973, |
|
"rewards/rejected": 0.270255982875824, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 6.0916703591873396e-09, |
|
"logits/chosen": -1.373708724975586, |
|
"logits/rejected": -1.3260128498077393, |
|
"logps/chosen": -2837.0703125, |
|
"logps/rejected": -2214.2490234375, |
|
"loss": 0.5756, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.7062289118766785, |
|
"rewards/margins": 0.4836703836917877, |
|
"rewards/rejected": 0.22255854308605194, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 5.401275051197196e-09, |
|
"logits/chosen": -1.2850544452667236, |
|
"logits/rejected": -1.304872989654541, |
|
"logps/chosen": -2000.0989990234375, |
|
"logps/rejected": -1932.3131103515625, |
|
"loss": 0.6367, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.4161587357521057, |
|
"rewards/margins": 0.26545530557632446, |
|
"rewards/rejected": 0.15070338547229767, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.752178594405465e-09, |
|
"logits/chosen": -1.2426577806472778, |
|
"logits/rejected": -1.2828842401504517, |
|
"logps/chosen": -2575.1416015625, |
|
"logps/rejected": -2630.06201171875, |
|
"loss": 0.6169, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.5871464610099792, |
|
"rewards/margins": 0.4162687659263611, |
|
"rewards/rejected": 0.17087773978710175, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.144435188179529e-09, |
|
"logits/chosen": -1.380395531654358, |
|
"logits/rejected": -1.3460490703582764, |
|
"logps/chosen": -2178.73583984375, |
|
"logps/rejected": -1936.242919921875, |
|
"loss": 0.612, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.585101306438446, |
|
"rewards/margins": 0.3356373906135559, |
|
"rewards/rejected": 0.24946394562721252, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.5780955789187497e-09, |
|
"logits/chosen": -1.3782151937484741, |
|
"logits/rejected": -1.345668911933899, |
|
"logps/chosen": -3300.29248046875, |
|
"logps/rejected": -2721.07958984375, |
|
"loss": 0.6427, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.9547792673110962, |
|
"rewards/margins": 0.46429508924484253, |
|
"rewards/rejected": 0.49048417806625366, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.0532070558177415e-09, |
|
"logits/chosen": -1.3277114629745483, |
|
"logits/rejected": -1.3181272745132446, |
|
"logps/chosen": -2194.970947265625, |
|
"logps/rejected": -1831.0181884765625, |
|
"loss": 0.671, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.3654665946960449, |
|
"rewards/margins": 0.46283870935440063, |
|
"rewards/rejected": -0.09737209975719452, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_logits/chosen": -1.3357877731323242, |
|
"eval_logits/rejected": -1.3033195734024048, |
|
"eval_logps/chosen": -2613.330322265625, |
|
"eval_logps/rejected": -2212.7783203125, |
|
"eval_loss": 0.6284996271133423, |
|
"eval_rewards/accuracies": 0.6600000262260437, |
|
"eval_rewards/chosen": 0.5944509506225586, |
|
"eval_rewards/margins": 0.41706666350364685, |
|
"eval_rewards/rejected": 0.17738424241542816, |
|
"eval_runtime": 273.8684, |
|
"eval_samples_per_second": 7.303, |
|
"eval_steps_per_second": 0.456, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.5698134469169243e-09, |
|
"logits/chosen": -1.3617829084396362, |
|
"logits/rejected": -1.394221544265747, |
|
"logps/chosen": -2001.156494140625, |
|
"logps/rejected": -1911.104736328125, |
|
"loss": 0.581, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.5259832143783569, |
|
"rewards/margins": 0.3205917775630951, |
|
"rewards/rejected": 0.20539140701293945, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.127955115443725e-09, |
|
"logits/chosen": -1.38681960105896, |
|
"logits/rejected": -1.381144404411316, |
|
"logps/chosen": -2056.17724609375, |
|
"logps/rejected": -1763.3297119140625, |
|
"loss": 0.5337, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.6707687377929688, |
|
"rewards/margins": 0.6110190153121948, |
|
"rewards/rejected": 0.0597497932612896, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.727668956441497e-09, |
|
"logits/chosen": -1.3605945110321045, |
|
"logits/rejected": -1.3855431079864502, |
|
"logps/chosen": -2391.19384765625, |
|
"logps/rejected": -2493.66259765625, |
|
"loss": 0.6363, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.550477147102356, |
|
"rewards/margins": 0.11832934617996216, |
|
"rewards/rejected": 0.432147741317749, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.3689883936894298e-09, |
|
"logits/chosen": -1.4387106895446777, |
|
"logits/rejected": -1.3756752014160156, |
|
"logps/chosen": -2820.1337890625, |
|
"logps/rejected": -2417.27783203125, |
|
"loss": 0.6197, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.8682245016098022, |
|
"rewards/margins": 0.6829289197921753, |
|
"rewards/rejected": 0.1852956861257553, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.051943376911224e-09, |
|
"logits/chosen": -1.4866057634353638, |
|
"logits/rejected": -1.391998529434204, |
|
"logps/chosen": -2453.385986328125, |
|
"logps/rejected": -1823.2886962890625, |
|
"loss": 0.5907, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.4227697253227234, |
|
"rewards/margins": 0.3134082853794098, |
|
"rewards/rejected": 0.10936151444911957, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 7.765603792745934e-10, |
|
"logits/chosen": -1.4555871486663818, |
|
"logits/rejected": -1.450751543045044, |
|
"logps/chosen": -2521.285400390625, |
|
"logps/rejected": -2256.712890625, |
|
"loss": 0.6001, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.5491132140159607, |
|
"rewards/margins": 0.3491145074367523, |
|
"rewards/rejected": 0.19999869167804718, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5.428623951805322e-10, |
|
"logits/chosen": -1.2330322265625, |
|
"logits/rejected": -1.3336702585220337, |
|
"logps/chosen": -2287.126708984375, |
|
"logps/rejected": -2347.366455078125, |
|
"loss": 0.5997, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.6386030316352844, |
|
"rewards/margins": 0.4102831780910492, |
|
"rewards/rejected": 0.22831980884075165, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.508689383435182e-10, |
|
"logits/chosen": -1.3702977895736694, |
|
"logits/rejected": -1.3696632385253906, |
|
"logps/chosen": -2384.869384765625, |
|
"logps/rejected": -2304.236083984375, |
|
"loss": 0.6022, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.36002689599990845, |
|
"rewards/margins": 0.4558481276035309, |
|
"rewards/rejected": -0.09582126140594482, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.0059604016192665e-10, |
|
"logits/chosen": -1.4226309061050415, |
|
"logits/rejected": -1.3605704307556152, |
|
"logps/chosen": -2192.42529296875, |
|
"logps/rejected": -1818.756591796875, |
|
"loss": 0.6746, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.3192385137081146, |
|
"rewards/margins": 0.25180768966674805, |
|
"rewards/rejected": 0.06743079423904419, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 9.205624837949066e-11, |
|
"logits/chosen": -1.3719966411590576, |
|
"logits/rejected": -1.394190788269043, |
|
"logps/chosen": -2545.56494140625, |
|
"logps/rejected": -2351.23876953125, |
|
"loss": 0.6328, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.5859331488609314, |
|
"rewards/margins": 0.3292482793331146, |
|
"rewards/rejected": 0.25668492913246155, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_logits/chosen": -1.3356244564056396, |
|
"eval_logits/rejected": -1.3031599521636963, |
|
"eval_logps/chosen": -2612.92578125, |
|
"eval_logps/rejected": -2212.490234375, |
|
"eval_loss": 0.6283265948295593, |
|
"eval_rewards/accuracies": 0.6579999923706055, |
|
"eval_rewards/chosen": 0.5984972715377808, |
|
"eval_rewards/margins": 0.4182315766811371, |
|
"eval_rewards/rejected": 0.18026570975780487, |
|
"eval_runtime": 279.1253, |
|
"eval_samples_per_second": 7.165, |
|
"eval_steps_per_second": 0.448, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.5258626037638618e-11, |
|
"logits/chosen": -1.3538461923599243, |
|
"logits/rejected": -1.344422698020935, |
|
"logps/chosen": -2360.3203125, |
|
"logps/rejected": -2012.327880859375, |
|
"loss": 0.6754, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.3601759374141693, |
|
"rewards/margins": 0.11125414073467255, |
|
"rewards/rejected": 0.24892178177833557, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.087507185999371e-13, |
|
"logits/chosen": -1.4853910207748413, |
|
"logits/rejected": -1.4589704275131226, |
|
"logps/chosen": -2143.158935546875, |
|
"logps/rejected": -2006.003173828125, |
|
"loss": 0.6016, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.35302072763442993, |
|
"rewards/margins": 0.34298470616340637, |
|
"rewards/rejected": 0.010035954415798187, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 3821, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6510803117330272, |
|
"train_runtime": 30377.0855, |
|
"train_samples_per_second": 2.013, |
|
"train_steps_per_second": 0.126 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3821, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10000000000, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|