|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 3750, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.3333333333333334e-08, |
|
"logits/chosen": -1.4607182741165161, |
|
"logits/rejected": -1.0577633380889893, |
|
"logps/chosen": -377.1839599609375, |
|
"logps/rejected": -1292.140625, |
|
"loss": 0.3828, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.3333333333333336e-07, |
|
"logits/chosen": -1.4249842166900635, |
|
"logits/rejected": -1.0064045190811157, |
|
"logps/chosen": -603.1861572265625, |
|
"logps/rejected": -1303.3701171875, |
|
"loss": 0.3718, |
|
"rewards/accuracies": 0.4444444477558136, |
|
"rewards/chosen": 2.63900601567002e-05, |
|
"rewards/margins": 0.00017314580327365547, |
|
"rewards/rejected": -0.00014675571583211422, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.666666666666667e-07, |
|
"logits/chosen": -1.4733855724334717, |
|
"logits/rejected": -1.0931047201156616, |
|
"logps/chosen": -545.4578857421875, |
|
"logps/rejected": -1043.0272216796875, |
|
"loss": 0.4125, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.0002783415839076042, |
|
"rewards/margins": -0.000188146106665954, |
|
"rewards/rejected": -9.019548451760784e-05, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"logits/chosen": -1.6173245906829834, |
|
"logits/rejected": -1.0555126667022705, |
|
"logps/chosen": -654.3507080078125, |
|
"logps/rejected": -1315.276611328125, |
|
"loss": 0.3832, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.0005331189604476094, |
|
"rewards/margins": 0.001845326623879373, |
|
"rewards/rejected": -0.0023784455843269825, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.333333333333335e-07, |
|
"logits/chosen": -1.3433172702789307, |
|
"logits/rejected": -0.6946144700050354, |
|
"logps/chosen": -665.9903564453125, |
|
"logps/rejected": -1348.9879150390625, |
|
"loss": 0.3634, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0011921643745154142, |
|
"rewards/margins": 0.0034048897214233875, |
|
"rewards/rejected": -0.004597053863108158, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.666666666666667e-07, |
|
"logits/chosen": -1.370167851448059, |
|
"logits/rejected": -0.7938201427459717, |
|
"logps/chosen": -664.8009033203125, |
|
"logps/rejected": -1172.046630859375, |
|
"loss": 0.3289, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.0008570374920964241, |
|
"rewards/margins": 0.006419859826564789, |
|
"rewards/rejected": -0.007276897318661213, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.000000000000001e-07, |
|
"logits/chosen": -1.6229826211929321, |
|
"logits/rejected": -0.9315292239189148, |
|
"logps/chosen": -660.7051391601562, |
|
"logps/rejected": -1401.3045654296875, |
|
"loss": 0.3344, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.00019816956773865968, |
|
"rewards/margins": 0.020777523517608643, |
|
"rewards/rejected": -0.020975695922970772, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.333333333333334e-07, |
|
"logits/chosen": -1.5964797735214233, |
|
"logits/rejected": -0.636447548866272, |
|
"logps/chosen": -600.3419189453125, |
|
"logps/rejected": -1338.2294921875, |
|
"loss": 0.3384, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.0026218045968562365, |
|
"rewards/margins": 0.03453027456998825, |
|
"rewards/rejected": -0.03190847486257553, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.066666666666667e-06, |
|
"logits/chosen": -1.6199884414672852, |
|
"logits/rejected": -0.8888322114944458, |
|
"logps/chosen": -589.5464477539062, |
|
"logps/rejected": -1346.308349609375, |
|
"loss": 0.3065, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.005537457764148712, |
|
"rewards/margins": 0.04564369469881058, |
|
"rewards/rejected": -0.05118114873766899, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"logits/chosen": -1.1931430101394653, |
|
"logits/rejected": -0.7813756465911865, |
|
"logps/chosen": -617.9501342773438, |
|
"logps/rejected": -1214.172119140625, |
|
"loss": 0.3072, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.029573017731308937, |
|
"rewards/margins": 0.08381669223308563, |
|
"rewards/rejected": -0.11338971555233002, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.3333333333333334e-06, |
|
"logits/chosen": -1.1626551151275635, |
|
"logits/rejected": -0.36785703897476196, |
|
"logps/chosen": -628.2637329101562, |
|
"logps/rejected": -1500.320556640625, |
|
"loss": 0.2346, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.11836127936840057, |
|
"rewards/margins": 0.16349823772907257, |
|
"rewards/rejected": -0.28185951709747314, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.4666666666666669e-06, |
|
"logits/chosen": -1.1234421730041504, |
|
"logits/rejected": -0.16796138882637024, |
|
"logps/chosen": -802.6090087890625, |
|
"logps/rejected": -1697.603759765625, |
|
"loss": 0.1828, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.2379436492919922, |
|
"rewards/margins": 0.24783344566822052, |
|
"rewards/rejected": -0.48577699065208435, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.6000000000000001e-06, |
|
"logits/chosen": -1.1458255052566528, |
|
"logits/rejected": -0.4321994185447693, |
|
"logps/chosen": -921.9117431640625, |
|
"logps/rejected": -1853.3466796875, |
|
"loss": 0.2603, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.3034071922302246, |
|
"rewards/margins": 0.25969117879867554, |
|
"rewards/rejected": -0.5630983710289001, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.7333333333333336e-06, |
|
"logits/chosen": -1.2709288597106934, |
|
"logits/rejected": -0.7817854881286621, |
|
"logps/chosen": -811.3619384765625, |
|
"logps/rejected": -1703.904541015625, |
|
"loss": 0.2519, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.21060828864574432, |
|
"rewards/margins": 0.20279932022094727, |
|
"rewards/rejected": -0.413407564163208, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.8666666666666669e-06, |
|
"logits/chosen": -1.4565277099609375, |
|
"logits/rejected": -0.5677322149276733, |
|
"logps/chosen": -727.5758666992188, |
|
"logps/rejected": -1671.2864990234375, |
|
"loss": 0.1793, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.2207428514957428, |
|
"rewards/margins": 0.21470816433429718, |
|
"rewards/rejected": -0.43545103073120117, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"logits/chosen": -1.2830668687820435, |
|
"logits/rejected": -0.31070059537887573, |
|
"logps/chosen": -957.83544921875, |
|
"logps/rejected": -1972.1334228515625, |
|
"loss": 0.2268, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.3733420968055725, |
|
"rewards/margins": 0.27919524908065796, |
|
"rewards/rejected": -0.6525374054908752, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.133333333333334e-06, |
|
"logits/chosen": -1.1059176921844482, |
|
"logits/rejected": -0.29462116956710815, |
|
"logps/chosen": -703.9768676757812, |
|
"logps/rejected": -1613.99169921875, |
|
"loss": 0.3007, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.25690004229545593, |
|
"rewards/margins": 0.2521464228630066, |
|
"rewards/rejected": -0.5090464353561401, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.266666666666667e-06, |
|
"logits/chosen": -1.1736003160476685, |
|
"logits/rejected": -0.5167692303657532, |
|
"logps/chosen": -673.1292724609375, |
|
"logps/rejected": -1497.2391357421875, |
|
"loss": 0.2481, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.15533636510372162, |
|
"rewards/margins": 0.13328927755355835, |
|
"rewards/rejected": -0.2886256277561188, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"logits/chosen": -1.507161021232605, |
|
"logits/rejected": -0.1557072103023529, |
|
"logps/chosen": -770.8878784179688, |
|
"logps/rejected": -1511.53857421875, |
|
"loss": 0.1887, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.17462053894996643, |
|
"rewards/margins": 0.15410876274108887, |
|
"rewards/rejected": -0.3287292718887329, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.5333333333333338e-06, |
|
"logits/chosen": -1.4759384393692017, |
|
"logits/rejected": -0.15519209206104279, |
|
"logps/chosen": -887.9100341796875, |
|
"logps/rejected": -1733.956298828125, |
|
"loss": 0.2453, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2314099818468094, |
|
"rewards/margins": 0.17767982184886932, |
|
"rewards/rejected": -0.4090898036956787, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.666666666666667e-06, |
|
"logits/chosen": -1.5579626560211182, |
|
"logits/rejected": -0.15316779911518097, |
|
"logps/chosen": -958.0621337890625, |
|
"logps/rejected": -1928.722412109375, |
|
"loss": 0.1696, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2732312083244324, |
|
"rewards/margins": 0.27958157658576965, |
|
"rewards/rejected": -0.5528126955032349, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8000000000000003e-06, |
|
"logits/chosen": -1.1436049938201904, |
|
"logits/rejected": 0.03915581852197647, |
|
"logps/chosen": -968.3499145507812, |
|
"logps/rejected": -1884.228759765625, |
|
"loss": 0.2664, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.2975374460220337, |
|
"rewards/margins": 0.2711247503757477, |
|
"rewards/rejected": -0.568662166595459, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9333333333333338e-06, |
|
"logits/chosen": -1.4685136079788208, |
|
"logits/rejected": -0.4126014709472656, |
|
"logps/chosen": -667.5554809570312, |
|
"logps/rejected": -1680.7935791015625, |
|
"loss": 0.1758, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.16100670397281647, |
|
"rewards/margins": 0.24677987396717072, |
|
"rewards/rejected": -0.4077865481376648, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.066666666666667e-06, |
|
"logits/chosen": -1.1665582656860352, |
|
"logits/rejected": -0.47384729981422424, |
|
"logps/chosen": -922.4220581054688, |
|
"logps/rejected": -1947.9945068359375, |
|
"loss": 0.1925, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.2523236870765686, |
|
"rewards/margins": 0.24001212418079376, |
|
"rewards/rejected": -0.49233585596084595, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.2000000000000003e-06, |
|
"logits/chosen": -1.6133432388305664, |
|
"logits/rejected": -0.5420857071876526, |
|
"logps/chosen": -857.8092041015625, |
|
"logps/rejected": -1654.6234130859375, |
|
"loss": 0.1908, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.2039283812046051, |
|
"rewards/margins": 0.23253926634788513, |
|
"rewards/rejected": -0.43646764755249023, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"logits/chosen": -1.2248852252960205, |
|
"logits/rejected": 0.04564919322729111, |
|
"logps/chosen": -836.74169921875, |
|
"logps/rejected": -1574.744140625, |
|
"loss": 0.2558, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.167385533452034, |
|
"rewards/margins": 0.18353143334388733, |
|
"rewards/rejected": -0.35091695189476013, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.4666666666666672e-06, |
|
"logits/chosen": -1.4086463451385498, |
|
"logits/rejected": 0.30391791462898254, |
|
"logps/chosen": -697.5711669921875, |
|
"logps/rejected": -1787.910888671875, |
|
"loss": 0.1502, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.12433981895446777, |
|
"rewards/margins": 0.2777388095855713, |
|
"rewards/rejected": -0.40207862854003906, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.6000000000000003e-06, |
|
"logits/chosen": -1.4037072658538818, |
|
"logits/rejected": 0.20726795494556427, |
|
"logps/chosen": -785.6559448242188, |
|
"logps/rejected": -1549.1513671875, |
|
"loss": 0.2138, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.1881810426712036, |
|
"rewards/margins": 0.21742673218250275, |
|
"rewards/rejected": -0.40560778975486755, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.7333333333333337e-06, |
|
"logits/chosen": -1.345931887626648, |
|
"logits/rejected": -0.15625306963920593, |
|
"logps/chosen": -680.088623046875, |
|
"logps/rejected": -1532.015625, |
|
"loss": 0.2158, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.1171560287475586, |
|
"rewards/margins": 0.1917530596256256, |
|
"rewards/rejected": -0.3089090883731842, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.866666666666667e-06, |
|
"logits/chosen": -1.5668309926986694, |
|
"logits/rejected": -0.32349592447280884, |
|
"logps/chosen": -740.4640502929688, |
|
"logps/rejected": -1647.676513671875, |
|
"loss": 0.1976, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.0793481171131134, |
|
"rewards/margins": 0.25623226165771484, |
|
"rewards/rejected": -0.335580438375473, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.000000000000001e-06, |
|
"logits/chosen": -1.4235851764678955, |
|
"logits/rejected": -0.6677430868148804, |
|
"logps/chosen": -766.1461791992188, |
|
"logps/rejected": -1581.3609619140625, |
|
"loss": 0.1918, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.17254561185836792, |
|
"rewards/margins": 0.22318947315216064, |
|
"rewards/rejected": -0.39573508501052856, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.133333333333333e-06, |
|
"logits/chosen": -1.3767132759094238, |
|
"logits/rejected": -0.3363776206970215, |
|
"logps/chosen": -967.8243408203125, |
|
"logps/rejected": -1914.7396240234375, |
|
"loss": 0.1578, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.2716659605503082, |
|
"rewards/margins": 0.3169645071029663, |
|
"rewards/rejected": -0.5886305570602417, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.266666666666668e-06, |
|
"logits/chosen": -1.7191673517227173, |
|
"logits/rejected": 0.0708886906504631, |
|
"logps/chosen": -1019.0963134765625, |
|
"logps/rejected": -1888.802001953125, |
|
"loss": 0.185, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.31889739632606506, |
|
"rewards/margins": 0.31718775629997253, |
|
"rewards/rejected": -0.6360851526260376, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.4e-06, |
|
"logits/chosen": -1.2595525979995728, |
|
"logits/rejected": -0.7953510880470276, |
|
"logps/chosen": -697.2740478515625, |
|
"logps/rejected": -1584.4857177734375, |
|
"loss": 0.1932, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.15896865725517273, |
|
"rewards/margins": 0.23230549693107605, |
|
"rewards/rejected": -0.3912741243839264, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.533333333333334e-06, |
|
"logits/chosen": -1.3439395427703857, |
|
"logits/rejected": -0.4132903516292572, |
|
"logps/chosen": -766.2467651367188, |
|
"logps/rejected": -1648.1083984375, |
|
"loss": 1.9196, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.23903754353523254, |
|
"rewards/margins": 0.41279107332229614, |
|
"rewards/rejected": -0.6518285870552063, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.666666666666667e-06, |
|
"logits/chosen": -1.3772680759429932, |
|
"logits/rejected": -0.3717087209224701, |
|
"logps/chosen": -645.0536499023438, |
|
"logps/rejected": -1581.9832763671875, |
|
"loss": 0.2452, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.15307392179965973, |
|
"rewards/margins": 0.14587204158306122, |
|
"rewards/rejected": -0.29894596338272095, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.800000000000001e-06, |
|
"logits/chosen": -1.4199841022491455, |
|
"logits/rejected": -0.7620482444763184, |
|
"logps/chosen": -858.2607421875, |
|
"logps/rejected": -1545.4052734375, |
|
"loss": 0.3538, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.14831410348415375, |
|
"rewards/margins": 0.06550173461437225, |
|
"rewards/rejected": -0.213815838098526, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.933333333333334e-06, |
|
"logits/chosen": -1.6882193088531494, |
|
"logits/rejected": -0.8316219449043274, |
|
"logps/chosen": -846.7376098632812, |
|
"logps/rejected": -1478.4105224609375, |
|
"loss": 0.3138, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.12148912250995636, |
|
"rewards/margins": 0.06598127633333206, |
|
"rewards/rejected": -0.18747039139270782, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999972922944898e-06, |
|
"logits/chosen": -1.6390502452850342, |
|
"logits/rejected": -1.078680396080017, |
|
"logps/chosen": -709.0716552734375, |
|
"logps/rejected": -1625.7291259765625, |
|
"loss": 0.1946, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.06936102360486984, |
|
"rewards/margins": 0.168907031416893, |
|
"rewards/rejected": -0.23826804757118225, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999756310023261e-06, |
|
"logits/chosen": -1.5627597570419312, |
|
"logits/rejected": -0.6666626334190369, |
|
"logps/chosen": -657.7476806640625, |
|
"logps/rejected": -1387.281494140625, |
|
"loss": 0.239, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.015483086928725243, |
|
"rewards/margins": 0.20804651081562042, |
|
"rewards/rejected": -0.2235296070575714, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.999323102948655e-06, |
|
"logits/chosen": -1.4536590576171875, |
|
"logits/rejected": -0.26167917251586914, |
|
"logps/chosen": -623.3753051757812, |
|
"logps/rejected": -1424.2047119140625, |
|
"loss": 0.249, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.05082948878407478, |
|
"rewards/margins": 0.19376987218856812, |
|
"rewards/rejected": -0.2445993721485138, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.998673339256785e-06, |
|
"logits/chosen": -1.4819873571395874, |
|
"logits/rejected": -0.8635656237602234, |
|
"logps/chosen": -731.3282470703125, |
|
"logps/rejected": -1732.039794921875, |
|
"loss": 0.2004, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.10082012414932251, |
|
"rewards/margins": 0.21832840144634247, |
|
"rewards/rejected": -0.31914854049682617, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.997807075247147e-06, |
|
"logits/chosen": -1.3139228820800781, |
|
"logits/rejected": -0.579411506652832, |
|
"logps/chosen": -703.5878295898438, |
|
"logps/rejected": -1817.680908203125, |
|
"loss": 0.1782, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.12780095636844635, |
|
"rewards/margins": 0.3425312042236328, |
|
"rewards/rejected": -0.47033214569091797, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.996724385978142e-06, |
|
"logits/chosen": -1.5831680297851562, |
|
"logits/rejected": -0.5023525953292847, |
|
"logps/chosen": -746.4442138671875, |
|
"logps/rejected": -1795.2135009765625, |
|
"loss": 0.1981, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.12009434401988983, |
|
"rewards/margins": 0.3284095525741577, |
|
"rewards/rejected": -0.44850391149520874, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.995425365260585e-06, |
|
"logits/chosen": -1.4668796062469482, |
|
"logits/rejected": -0.6527413129806519, |
|
"logps/chosen": -602.0538330078125, |
|
"logps/rejected": -1504.60400390625, |
|
"loss": 0.1676, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.08191387355327606, |
|
"rewards/margins": 0.22698941826820374, |
|
"rewards/rejected": -0.3089032769203186, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.993910125649561e-06, |
|
"logits/chosen": -1.5859527587890625, |
|
"logits/rejected": -0.9323417544364929, |
|
"logps/chosen": -573.4935302734375, |
|
"logps/rejected": -1426.5198974609375, |
|
"loss": 0.1951, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.07085999846458435, |
|
"rewards/margins": 0.21237850189208984, |
|
"rewards/rejected": -0.2832385003566742, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.992178798434684e-06, |
|
"logits/chosen": -1.5028969049453735, |
|
"logits/rejected": -0.5813714265823364, |
|
"logps/chosen": -898.3291015625, |
|
"logps/rejected": -1658.8726806640625, |
|
"loss": 0.2782, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1786281317472458, |
|
"rewards/margins": 0.19002899527549744, |
|
"rewards/rejected": -0.36865711212158203, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.990231533628719e-06, |
|
"logits/chosen": -1.6499645709991455, |
|
"logits/rejected": -0.9359930753707886, |
|
"logps/chosen": -693.5921630859375, |
|
"logps/rejected": -1703.711181640625, |
|
"loss": 0.2171, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.13478896021842957, |
|
"rewards/margins": 0.2760683298110962, |
|
"rewards/rejected": -0.41085729002952576, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.988068499954578e-06, |
|
"logits/chosen": -1.5587965250015259, |
|
"logits/rejected": -0.4612099528312683, |
|
"logps/chosen": -695.7940673828125, |
|
"logps/rejected": -1690.544189453125, |
|
"loss": 0.2919, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1203017458319664, |
|
"rewards/margins": 0.3253653943538666, |
|
"rewards/rejected": -0.4456671178340912, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.985689884830711e-06, |
|
"logits/chosen": -1.5129863023757935, |
|
"logits/rejected": -0.6448124647140503, |
|
"logps/chosen": -740.9840698242188, |
|
"logps/rejected": -1641.5972900390625, |
|
"loss": 0.1947, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.09521947801113129, |
|
"rewards/margins": 0.17064201831817627, |
|
"rewards/rejected": -0.26586148142814636, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.983095894354858e-06, |
|
"logits/chosen": -1.5538814067840576, |
|
"logits/rejected": -0.5965372323989868, |
|
"logps/chosen": -763.7830200195312, |
|
"logps/rejected": -1575.0704345703125, |
|
"loss": 0.2584, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.10452375560998917, |
|
"rewards/margins": 0.13509635627269745, |
|
"rewards/rejected": -0.2396201193332672, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.980286753286196e-06, |
|
"logits/chosen": -1.489684820175171, |
|
"logits/rejected": -0.7012004852294922, |
|
"logps/chosen": -673.0375366210938, |
|
"logps/rejected": -1428.3021240234375, |
|
"loss": 0.202, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.11526355892419815, |
|
"rewards/margins": 0.1490117609500885, |
|
"rewards/rejected": -0.26427531242370605, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.97726270502586e-06, |
|
"logits/chosen": -1.4848878383636475, |
|
"logits/rejected": -0.7628189921379089, |
|
"logps/chosen": -794.6495971679688, |
|
"logps/rejected": -1725.61328125, |
|
"loss": 0.2026, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.17633184790611267, |
|
"rewards/margins": 0.21604189276695251, |
|
"rewards/rejected": -0.3923737406730652, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.974024011595864e-06, |
|
"logits/chosen": -1.5632755756378174, |
|
"logits/rejected": -0.7222310304641724, |
|
"logps/chosen": -932.9734497070312, |
|
"logps/rejected": -1767.3134765625, |
|
"loss": 0.2952, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.20238515734672546, |
|
"rewards/margins": 0.24491646885871887, |
|
"rewards/rejected": -0.44730162620544434, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.970570953616383e-06, |
|
"logits/chosen": -1.4104833602905273, |
|
"logits/rejected": -0.48003751039505005, |
|
"logps/chosen": -668.6126708984375, |
|
"logps/rejected": -1662.20703125, |
|
"loss": 0.2237, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.11609913408756256, |
|
"rewards/margins": 0.2550623416900635, |
|
"rewards/rejected": -0.37116146087646484, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.966903830281449e-06, |
|
"logits/chosen": -1.7994705438613892, |
|
"logits/rejected": -0.39116740226745605, |
|
"logps/chosen": -684.5289916992188, |
|
"logps/rejected": -1445.06298828125, |
|
"loss": 0.2365, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1520206481218338, |
|
"rewards/margins": 0.17612138390541077, |
|
"rewards/rejected": -0.32814204692840576, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.9630229593330226e-06, |
|
"logits/chosen": -1.5381734371185303, |
|
"logits/rejected": -0.836050808429718, |
|
"logps/chosen": -840.8229370117188, |
|
"logps/rejected": -1694.432861328125, |
|
"loss": 0.2301, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.18990156054496765, |
|
"rewards/margins": 0.2644536793231964, |
|
"rewards/rejected": -0.4543551504611969, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.958928677033465e-06, |
|
"logits/chosen": -1.6244480609893799, |
|
"logits/rejected": -0.5760088562965393, |
|
"logps/chosen": -861.2122192382812, |
|
"logps/rejected": -1686.744873046875, |
|
"loss": 0.2336, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.20879682898521423, |
|
"rewards/margins": 0.24533554911613464, |
|
"rewards/rejected": -0.4541323781013489, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.954621338136399e-06, |
|
"logits/chosen": -1.486128807067871, |
|
"logits/rejected": -0.6559261083602905, |
|
"logps/chosen": -924.2649536132812, |
|
"logps/rejected": -1736.8707275390625, |
|
"loss": 0.2448, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1695316731929779, |
|
"rewards/margins": 0.23115842044353485, |
|
"rewards/rejected": -0.40069007873535156, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.95010131585597e-06, |
|
"logits/chosen": -1.217822790145874, |
|
"logits/rejected": -0.44435954093933105, |
|
"logps/chosen": -779.5258178710938, |
|
"logps/rejected": -1578.96240234375, |
|
"loss": 0.2707, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.09426208585500717, |
|
"rewards/margins": 0.23891735076904297, |
|
"rewards/rejected": -0.33317944407463074, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.9453690018345144e-06, |
|
"logits/chosen": -1.4555373191833496, |
|
"logits/rejected": -0.5545033812522888, |
|
"logps/chosen": -509.085693359375, |
|
"logps/rejected": -1402.9798583984375, |
|
"loss": 0.1932, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.0637204647064209, |
|
"rewards/margins": 0.23001065850257874, |
|
"rewards/rejected": -0.29373109340667725, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.940424806108619e-06, |
|
"logits/chosen": -1.6595865488052368, |
|
"logits/rejected": -0.6369374990463257, |
|
"logps/chosen": -777.7699584960938, |
|
"logps/rejected": -1567.2138671875, |
|
"loss": 0.1794, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.08186454325914383, |
|
"rewards/margins": 0.22620078921318054, |
|
"rewards/rejected": -0.30806535482406616, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.935269157073597e-06, |
|
"logits/chosen": -1.598024606704712, |
|
"logits/rejected": -1.1374785900115967, |
|
"logps/chosen": -596.6869506835938, |
|
"logps/rejected": -1656.125244140625, |
|
"loss": 0.2165, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.05304652452468872, |
|
"rewards/margins": 0.3274363577365875, |
|
"rewards/rejected": -0.38048288226127625, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.9299025014463665e-06, |
|
"logits/chosen": -1.399924635887146, |
|
"logits/rejected": -0.20592813193798065, |
|
"logps/chosen": -607.9566040039062, |
|
"logps/rejected": -1577.6639404296875, |
|
"loss": 0.183, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.07505299150943756, |
|
"rewards/margins": 0.2847192883491516, |
|
"rewards/rejected": -0.359772264957428, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.924325304226745e-06, |
|
"logits/chosen": -1.6225976943969727, |
|
"logits/rejected": -0.5168310403823853, |
|
"logps/chosen": -793.9441528320312, |
|
"logps/rejected": -1665.4193115234375, |
|
"loss": 0.1601, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.09130421280860901, |
|
"rewards/margins": 0.2901184856891632, |
|
"rewards/rejected": -0.3814226984977722, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.91853804865716e-06, |
|
"logits/chosen": -1.0889428853988647, |
|
"logits/rejected": -0.419971764087677, |
|
"logps/chosen": -714.290771484375, |
|
"logps/rejected": -1576.413330078125, |
|
"loss": 0.1755, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1179242953658104, |
|
"rewards/margins": 0.2568149268627167, |
|
"rewards/rejected": -0.37473922967910767, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.912541236180779e-06, |
|
"logits/chosen": -1.3426125049591064, |
|
"logits/rejected": -0.6203486919403076, |
|
"logps/chosen": -884.5665283203125, |
|
"logps/rejected": -1730.358154296875, |
|
"loss": 0.2716, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.2452804297208786, |
|
"rewards/margins": 0.20476552844047546, |
|
"rewards/rejected": -0.4500458836555481, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.9063353863980565e-06, |
|
"logits/chosen": -1.3583014011383057, |
|
"logits/rejected": -0.6006292104721069, |
|
"logps/chosen": -807.7757568359375, |
|
"logps/rejected": -1728.714599609375, |
|
"loss": 0.1892, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1912967413663864, |
|
"rewards/margins": 0.28994446992874146, |
|
"rewards/rejected": -0.48124128580093384, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.899921037021719e-06, |
|
"logits/chosen": -1.689822793006897, |
|
"logits/rejected": -0.9633957147598267, |
|
"logps/chosen": -701.271240234375, |
|
"logps/rejected": -1522.19921875, |
|
"loss": 0.1606, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1669953167438507, |
|
"rewards/margins": 0.24159979820251465, |
|
"rewards/rejected": -0.40859508514404297, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.893298743830168e-06, |
|
"logits/chosen": -1.3598110675811768, |
|
"logits/rejected": -0.3959788382053375, |
|
"logps/chosen": -761.9188842773438, |
|
"logps/rejected": -1724.9876708984375, |
|
"loss": 0.1722, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.19954383373260498, |
|
"rewards/margins": 0.2616916596889496, |
|
"rewards/rejected": -0.46123552322387695, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.88646908061933e-06, |
|
"logits/chosen": -1.4325469732284546, |
|
"logits/rejected": -0.38342922925949097, |
|
"logps/chosen": -740.5088500976562, |
|
"logps/rejected": -1678.9232177734375, |
|
"loss": 0.2414, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.18760952353477478, |
|
"rewards/margins": 0.2778058648109436, |
|
"rewards/rejected": -0.46541541814804077, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.879432639152935e-06, |
|
"logits/chosen": -1.360878586769104, |
|
"logits/rejected": -0.29857271909713745, |
|
"logps/chosen": -834.4906005859375, |
|
"logps/rejected": -1705.520263671875, |
|
"loss": 0.21, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.11962740123271942, |
|
"rewards/margins": 0.23699507117271423, |
|
"rewards/rejected": -0.35662245750427246, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.8721900291112415e-06, |
|
"logits/chosen": -1.567964792251587, |
|
"logits/rejected": -0.6299314498901367, |
|
"logps/chosen": -786.2874755859375, |
|
"logps/rejected": -1548.3878173828125, |
|
"loss": 0.216, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.11282478272914886, |
|
"rewards/margins": 0.20971831679344177, |
|
"rewards/rejected": -0.32254308462142944, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.864741878038218e-06, |
|
"logits/chosen": -1.4125442504882812, |
|
"logits/rejected": -0.6314720511436462, |
|
"logps/chosen": -745.260498046875, |
|
"logps/rejected": -1698.3111572265625, |
|
"loss": 0.1798, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.12032542377710342, |
|
"rewards/margins": 0.2931358516216278, |
|
"rewards/rejected": -0.413461297750473, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.857088831287158e-06, |
|
"logits/chosen": -1.5918242931365967, |
|
"logits/rejected": -0.18947651982307434, |
|
"logps/chosen": -734.33984375, |
|
"logps/rejected": -1762.8245849609375, |
|
"loss": 0.1658, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1603078991174698, |
|
"rewards/margins": 0.3126041293144226, |
|
"rewards/rejected": -0.4729120135307312, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.849231551964771e-06, |
|
"logits/chosen": -1.3384909629821777, |
|
"logits/rejected": -0.19086980819702148, |
|
"logps/chosen": -781.5340576171875, |
|
"logps/rejected": -1614.2730712890625, |
|
"loss": 0.2252, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.16732366383075714, |
|
"rewards/margins": 0.21859999001026154, |
|
"rewards/rejected": -0.38592368364334106, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.841170720873723e-06, |
|
"logits/chosen": -1.4520353078842163, |
|
"logits/rejected": -0.30854731798171997, |
|
"logps/chosen": -704.5045776367188, |
|
"logps/rejected": -1557.046875, |
|
"loss": 0.2166, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.14268702268600464, |
|
"rewards/margins": 0.24501653015613556, |
|
"rewards/rejected": -0.38770347833633423, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.832907036453647e-06, |
|
"logits/chosen": -1.4341394901275635, |
|
"logits/rejected": -0.39417794346809387, |
|
"logps/chosen": -831.6419677734375, |
|
"logps/rejected": -1733.0474853515625, |
|
"loss": 0.1779, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.14101889729499817, |
|
"rewards/margins": 0.255978524684906, |
|
"rewards/rejected": -0.39699748158454895, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.824441214720629e-06, |
|
"logits/chosen": -1.2338621616363525, |
|
"logits/rejected": -0.33901381492614746, |
|
"logps/chosen": -600.5255737304688, |
|
"logps/rejected": -1670.0986328125, |
|
"loss": 0.1492, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.11527079343795776, |
|
"rewards/margins": 0.3371114134788513, |
|
"rewards/rejected": -0.45238223671913147, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.815773989205165e-06, |
|
"logits/chosen": -1.5467897653579712, |
|
"logits/rejected": -0.7028628587722778, |
|
"logps/chosen": -760.3792114257812, |
|
"logps/rejected": -1956.612548828125, |
|
"loss": 0.1273, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.16627143323421478, |
|
"rewards/margins": 0.44062843918800354, |
|
"rewards/rejected": -0.6068998575210571, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.806906110888606e-06, |
|
"logits/chosen": -1.4377644062042236, |
|
"logits/rejected": -0.6477410197257996, |
|
"logps/chosen": -701.1937255859375, |
|
"logps/rejected": -1622.2232666015625, |
|
"loss": 0.2002, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.14753028750419617, |
|
"rewards/margins": 0.2542650103569031, |
|
"rewards/rejected": -0.40179523825645447, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.7978383481380865e-06, |
|
"logits/chosen": -1.40022873878479, |
|
"logits/rejected": -0.5014703869819641, |
|
"logps/chosen": -643.99609375, |
|
"logps/rejected": -1408.9022216796875, |
|
"loss": 0.1944, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.1288548707962036, |
|
"rewards/margins": 0.18380561470985413, |
|
"rewards/rejected": -0.31266045570373535, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.788571486639948e-06, |
|
"logits/chosen": -1.3129093647003174, |
|
"logits/rejected": -0.592149019241333, |
|
"logps/chosen": -842.0194091796875, |
|
"logps/rejected": -1960.482177734375, |
|
"loss": 0.1644, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.14255237579345703, |
|
"rewards/margins": 0.33296501636505127, |
|
"rewards/rejected": -0.4755174219608307, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.779106329331665e-06, |
|
"logits/chosen": -1.5025089979171753, |
|
"logits/rejected": -0.3978004455566406, |
|
"logps/chosen": -727.6754150390625, |
|
"logps/rejected": -1607.7178955078125, |
|
"loss": 0.196, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.14143696427345276, |
|
"rewards/margins": 0.26949542760849, |
|
"rewards/rejected": -0.41093239188194275, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.769443696332272e-06, |
|
"logits/chosen": -1.2928297519683838, |
|
"logits/rejected": -0.20549149811267853, |
|
"logps/chosen": -977.98095703125, |
|
"logps/rejected": -2075.953857421875, |
|
"loss": 0.1975, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.2571466565132141, |
|
"rewards/margins": 0.3657899498939514, |
|
"rewards/rejected": -0.6229366064071655, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.759584424871302e-06, |
|
"logits/chosen": -1.2061738967895508, |
|
"logits/rejected": -0.282677561044693, |
|
"logps/chosen": -832.7267456054688, |
|
"logps/rejected": -1914.269775390625, |
|
"loss": 0.1241, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.23771587014198303, |
|
"rewards/margins": 0.34755539894104004, |
|
"rewards/rejected": -0.5852713584899902, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.749529369216246e-06, |
|
"logits/chosen": -1.4981663227081299, |
|
"logits/rejected": -0.8207576870918274, |
|
"logps/chosen": -818.4663696289062, |
|
"logps/rejected": -1752.6500244140625, |
|
"loss": 0.2125, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.19876405596733093, |
|
"rewards/margins": 0.27873286604881287, |
|
"rewards/rejected": -0.4774969518184662, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7392794005985324e-06, |
|
"logits/chosen": -1.420318841934204, |
|
"logits/rejected": -0.6080266237258911, |
|
"logps/chosen": -673.9572143554688, |
|
"logps/rejected": -1555.6048583984375, |
|
"loss": 0.1973, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.1334313452243805, |
|
"rewards/margins": 0.25675299763679504, |
|
"rewards/rejected": -0.39018434286117554, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7288354071380415e-06, |
|
"logits/chosen": -1.2416133880615234, |
|
"logits/rejected": 0.2938464283943176, |
|
"logps/chosen": -641.5850830078125, |
|
"logps/rejected": -1431.835205078125, |
|
"loss": 0.2056, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.1211976408958435, |
|
"rewards/margins": 0.21377813816070557, |
|
"rewards/rejected": -0.3349757790565491, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.7181982937661485e-06, |
|
"logits/chosen": -1.2874345779418945, |
|
"logits/rejected": -0.42665576934814453, |
|
"logps/chosen": -844.7786254882812, |
|
"logps/rejected": -1806.428955078125, |
|
"loss": 0.1483, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.2129203826189041, |
|
"rewards/margins": 0.2927423417568207, |
|
"rewards/rejected": -0.505662739276886, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.707368982147318e-06, |
|
"logits/chosen": -1.3794301748275757, |
|
"logits/rejected": -0.777490496635437, |
|
"logps/chosen": -880.0616455078125, |
|
"logps/rejected": -1896.2252197265625, |
|
"loss": 0.1484, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.24488544464111328, |
|
"rewards/margins": 0.32845309376716614, |
|
"rewards/rejected": -0.573338508605957, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.696348410599244e-06, |
|
"logits/chosen": -1.3112980127334595, |
|
"logits/rejected": -0.8322477340698242, |
|
"logps/chosen": -693.696044921875, |
|
"logps/rejected": -1638.334716796875, |
|
"loss": 0.1879, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.21305444836616516, |
|
"rewards/margins": 0.29524970054626465, |
|
"rewards/rejected": -0.5083041787147522, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.685137534011549e-06, |
|
"logits/chosen": -1.5619451999664307, |
|
"logits/rejected": -0.2924344539642334, |
|
"logps/chosen": -848.5318603515625, |
|
"logps/rejected": -1702.1826171875, |
|
"loss": 0.2089, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.2551230788230896, |
|
"rewards/margins": 0.2683793008327484, |
|
"rewards/rejected": -0.5235023498535156, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.673737323763048e-06, |
|
"logits/chosen": -1.2145551443099976, |
|
"logits/rejected": -0.5193670392036438, |
|
"logps/chosen": -665.93017578125, |
|
"logps/rejected": -1560.180419921875, |
|
"loss": 0.1732, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.16463886201381683, |
|
"rewards/margins": 0.2519565224647522, |
|
"rewards/rejected": -0.41659536957740784, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.662148767637578e-06, |
|
"logits/chosen": -1.2429286241531372, |
|
"logits/rejected": -0.5839636325836182, |
|
"logps/chosen": -835.6803588867188, |
|
"logps/rejected": -1747.1480712890625, |
|
"loss": 0.2201, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.18446584045886993, |
|
"rewards/margins": 0.2667251229286194, |
|
"rewards/rejected": -0.4511910080909729, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.650372869738415e-06, |
|
"logits/chosen": -1.551847219467163, |
|
"logits/rejected": -0.7213087677955627, |
|
"logps/chosen": -739.1425170898438, |
|
"logps/rejected": -1562.636962890625, |
|
"loss": 0.2208, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.10864777863025665, |
|
"rewards/margins": 0.25965723395347595, |
|
"rewards/rejected": -0.3683050274848938, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.638410650401267e-06, |
|
"logits/chosen": -1.327823281288147, |
|
"logits/rejected": -0.146169975399971, |
|
"logps/chosen": -627.8694458007812, |
|
"logps/rejected": -1442.491455078125, |
|
"loss": 0.2464, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.09383858740329742, |
|
"rewards/margins": 0.18377116322517395, |
|
"rewards/rejected": -0.27760976552963257, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.626263146105875e-06, |
|
"logits/chosen": -1.613373041152954, |
|
"logits/rejected": -0.5505753755569458, |
|
"logps/chosen": -789.1390380859375, |
|
"logps/rejected": -1607.309326171875, |
|
"loss": 0.2438, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.1650734841823578, |
|
"rewards/margins": 0.2032967060804367, |
|
"rewards/rejected": -0.3683702051639557, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.613931409386196e-06, |
|
"logits/chosen": -1.406640648841858, |
|
"logits/rejected": -0.3088572323322296, |
|
"logps/chosen": -764.7514038085938, |
|
"logps/rejected": -1577.6644287109375, |
|
"loss": 0.2139, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.15214017033576965, |
|
"rewards/margins": 0.2213418185710907, |
|
"rewards/rejected": -0.37348201870918274, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.601416508739211e-06, |
|
"logits/chosen": -1.4507434368133545, |
|
"logits/rejected": -0.6369355916976929, |
|
"logps/chosen": -740.4591674804688, |
|
"logps/rejected": -1661.3743896484375, |
|
"loss": 0.1741, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.13703958690166473, |
|
"rewards/margins": 0.26543471217155457, |
|
"rewards/rejected": -0.4024743139743805, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.588719528532342e-06, |
|
"logits/chosen": -1.3239177465438843, |
|
"logits/rejected": -0.3033704161643982, |
|
"logps/chosen": -753.6892700195312, |
|
"logps/rejected": -1676.83984375, |
|
"loss": 0.209, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.12344950437545776, |
|
"rewards/margins": 0.274679034948349, |
|
"rewards/rejected": -0.3981285095214844, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.575841568909494e-06, |
|
"logits/chosen": -1.6556479930877686, |
|
"logits/rejected": -0.39285674691200256, |
|
"logps/chosen": -722.4796142578125, |
|
"logps/rejected": -1681.2886962890625, |
|
"loss": 0.1905, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.09982794523239136, |
|
"rewards/margins": 0.2901255488395691, |
|
"rewards/rejected": -0.38995346426963806, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.562783745695738e-06, |
|
"logits/chosen": -1.4957317113876343, |
|
"logits/rejected": 0.11952666193246841, |
|
"logps/chosen": -714.2357177734375, |
|
"logps/rejected": -1432.6864013671875, |
|
"loss": 0.2066, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.09190934896469116, |
|
"rewards/margins": 0.20793361961841583, |
|
"rewards/rejected": -0.2998429834842682, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.549547190300622e-06, |
|
"logits/chosen": -1.2969706058502197, |
|
"logits/rejected": -0.45746153593063354, |
|
"logps/chosen": -787.8223266601562, |
|
"logps/rejected": -1744.7171630859375, |
|
"loss": 0.2282, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.161489337682724, |
|
"rewards/margins": 0.31587809324264526, |
|
"rewards/rejected": -0.4773674011230469, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.536133049620143e-06, |
|
"logits/chosen": -1.13800048828125, |
|
"logits/rejected": -0.3455226421356201, |
|
"logps/chosen": -721.7028198242188, |
|
"logps/rejected": -1831.574951171875, |
|
"loss": 0.1279, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.12353639304637909, |
|
"rewards/margins": 0.3737573027610779, |
|
"rewards/rejected": -0.49729371070861816, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.522542485937369e-06, |
|
"logits/chosen": -1.4609827995300293, |
|
"logits/rejected": -0.496757835149765, |
|
"logps/chosen": -781.8280029296875, |
|
"logps/rejected": -1767.3248291015625, |
|
"loss": 0.1728, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.15113988518714905, |
|
"rewards/margins": 0.3218175768852234, |
|
"rewards/rejected": -0.4729575216770172, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.508776676821739e-06, |
|
"logits/chosen": -1.4414708614349365, |
|
"logits/rejected": -0.6857733726501465, |
|
"logps/chosen": -832.9376831054688, |
|
"logps/rejected": -1553.129150390625, |
|
"loss": 0.1908, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.17810413241386414, |
|
"rewards/margins": 0.21977660059928894, |
|
"rewards/rejected": -0.3978807330131531, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.494836815027022e-06, |
|
"logits/chosen": -1.3217737674713135, |
|
"logits/rejected": 0.03468703106045723, |
|
"logps/chosen": -752.7827758789062, |
|
"logps/rejected": -1703.7191162109375, |
|
"loss": 0.2323, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.14807240664958954, |
|
"rewards/margins": 0.2589189410209656, |
|
"rewards/rejected": -0.4069913923740387, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.4807241083879774e-06, |
|
"logits/chosen": -1.3331490755081177, |
|
"logits/rejected": -0.2640678286552429, |
|
"logps/chosen": -728.1994018554688, |
|
"logps/rejected": -1618.332275390625, |
|
"loss": 0.2073, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.1415979564189911, |
|
"rewards/margins": 0.31241506338119507, |
|
"rewards/rejected": -0.4540129601955414, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.466439779715696e-06, |
|
"logits/chosen": -1.320003867149353, |
|
"logits/rejected": -0.5086170434951782, |
|
"logps/chosen": -710.85009765625, |
|
"logps/rejected": -1577.6478271484375, |
|
"loss": 0.1914, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.09440721571445465, |
|
"rewards/margins": 0.2844494879245758, |
|
"rewards/rejected": -0.37885671854019165, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.451985066691649e-06, |
|
"logits/chosen": -1.3163551092147827, |
|
"logits/rejected": -0.6709269285202026, |
|
"logps/chosen": -674.307373046875, |
|
"logps/rejected": -1554.60498046875, |
|
"loss": 0.1749, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.10322503000497818, |
|
"rewards/margins": 0.28593048453330994, |
|
"rewards/rejected": -0.3891555070877075, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.437361221760449e-06, |
|
"logits/chosen": -1.5619227886199951, |
|
"logits/rejected": -0.5209615230560303, |
|
"logps/chosen": -693.013916015625, |
|
"logps/rejected": -1884.20703125, |
|
"loss": 0.1298, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.11155110597610474, |
|
"rewards/margins": 0.3766392171382904, |
|
"rewards/rejected": -0.48819035291671753, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.422569512021332e-06, |
|
"logits/chosen": -1.4108737707138062, |
|
"logits/rejected": 0.11136605590581894, |
|
"logps/chosen": -703.5150146484375, |
|
"logps/rejected": -1624.7801513671875, |
|
"loss": 0.2085, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.10121216624975204, |
|
"rewards/margins": 0.26922935247421265, |
|
"rewards/rejected": -0.3704415559768677, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.407611219118363e-06, |
|
"logits/chosen": -1.353237509727478, |
|
"logits/rejected": -0.5166940093040466, |
|
"logps/chosen": -706.7504272460938, |
|
"logps/rejected": -1707.227294921875, |
|
"loss": 0.1646, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.144940584897995, |
|
"rewards/margins": 0.2921007573604584, |
|
"rewards/rejected": -0.43704134225845337, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.3924876391293915e-06, |
|
"logits/chosen": -1.3774515390396118, |
|
"logits/rejected": -0.8835982084274292, |
|
"logps/chosen": -653.8270263671875, |
|
"logps/rejected": -1665.271728515625, |
|
"loss": 0.1742, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.11839635670185089, |
|
"rewards/margins": 0.2974711060523987, |
|
"rewards/rejected": -0.41586747765541077, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.377200082453748e-06, |
|
"logits/chosen": -1.3856669664382935, |
|
"logits/rejected": -0.6063768863677979, |
|
"logps/chosen": -692.400390625, |
|
"logps/rejected": -1679.411865234375, |
|
"loss": 0.2769, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.12366262823343277, |
|
"rewards/margins": 0.3368341326713562, |
|
"rewards/rejected": -0.4604967534542084, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.361749873698707e-06, |
|
"logits/chosen": -1.4405038356781006, |
|
"logits/rejected": -0.6485291123390198, |
|
"logps/chosen": -696.2839965820312, |
|
"logps/rejected": -1726.019775390625, |
|
"loss": 0.1222, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.08081836253404617, |
|
"rewards/margins": 0.3042358160018921, |
|
"rewards/rejected": -0.38505417108535767, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.346138351564711e-06, |
|
"logits/chosen": -1.443703055381775, |
|
"logits/rejected": -0.754281759262085, |
|
"logps/chosen": -663.7955932617188, |
|
"logps/rejected": -1472.655029296875, |
|
"loss": 0.2056, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.05589227005839348, |
|
"rewards/margins": 0.21428577601909637, |
|
"rewards/rejected": -0.27017804980278015, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.330366868729376e-06, |
|
"logits/chosen": -1.3844349384307861, |
|
"logits/rejected": -0.6004733443260193, |
|
"logps/chosen": -717.2481689453125, |
|
"logps/rejected": -1639.61328125, |
|
"loss": 0.1903, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.09332798421382904, |
|
"rewards/margins": 0.2749372124671936, |
|
"rewards/rejected": -0.36826521158218384, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.3144367917302964e-06, |
|
"logits/chosen": -1.3772212266921997, |
|
"logits/rejected": -1.007070779800415, |
|
"logps/chosen": -626.3247680664062, |
|
"logps/rejected": -1674.417724609375, |
|
"loss": 0.1665, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.09171278029680252, |
|
"rewards/margins": 0.32611554861068726, |
|
"rewards/rejected": -0.4178283214569092, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.2983495008466285e-06, |
|
"logits/chosen": -1.3759549856185913, |
|
"logits/rejected": -0.7071571350097656, |
|
"logps/chosen": -691.9136962890625, |
|
"logps/rejected": -1873.8238525390625, |
|
"loss": 0.2082, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.13154932856559753, |
|
"rewards/margins": 0.37329337000846863, |
|
"rewards/rejected": -0.5048426389694214, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.2821063899795015e-06, |
|
"logits/chosen": -1.50946044921875, |
|
"logits/rejected": -0.23106630146503448, |
|
"logps/chosen": -669.9387817382812, |
|
"logps/rejected": -1659.885986328125, |
|
"loss": 0.1434, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.07889886945486069, |
|
"rewards/margins": 0.3387802839279175, |
|
"rewards/rejected": -0.4176791310310364, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.265708866531238e-06, |
|
"logits/chosen": -1.5640531778335571, |
|
"logits/rejected": -0.577285885810852, |
|
"logps/chosen": -672.6451416015625, |
|
"logps/rejected": -1411.490966796875, |
|
"loss": 0.1828, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.0744541734457016, |
|
"rewards/margins": 0.2126346081495285, |
|
"rewards/rejected": -0.2870888113975525, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.249158351283414e-06, |
|
"logits/chosen": -1.3422248363494873, |
|
"logits/rejected": -0.3964029848575592, |
|
"logps/chosen": -660.489990234375, |
|
"logps/rejected": -1654.5406494140625, |
|
"loss": 0.1949, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.10021636635065079, |
|
"rewards/margins": 0.22853437066078186, |
|
"rewards/rejected": -0.32875072956085205, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.232456278273743e-06, |
|
"logits/chosen": -1.4052772521972656, |
|
"logits/rejected": -1.0742384195327759, |
|
"logps/chosen": -727.620849609375, |
|
"logps/rejected": -1368.65283203125, |
|
"loss": 0.2202, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.13287192583084106, |
|
"rewards/margins": 0.17202439904212952, |
|
"rewards/rejected": -0.3048963248729706, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.215604094671835e-06, |
|
"logits/chosen": -1.5828149318695068, |
|
"logits/rejected": -0.8305169939994812, |
|
"logps/chosen": -674.4658813476562, |
|
"logps/rejected": -1537.18212890625, |
|
"loss": 0.269, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.13081403076648712, |
|
"rewards/margins": 0.2305031269788742, |
|
"rewards/rejected": -0.3613171875476837, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.198603260653792e-06, |
|
"logits/chosen": -1.4573118686676025, |
|
"logits/rejected": -0.01602686010301113, |
|
"logps/chosen": -745.9620361328125, |
|
"logps/rejected": -1567.2252197265625, |
|
"loss": 0.16, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.08418073505163193, |
|
"rewards/margins": 0.2803150415420532, |
|
"rewards/rejected": -0.36449578404426575, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.181455249275701e-06, |
|
"logits/chosen": -1.1945483684539795, |
|
"logits/rejected": -0.8259621858596802, |
|
"logps/chosen": -676.2755126953125, |
|
"logps/rejected": -1655.5445556640625, |
|
"loss": 0.2373, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.09749835729598999, |
|
"rewards/margins": 0.27395281195640564, |
|
"rewards/rejected": -0.371451199054718, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.1641615463459926e-06, |
|
"logits/chosen": -1.4833742380142212, |
|
"logits/rejected": -0.5753664970397949, |
|
"logps/chosen": -676.084716796875, |
|
"logps/rejected": -1957.914794921875, |
|
"loss": 0.1464, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.045958876609802246, |
|
"rewards/margins": 0.3767011761665344, |
|
"rewards/rejected": -0.42266005277633667, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.146723650296701e-06, |
|
"logits/chosen": -1.5337693691253662, |
|
"logits/rejected": -0.5462034344673157, |
|
"logps/chosen": -618.5125732421875, |
|
"logps/rejected": -1453.6705322265625, |
|
"loss": 0.1498, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.06487244367599487, |
|
"rewards/margins": 0.21656076610088348, |
|
"rewards/rejected": -0.28143322467803955, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.129143072053639e-06, |
|
"logits/chosen": -1.6736488342285156, |
|
"logits/rejected": -0.11961223185062408, |
|
"logps/chosen": -786.7601318359375, |
|
"logps/rejected": -1709.4732666015625, |
|
"loss": 0.1415, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.08835209906101227, |
|
"rewards/margins": 0.2948789596557617, |
|
"rewards/rejected": -0.3832310736179352, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.111421334905468e-06, |
|
"logits/chosen": -1.440685510635376, |
|
"logits/rejected": -0.4499839246273041, |
|
"logps/chosen": -866.4661254882812, |
|
"logps/rejected": -1788.9306640625, |
|
"loss": 0.2314, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.12372098863124847, |
|
"rewards/margins": 0.28645357489585876, |
|
"rewards/rejected": -0.41017454862594604, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.093559974371725e-06, |
|
"logits/chosen": -1.2748010158538818, |
|
"logits/rejected": -0.5881192684173584, |
|
"logps/chosen": -938.8273315429688, |
|
"logps/rejected": -1917.021728515625, |
|
"loss": 0.1504, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.16988976299762726, |
|
"rewards/margins": 0.32855165004730225, |
|
"rewards/rejected": -0.4984413981437683, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.075560538069767e-06, |
|
"logits/chosen": -1.4327054023742676, |
|
"logits/rejected": -0.4992315173149109, |
|
"logps/chosen": -743.90234375, |
|
"logps/rejected": -1680.1441650390625, |
|
"loss": 0.1669, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.09745943546295166, |
|
"rewards/margins": 0.39398160576820374, |
|
"rewards/rejected": -0.4914410710334778, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.05742458558068e-06, |
|
"logits/chosen": -1.5205237865447998, |
|
"logits/rejected": -0.47205105423927307, |
|
"logps/chosen": -656.0806884765625, |
|
"logps/rejected": -1458.2177734375, |
|
"loss": 0.1959, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.08391676843166351, |
|
"rewards/margins": 0.2383715659379959, |
|
"rewards/rejected": -0.32228830456733704, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.039153688314146e-06, |
|
"logits/chosen": -1.5271790027618408, |
|
"logits/rejected": -0.13848623633384705, |
|
"logps/chosen": -667.71533203125, |
|
"logps/rejected": -1672.637939453125, |
|
"loss": 0.1946, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.04903538152575493, |
|
"rewards/margins": 0.32115495204925537, |
|
"rewards/rejected": -0.3701903223991394, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.020749429372286e-06, |
|
"logits/chosen": -1.455418348312378, |
|
"logits/rejected": -0.3692135810852051, |
|
"logps/chosen": -778.6569213867188, |
|
"logps/rejected": -1898.542236328125, |
|
"loss": 0.1951, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.10191953182220459, |
|
"rewards/margins": 0.3559776544570923, |
|
"rewards/rejected": -0.45789724588394165, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.002213403412492e-06, |
|
"logits/chosen": -1.3963444232940674, |
|
"logits/rejected": -0.1295473426580429, |
|
"logps/chosen": -613.0379028320312, |
|
"logps/rejected": -1605.5289306640625, |
|
"loss": 0.2558, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.09437403827905655, |
|
"rewards/margins": 0.2921295762062073, |
|
"rewards/rejected": -0.3865036368370056, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.983547216509254e-06, |
|
"logits/chosen": -1.677065134048462, |
|
"logits/rejected": -0.4172869622707367, |
|
"logps/chosen": -623.8750610351562, |
|
"logps/rejected": -1598.5770263671875, |
|
"loss": 0.1837, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.06079324334859848, |
|
"rewards/margins": 0.25734245777130127, |
|
"rewards/rejected": -0.31813567876815796, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.964752486015001e-06, |
|
"logits/chosen": -1.5321755409240723, |
|
"logits/rejected": -0.6257954835891724, |
|
"logps/chosen": -681.2603759765625, |
|
"logps/rejected": -1647.580810546875, |
|
"loss": 0.179, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.07860198616981506, |
|
"rewards/margins": 0.2607461214065552, |
|
"rewards/rejected": -0.3393481373786926, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.945830840419966e-06, |
|
"logits/chosen": -1.5395901203155518, |
|
"logits/rejected": -0.23838794231414795, |
|
"logps/chosen": -678.4739990234375, |
|
"logps/rejected": -1783.0814208984375, |
|
"loss": 0.1444, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.10125972330570221, |
|
"rewards/margins": 0.36801964044570923, |
|
"rewards/rejected": -0.46927928924560547, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.92678391921108e-06, |
|
"logits/chosen": -1.4998096227645874, |
|
"logits/rejected": -0.35734957456588745, |
|
"logps/chosen": -606.8898315429688, |
|
"logps/rejected": -1602.2557373046875, |
|
"loss": 0.1684, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.09188471734523773, |
|
"rewards/margins": 0.30264589190483093, |
|
"rewards/rejected": -0.39453059434890747, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.907613372729916e-06, |
|
"logits/chosen": -1.3110413551330566, |
|
"logits/rejected": -0.36875995993614197, |
|
"logps/chosen": -673.6112060546875, |
|
"logps/rejected": -1617.9066162109375, |
|
"loss": 0.1956, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.11483339965343475, |
|
"rewards/margins": 0.2649555504322052, |
|
"rewards/rejected": -0.37978893518447876, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.888320862029699e-06, |
|
"logits/chosen": -1.726869821548462, |
|
"logits/rejected": -0.4512055814266205, |
|
"logps/chosen": -697.3773193359375, |
|
"logps/rejected": -1701.3092041015625, |
|
"loss": 0.2008, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.07687092572450638, |
|
"rewards/margins": 0.31277498602867126, |
|
"rewards/rejected": -0.38964587450027466, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.868908058731376e-06, |
|
"logits/chosen": -1.5626089572906494, |
|
"logits/rejected": -0.7535050511360168, |
|
"logps/chosen": -776.7896118164062, |
|
"logps/rejected": -1669.3294677734375, |
|
"loss": 0.1674, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.054000310599803925, |
|
"rewards/margins": 0.27108216285705566, |
|
"rewards/rejected": -0.3250824809074402, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.849376644878783e-06, |
|
"logits/chosen": -1.3612600564956665, |
|
"logits/rejected": -0.8206363916397095, |
|
"logps/chosen": -592.5975341796875, |
|
"logps/rejected": -1729.0345458984375, |
|
"loss": 0.1497, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.03538142889738083, |
|
"rewards/margins": 0.3380415439605713, |
|
"rewards/rejected": -0.3734230101108551, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.829728312792895e-06, |
|
"logits/chosen": -1.6738402843475342, |
|
"logits/rejected": -0.5649760961532593, |
|
"logps/chosen": -617.51123046875, |
|
"logps/rejected": -1514.9173583984375, |
|
"loss": 0.2153, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.08767777681350708, |
|
"rewards/margins": 0.2846985459327698, |
|
"rewards/rejected": -0.37237635254859924, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.8099647649251984e-06, |
|
"logits/chosen": -1.2732475996017456, |
|
"logits/rejected": -0.4450169503688812, |
|
"logps/chosen": -827.7120361328125, |
|
"logps/rejected": -1842.5130615234375, |
|
"loss": 0.157, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.15502184629440308, |
|
"rewards/margins": 0.3444690704345703, |
|
"rewards/rejected": -0.4994909167289734, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.790087713710179e-06, |
|
"logits/chosen": -1.0923655033111572, |
|
"logits/rejected": 0.02032681182026863, |
|
"logps/chosen": -974.3514404296875, |
|
"logps/rejected": -1917.356689453125, |
|
"loss": 0.22, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.23814789950847626, |
|
"rewards/margins": 0.2912440896034241, |
|
"rewards/rejected": -0.5293919444084167, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.770098881416945e-06, |
|
"logits/chosen": -1.387081265449524, |
|
"logits/rejected": -0.628162145614624, |
|
"logps/chosen": -868.3365478515625, |
|
"logps/rejected": -1810.2318115234375, |
|
"loss": 0.1962, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.20213904976844788, |
|
"rewards/margins": 0.28864550590515137, |
|
"rewards/rejected": -0.49078455567359924, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"logits/chosen": -1.5495609045028687, |
|
"logits/rejected": -0.25171366333961487, |
|
"logps/chosen": -735.914794921875, |
|
"logps/rejected": -1725.832763671875, |
|
"loss": 0.1646, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.14493001997470856, |
|
"rewards/margins": 0.2965463101863861, |
|
"rewards/rejected": -0.44147634506225586, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7297928109491765e-06, |
|
"logits/chosen": -1.3633923530578613, |
|
"logits/rejected": -0.3343961536884308, |
|
"logps/chosen": -621.5894165039062, |
|
"logps/rejected": -1620.972412109375, |
|
"loss": 0.145, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.11256246268749237, |
|
"rewards/margins": 0.33683252334594727, |
|
"rewards/rejected": -0.44939494132995605, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.7094790651387414e-06, |
|
"logits/chosen": -1.618963599205017, |
|
"logits/rejected": -0.7474344968795776, |
|
"logps/chosen": -682.3802490234375, |
|
"logps/rejected": -1560.806396484375, |
|
"loss": 0.1795, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.13379545509815216, |
|
"rewards/margins": 0.25923866033554077, |
|
"rewards/rejected": -0.3930341303348541, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.689060522675689e-06, |
|
"logits/chosen": -1.407928228378296, |
|
"logits/rejected": -0.4922426640987396, |
|
"logps/chosen": -765.1798706054688, |
|
"logps/rejected": -1726.2021484375, |
|
"loss": 0.2049, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.13442331552505493, |
|
"rewards/margins": 0.28671473264694214, |
|
"rewards/rejected": -0.42113804817199707, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.668538952747236e-06, |
|
"logits/chosen": -1.621731162071228, |
|
"logits/rejected": -0.3162292540073395, |
|
"logps/chosen": -807.1463623046875, |
|
"logps/rejected": -1907.406982421875, |
|
"loss": 0.1259, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.12251107394695282, |
|
"rewards/margins": 0.4035162925720215, |
|
"rewards/rejected": -0.5260274410247803, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.6479161334675294e-06, |
|
"logits/chosen": -1.415895938873291, |
|
"logits/rejected": 0.0497395396232605, |
|
"logps/chosen": -814.912353515625, |
|
"logps/rejected": -1617.039306640625, |
|
"loss": 0.2032, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.13029535114765167, |
|
"rewards/margins": 0.23458731174468994, |
|
"rewards/rejected": -0.3648826479911804, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.627193851723577e-06, |
|
"logits/chosen": -1.3018121719360352, |
|
"logits/rejected": -0.9184350967407227, |
|
"logps/chosen": -798.8698120117188, |
|
"logps/rejected": -1686.4742431640625, |
|
"loss": 0.212, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.12557171285152435, |
|
"rewards/margins": 0.2953474819660187, |
|
"rewards/rejected": -0.42091917991638184, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.6063739030204226e-06, |
|
"logits/chosen": -1.5564700365066528, |
|
"logits/rejected": -0.8456588983535767, |
|
"logps/chosen": -672.8592529296875, |
|
"logps/rejected": -1566.520263671875, |
|
"loss": 0.1958, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.13787488639354706, |
|
"rewards/margins": 0.28301936388015747, |
|
"rewards/rejected": -0.42089423537254333, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.5854580913255706e-06, |
|
"logits/chosen": -1.3799980878829956, |
|
"logits/rejected": -0.09411342442035675, |
|
"logps/chosen": -759.1764526367188, |
|
"logps/rejected": -1664.320068359375, |
|
"loss": 0.2203, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.1847977340221405, |
|
"rewards/margins": 0.2686237096786499, |
|
"rewards/rejected": -0.4534215033054352, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.564448228912682e-06, |
|
"logits/chosen": -1.5500601530075073, |
|
"logits/rejected": -0.6304312348365784, |
|
"logps/chosen": -741.7132568359375, |
|
"logps/rejected": -1747.054443359375, |
|
"loss": 0.1585, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.11834660917520523, |
|
"rewards/margins": 0.3032262921333313, |
|
"rewards/rejected": -0.4215729236602783, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.543346136204545e-06, |
|
"logits/chosen": -1.2121784687042236, |
|
"logits/rejected": -0.23208335041999817, |
|
"logps/chosen": -896.2943115234375, |
|
"logps/rejected": -1856.9176025390625, |
|
"loss": 0.1546, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.18068882822990417, |
|
"rewards/margins": 0.29168596863746643, |
|
"rewards/rejected": -0.472374826669693, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.522153641615345e-06, |
|
"logits/chosen": -1.388629674911499, |
|
"logits/rejected": -0.5730769038200378, |
|
"logps/chosen": -736.3162841796875, |
|
"logps/rejected": -1783.465576171875, |
|
"loss": 0.1359, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.12700147926807404, |
|
"rewards/margins": 0.37471193075180054, |
|
"rewards/rejected": -0.5017133951187134, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5008725813922383e-06, |
|
"logits/chosen": -1.4097487926483154, |
|
"logits/rejected": -0.479957640171051, |
|
"logps/chosen": -869.1336669921875, |
|
"logps/rejected": -1629.0390625, |
|
"loss": 0.1687, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1220046877861023, |
|
"rewards/margins": 0.2969478964805603, |
|
"rewards/rejected": -0.4189525544643402, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.4795047994562463e-06, |
|
"logits/chosen": -1.4321268796920776, |
|
"logits/rejected": -0.3780360221862793, |
|
"logps/chosen": -764.9324340820312, |
|
"logps/rejected": -1601.4052734375, |
|
"loss": 0.2108, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.12771736085414886, |
|
"rewards/margins": 0.2464291751384735, |
|
"rewards/rejected": -0.3741465210914612, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.458052147242494e-06, |
|
"logits/chosen": -1.6645126342773438, |
|
"logits/rejected": -0.8111234903335571, |
|
"logps/chosen": -817.3748168945312, |
|
"logps/rejected": -1478.569091796875, |
|
"loss": 0.1886, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1138969212770462, |
|
"rewards/margins": 0.18457883596420288, |
|
"rewards/rejected": -0.2984757423400879, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.436516483539781e-06, |
|
"logits/chosen": -1.6396286487579346, |
|
"logits/rejected": -0.3675927221775055, |
|
"logps/chosen": -737.4990234375, |
|
"logps/rejected": -1537.5166015625, |
|
"loss": 0.187, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.09592025727033615, |
|
"rewards/margins": 0.21111655235290527, |
|
"rewards/rejected": -0.307036817073822, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4148996743295305e-06, |
|
"logits/chosen": -1.654663324356079, |
|
"logits/rejected": -0.5259418487548828, |
|
"logps/chosen": -859.1363525390625, |
|
"logps/rejected": -1622.9510498046875, |
|
"loss": 0.2101, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.06407856196165085, |
|
"rewards/margins": 0.22986917197704315, |
|
"rewards/rejected": -0.2939477562904358, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3932035926241103e-06, |
|
"logits/chosen": -1.4901096820831299, |
|
"logits/rejected": -1.160047173500061, |
|
"logps/chosen": -624.4628295898438, |
|
"logps/rejected": -1744.197021484375, |
|
"loss": 0.1465, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.04981148615479469, |
|
"rewards/margins": 0.3574128746986389, |
|
"rewards/rejected": -0.4072244167327881, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3714301183045382e-06, |
|
"logits/chosen": -1.4609191417694092, |
|
"logits/rejected": -0.5895389318466187, |
|
"logps/chosen": -690.073974609375, |
|
"logps/rejected": -1626.9462890625, |
|
"loss": 0.1726, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.08027863502502441, |
|
"rewards/margins": 0.2669757008552551, |
|
"rewards/rejected": -0.34725433588027954, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.349581137957604e-06, |
|
"logits/chosen": -1.6755574941635132, |
|
"logits/rejected": -0.5948814153671265, |
|
"logps/chosen": -704.0479736328125, |
|
"logps/rejected": -1561.6595458984375, |
|
"loss": 0.2339, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.08259834349155426, |
|
"rewards/margins": 0.2650589346885681, |
|
"rewards/rejected": -0.3476572632789612, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3276585447123957e-06, |
|
"logits/chosen": -1.1900542974472046, |
|
"logits/rejected": -0.02419034019112587, |
|
"logps/chosen": -666.2265625, |
|
"logps/rejected": -1475.4140625, |
|
"loss": 0.2067, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.09865899384021759, |
|
"rewards/margins": 0.23837342858314514, |
|
"rewards/rejected": -0.33703240752220154, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.3056642380762783e-06, |
|
"logits/chosen": -1.4822317361831665, |
|
"logits/rejected": -0.4478825032711029, |
|
"logps/chosen": -719.041015625, |
|
"logps/rejected": -1704.8245849609375, |
|
"loss": 0.1518, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.046217575669288635, |
|
"rewards/margins": 0.35374483466148376, |
|
"rewards/rejected": -0.3999623954296112, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2836001237702993e-06, |
|
"logits/chosen": -1.5582091808319092, |
|
"logits/rejected": -0.370614230632782, |
|
"logps/chosen": -738.9437255859375, |
|
"logps/rejected": -1776.6015625, |
|
"loss": 0.1323, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.05881623178720474, |
|
"rewards/margins": 0.310192734003067, |
|
"rewards/rejected": -0.36900898814201355, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2614681135640696e-06, |
|
"logits/chosen": -1.4925451278686523, |
|
"logits/rejected": -0.8497873544692993, |
|
"logps/chosen": -724.83935546875, |
|
"logps/rejected": -1761.577392578125, |
|
"loss": 0.1634, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.07654894888401031, |
|
"rewards/margins": 0.24484257400035858, |
|
"rewards/rejected": -0.3213915228843689, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2392701251101172e-06, |
|
"logits/chosen": -1.396722435951233, |
|
"logits/rejected": -0.7455130815505981, |
|
"logps/chosen": -674.4617309570312, |
|
"logps/rejected": -1584.518798828125, |
|
"loss": 0.1387, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.08803001791238785, |
|
"rewards/margins": 0.26931852102279663, |
|
"rewards/rejected": -0.3573485314846039, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.217008081777726e-06, |
|
"logits/chosen": -1.504368543624878, |
|
"logits/rejected": -0.6314746141433716, |
|
"logps/chosen": -874.5758056640625, |
|
"logps/rejected": -1906.4967041015625, |
|
"loss": 0.1907, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1991203874349594, |
|
"rewards/margins": 0.36806267499923706, |
|
"rewards/rejected": -0.5671831369400024, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1946839124862873e-06, |
|
"logits/chosen": -1.2642552852630615, |
|
"logits/rejected": -0.10561282932758331, |
|
"logps/chosen": -707.1906127929688, |
|
"logps/rejected": -1762.505126953125, |
|
"loss": 0.1346, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.12033917009830475, |
|
"rewards/margins": 0.343368262052536, |
|
"rewards/rejected": -0.4637073874473572, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1722995515381644e-06, |
|
"logits/chosen": -1.3440335988998413, |
|
"logits/rejected": -0.3857986330986023, |
|
"logps/chosen": -733.5349731445312, |
|
"logps/rejected": -1664.541748046875, |
|
"loss": 0.2086, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.11129789054393768, |
|
"rewards/margins": 0.3099938631057739, |
|
"rewards/rejected": -0.4212917685508728, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.149856938451094e-06, |
|
"logits/chosen": -1.6117057800292969, |
|
"logits/rejected": 0.15098969638347626, |
|
"logps/chosen": -814.0639038085938, |
|
"logps/rejected": -1576.1326904296875, |
|
"loss": 0.1593, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.12756898999214172, |
|
"rewards/margins": 0.25667664408683777, |
|
"rewards/rejected": -0.38424569368362427, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.127358017790132e-06, |
|
"logits/chosen": -1.3055508136749268, |
|
"logits/rejected": 0.43101415038108826, |
|
"logps/chosen": -844.7630615234375, |
|
"logps/rejected": -1807.894287109375, |
|
"loss": 0.1253, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.14493486285209656, |
|
"rewards/margins": 0.284994512796402, |
|
"rewards/rejected": -0.42992934584617615, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.1048047389991693e-06, |
|
"logits/chosen": -1.3464422225952148, |
|
"logits/rejected": -0.17827872931957245, |
|
"logps/chosen": -613.8772583007812, |
|
"logps/rejected": -1675.2249755859375, |
|
"loss": 0.1109, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.057198069989681244, |
|
"rewards/margins": 0.3218960464000702, |
|
"rewards/rejected": -0.3790941536426544, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.082199056232015e-06, |
|
"logits/chosen": -1.4088603258132935, |
|
"logits/rejected": -1.116137981414795, |
|
"logps/chosen": -599.2484130859375, |
|
"logps/rejected": -1451.5693359375, |
|
"loss": 0.2227, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.0676434338092804, |
|
"rewards/margins": 0.21012239158153534, |
|
"rewards/rejected": -0.27776581048965454, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.059542928183079e-06, |
|
"logits/chosen": -1.0800918340682983, |
|
"logits/rejected": -0.1234823614358902, |
|
"logps/chosen": -755.9251708984375, |
|
"logps/rejected": -1821.8336181640625, |
|
"loss": 0.1684, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.08166182041168213, |
|
"rewards/margins": 0.3705812394618988, |
|
"rewards/rejected": -0.45224303007125854, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0368383179176584e-06, |
|
"logits/chosen": -1.2700388431549072, |
|
"logits/rejected": -0.5240074396133423, |
|
"logps/chosen": -747.2010498046875, |
|
"logps/rejected": -1727.675537109375, |
|
"loss": 0.1762, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.0997215062379837, |
|
"rewards/margins": 0.32902830839157104, |
|
"rewards/rejected": -0.4287497401237488, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0140871927018466e-06, |
|
"logits/chosen": -1.4908950328826904, |
|
"logits/rejected": -0.6521574854850769, |
|
"logps/chosen": -840.5997924804688, |
|
"logps/rejected": -1888.5599365234375, |
|
"loss": 0.1566, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1681024730205536, |
|
"rewards/margins": 0.35152748227119446, |
|
"rewards/rejected": -0.519629955291748, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.9912915238320755e-06, |
|
"logits/chosen": -1.2768045663833618, |
|
"logits/rejected": -0.5125023126602173, |
|
"logps/chosen": -649.5341796875, |
|
"logps/rejected": -1694.0074462890625, |
|
"loss": 0.1611, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.10545836389064789, |
|
"rewards/margins": 0.3052206039428711, |
|
"rewards/rejected": -0.41067901253700256, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9684532864643123e-06, |
|
"logits/chosen": -1.6448793411254883, |
|
"logits/rejected": -0.8161486387252808, |
|
"logps/chosen": -683.2916870117188, |
|
"logps/rejected": -1537.4920654296875, |
|
"loss": 0.1733, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1456030309200287, |
|
"rewards/margins": 0.2657826840877533, |
|
"rewards/rejected": -0.411385715007782, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.945574459442917e-06, |
|
"logits/chosen": -1.5162016153335571, |
|
"logits/rejected": -0.9785049557685852, |
|
"logps/chosen": -733.1231079101562, |
|
"logps/rejected": -1667.495849609375, |
|
"loss": 0.1597, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.15169629454612732, |
|
"rewards/margins": 0.29900026321411133, |
|
"rewards/rejected": -0.45069652795791626, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.922657025129185e-06, |
|
"logits/chosen": -1.4472191333770752, |
|
"logits/rejected": -0.4651806354522705, |
|
"logps/chosen": -756.4955444335938, |
|
"logps/rejected": -1759.114501953125, |
|
"loss": 0.2495, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.15917366743087769, |
|
"rewards/margins": 0.2930217683315277, |
|
"rewards/rejected": -0.4521954655647278, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.8997029692295875e-06, |
|
"logits/chosen": -1.5512139797210693, |
|
"logits/rejected": -0.7475739121437073, |
|
"logps/chosen": -771.3937377929688, |
|
"logps/rejected": -1779.020751953125, |
|
"loss": 0.1492, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.11777035892009735, |
|
"rewards/margins": 0.30730709433555603, |
|
"rewards/rejected": -0.4250774383544922, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.876714280623708e-06, |
|
"logits/chosen": -1.3046129941940308, |
|
"logits/rejected": -0.2948853671550751, |
|
"logps/chosen": -736.4674682617188, |
|
"logps/rejected": -1671.8529052734375, |
|
"loss": 0.1435, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.09072361886501312, |
|
"rewards/margins": 0.3495047688484192, |
|
"rewards/rejected": -0.4402283728122711, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8536929511919227e-06, |
|
"logits/chosen": -1.4932053089141846, |
|
"logits/rejected": 0.03270454332232475, |
|
"logps/chosen": -672.2532958984375, |
|
"logps/rejected": -1599.681640625, |
|
"loss": 0.1603, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.09094641357660294, |
|
"rewards/margins": 0.29966163635253906, |
|
"rewards/rejected": -0.3906080424785614, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8306409756428067e-06, |
|
"logits/chosen": -1.2685102224349976, |
|
"logits/rejected": -0.457380473613739, |
|
"logps/chosen": -708.8721923828125, |
|
"logps/rejected": -1771.715087890625, |
|
"loss": 0.143, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.13361448049545288, |
|
"rewards/margins": 0.3276425004005432, |
|
"rewards/rejected": -0.46125689148902893, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.807560351340302e-06, |
|
"logits/chosen": -1.3997093439102173, |
|
"logits/rejected": -0.37530818581581116, |
|
"logps/chosen": -675.5166015625, |
|
"logps/rejected": -1654.8193359375, |
|
"loss": 0.1566, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.11855722963809967, |
|
"rewards/margins": 0.36131593585014343, |
|
"rewards/rejected": -0.4798731207847595, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7844530781306544e-06, |
|
"logits/chosen": -1.329444408416748, |
|
"logits/rejected": -0.4007740616798401, |
|
"logps/chosen": -830.9290161132812, |
|
"logps/rejected": -2067.2666015625, |
|
"loss": 0.1597, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.18545034527778625, |
|
"rewards/margins": 0.4029621481895447, |
|
"rewards/rejected": -0.5884124636650085, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.761321158169134e-06, |
|
"logits/chosen": -1.5382049083709717, |
|
"logits/rejected": -0.7047163844108582, |
|
"logps/chosen": -823.2682495117188, |
|
"logps/rejected": -1897.6998291015625, |
|
"loss": 0.1462, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1536359339952469, |
|
"rewards/margins": 0.3253306448459625, |
|
"rewards/rejected": -0.4789665639400482, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.738166595746554e-06, |
|
"logits/chosen": -1.612831711769104, |
|
"logits/rejected": -0.2793089747428894, |
|
"logps/chosen": -831.91357421875, |
|
"logps/rejected": -1897.527099609375, |
|
"loss": 0.1602, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.15530729293823242, |
|
"rewards/margins": 0.3191708028316498, |
|
"rewards/rejected": -0.4744780957698822, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.7149913971156105e-06, |
|
"logits/chosen": -1.5521538257598877, |
|
"logits/rejected": -0.6847071051597595, |
|
"logps/chosen": -752.0145263671875, |
|
"logps/rejected": -1666.796630859375, |
|
"loss": 0.1651, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.14488133788108826, |
|
"rewards/margins": 0.31412333250045776, |
|
"rewards/rejected": -0.4590047001838684, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6917975703170466e-06, |
|
"logits/chosen": -1.1928004026412964, |
|
"logits/rejected": -0.8584890365600586, |
|
"logps/chosen": -881.7937622070312, |
|
"logps/rejected": -1752.3238525390625, |
|
"loss": 0.1488, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.21142208576202393, |
|
"rewards/margins": 0.31265494227409363, |
|
"rewards/rejected": -0.5240770578384399, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.668587125005663e-06, |
|
"logits/chosen": -1.4918617010116577, |
|
"logits/rejected": -0.6560184359550476, |
|
"logps/chosen": -728.0860595703125, |
|
"logps/rejected": -1561.5875244140625, |
|
"loss": 0.1206, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.13730724155902863, |
|
"rewards/margins": 0.3027104437351227, |
|
"rewards/rejected": -0.4400176405906677, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6453620722761897e-06, |
|
"logits/chosen": -1.5282548666000366, |
|
"logits/rejected": -0.7147419452667236, |
|
"logps/chosen": -817.76416015625, |
|
"logps/rejected": -1859.003173828125, |
|
"loss": 0.1314, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.21234384179115295, |
|
"rewards/margins": 0.3443707823753357, |
|
"rewards/rejected": -0.556714653968811, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.6221244244890336e-06, |
|
"logits/chosen": -1.3335682153701782, |
|
"logits/rejected": -0.07726557552814484, |
|
"logps/chosen": -991.7344970703125, |
|
"logps/rejected": -1859.514892578125, |
|
"loss": 0.1715, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.26506227254867554, |
|
"rewards/margins": 0.2776499390602112, |
|
"rewards/rejected": -0.5427122116088867, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5988761950959133e-06, |
|
"logits/chosen": -1.4011818170547485, |
|
"logits/rejected": -0.5180048942565918, |
|
"logps/chosen": -809.6094360351562, |
|
"logps/rejected": -1956.6029052734375, |
|
"loss": 0.158, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1943681836128235, |
|
"rewards/margins": 0.3915101885795593, |
|
"rewards/rejected": -0.585878312587738, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.575619398465402e-06, |
|
"logits/chosen": -1.4724808931350708, |
|
"logits/rejected": -0.7128661274909973, |
|
"logps/chosen": -634.3349609375, |
|
"logps/rejected": -1436.007568359375, |
|
"loss": 0.2107, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.13114015758037567, |
|
"rewards/margins": 0.22343416512012482, |
|
"rewards/rejected": -0.3545742630958557, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5523560497083927e-06, |
|
"logits/chosen": -1.1302558183670044, |
|
"logits/rejected": -0.5582550764083862, |
|
"logps/chosen": -715.1376953125, |
|
"logps/rejected": -1611.423828125, |
|
"loss": 0.1614, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.11572917550802231, |
|
"rewards/margins": 0.2607465088367462, |
|
"rewards/rejected": -0.3764756917953491, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5290881645034932e-06, |
|
"logits/chosen": -1.6198726892471313, |
|
"logits/rejected": -0.6000566482543945, |
|
"logps/chosen": -762.2321166992188, |
|
"logps/rejected": -1829.3511962890625, |
|
"loss": 0.1657, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.16410943865776062, |
|
"rewards/margins": 0.37973588705062866, |
|
"rewards/rejected": -0.5438452959060669, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5058177589223766e-06, |
|
"logits/chosen": -1.335745930671692, |
|
"logits/rejected": -0.5967541933059692, |
|
"logps/chosen": -677.7882080078125, |
|
"logps/rejected": -1777.054931640625, |
|
"loss": 0.1599, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.12234095484018326, |
|
"rewards/margins": 0.3689095675945282, |
|
"rewards/rejected": -0.49125057458877563, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.482546849255096e-06, |
|
"logits/chosen": -1.2376234531402588, |
|
"logits/rejected": -0.5898858308792114, |
|
"logps/chosen": -797.4426879882812, |
|
"logps/rejected": -1635.3062744140625, |
|
"loss": 0.137, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.15089258551597595, |
|
"rewards/margins": 0.2774205803871155, |
|
"rewards/rejected": -0.4283131957054138, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4592774518353858e-06, |
|
"logits/chosen": -1.448229193687439, |
|
"logits/rejected": -0.610197901725769, |
|
"logps/chosen": -747.7297973632812, |
|
"logps/rejected": -1767.844970703125, |
|
"loss": 0.1608, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.16316349804401398, |
|
"rewards/margins": 0.3244298994541168, |
|
"rewards/rejected": -0.4875933527946472, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.436011582865945e-06, |
|
"logits/chosen": -1.4960334300994873, |
|
"logits/rejected": -0.6341277956962585, |
|
"logps/chosen": -892.7718505859375, |
|
"logps/rejected": -1960.636474609375, |
|
"loss": 0.1577, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.21525397896766663, |
|
"rewards/margins": 0.3495286703109741, |
|
"rewards/rejected": -0.5647826790809631, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4127512582437486e-06, |
|
"logits/chosen": -1.0745445489883423, |
|
"logits/rejected": 0.3137953281402588, |
|
"logps/chosen": -765.67919921875, |
|
"logps/rejected": -1701.5640869140625, |
|
"loss": 0.1943, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.164879709482193, |
|
"rewards/margins": 0.2781751751899719, |
|
"rewards/rejected": -0.4430549144744873, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.3894984933853734e-06, |
|
"logits/chosen": -1.4759852886199951, |
|
"logits/rejected": -0.5236972570419312, |
|
"logps/chosen": -838.9854736328125, |
|
"logps/rejected": -1744.4114990234375, |
|
"loss": 0.1704, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.17717286944389343, |
|
"rewards/margins": 0.257200688123703, |
|
"rewards/rejected": -0.43437355756759644, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.366255303052377e-06, |
|
"logits/chosen": -1.508411169052124, |
|
"logits/rejected": -0.39122310280799866, |
|
"logps/chosen": -843.5926513671875, |
|
"logps/rejected": -1831.277099609375, |
|
"loss": 0.1721, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.14325647056102753, |
|
"rewards/margins": 0.292641818523407, |
|
"rewards/rejected": -0.4358982443809509, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3430237011767166e-06, |
|
"logits/chosen": -1.5056064128875732, |
|
"logits/rejected": 0.36290010809898376, |
|
"logps/chosen": -771.4110717773438, |
|
"logps/rejected": -1977.0465087890625, |
|
"loss": 0.1063, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.13286644220352173, |
|
"rewards/margins": 0.41198697686195374, |
|
"rewards/rejected": -0.5448533892631531, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.319805700686257e-06, |
|
"logits/chosen": -1.1897435188293457, |
|
"logits/rejected": -0.6799365282058716, |
|
"logps/chosen": -774.5003051757812, |
|
"logps/rejected": -1752.76953125, |
|
"loss": 0.1455, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.10589227825403214, |
|
"rewards/margins": 0.3271291255950928, |
|
"rewards/rejected": -0.4330214560031891, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.296603313330355e-06, |
|
"logits/chosen": -1.5946743488311768, |
|
"logits/rejected": -0.24514515697956085, |
|
"logps/chosen": -728.986328125, |
|
"logps/rejected": -1501.432373046875, |
|
"loss": 0.1661, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.12233342975378036, |
|
"rewards/margins": 0.2589247226715088, |
|
"rewards/rejected": -0.38125813007354736, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2734185495055503e-06, |
|
"logits/chosen": -1.1782925128936768, |
|
"logits/rejected": -0.13661935925483704, |
|
"logps/chosen": -568.3839721679688, |
|
"logps/rejected": -1682.452392578125, |
|
"loss": 0.1338, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.12734124064445496, |
|
"rewards/margins": 0.3284556567668915, |
|
"rewards/rejected": -0.4557969570159912, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.250253418081373e-06, |
|
"logits/chosen": -1.438287615776062, |
|
"logits/rejected": 0.09851478040218353, |
|
"logps/chosen": -888.4573974609375, |
|
"logps/rejected": -1791.545654296875, |
|
"loss": 0.2007, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.20431411266326904, |
|
"rewards/margins": 0.2950129508972168, |
|
"rewards/rejected": -0.49932703375816345, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.22710992622628e-06, |
|
"logits/chosen": -1.512485384941101, |
|
"logits/rejected": 0.1614830195903778, |
|
"logps/chosen": -803.5087890625, |
|
"logps/rejected": -1817.915771484375, |
|
"loss": 0.1446, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.13419024646282196, |
|
"rewards/margins": 0.4049316346645355, |
|
"rewards/rejected": -0.5391219258308411, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2039900792337477e-06, |
|
"logits/chosen": -1.3607614040374756, |
|
"logits/rejected": -0.42570924758911133, |
|
"logps/chosen": -765.4649658203125, |
|
"logps/rejected": -1829.791259765625, |
|
"loss": 0.1517, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1280583143234253, |
|
"rewards/margins": 0.36656203866004944, |
|
"rewards/rejected": -0.4946204125881195, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1808958803485134e-06, |
|
"logits/chosen": -1.5544694662094116, |
|
"logits/rejected": -0.8007132411003113, |
|
"logps/chosen": -521.413818359375, |
|
"logps/rejected": -1577.453857421875, |
|
"loss": 0.1108, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.07102310657501221, |
|
"rewards/margins": 0.3457227647304535, |
|
"rewards/rejected": -0.4167459011077881, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.157829330593008e-06, |
|
"logits/chosen": -1.4860761165618896, |
|
"logits/rejected": 0.10385887324810028, |
|
"logps/chosen": -840.1112060546875, |
|
"logps/rejected": -1826.4033203125, |
|
"loss": 0.1199, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.17368373274803162, |
|
"rewards/margins": 0.36537981033325195, |
|
"rewards/rejected": -0.5390635132789612, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.134792428593971e-06, |
|
"logits/chosen": -1.4830608367919922, |
|
"logits/rejected": -0.2886629104614258, |
|
"logps/chosen": -688.9013671875, |
|
"logps/rejected": -1467.2042236328125, |
|
"loss": 0.196, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.12851116061210632, |
|
"rewards/margins": 0.2524433434009552, |
|
"rewards/rejected": -0.38095444440841675, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1117871704092818e-06, |
|
"logits/chosen": -1.511211633682251, |
|
"logits/rejected": -0.5041274428367615, |
|
"logps/chosen": -755.0718994140625, |
|
"logps/rejected": -1825.1058349609375, |
|
"loss": 0.1207, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.10548318922519684, |
|
"rewards/margins": 0.37242189049720764, |
|
"rewards/rejected": -0.4779050946235657, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0888155493550027e-06, |
|
"logits/chosen": -1.5032051801681519, |
|
"logits/rejected": -0.2272813320159912, |
|
"logps/chosen": -617.5220947265625, |
|
"logps/rejected": -1630.780517578125, |
|
"loss": 0.1807, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.0706586018204689, |
|
"rewards/margins": 0.3596861958503723, |
|
"rewards/rejected": -0.430344820022583, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0658795558326745e-06, |
|
"logits/chosen": -1.4327954053878784, |
|
"logits/rejected": -0.20998969674110413, |
|
"logps/chosen": -752.0433349609375, |
|
"logps/rejected": -1787.226806640625, |
|
"loss": 0.1848, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.07883577048778534, |
|
"rewards/margins": 0.3491933047771454, |
|
"rewards/rejected": -0.4280291199684143, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0429811771568468e-06, |
|
"logits/chosen": -1.4897148609161377, |
|
"logits/rejected": -0.4423850178718567, |
|
"logps/chosen": -781.8294067382812, |
|
"logps/rejected": -1615.2808837890625, |
|
"loss": 0.1639, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.09271921962499619, |
|
"rewards/margins": 0.27344492077827454, |
|
"rewards/rejected": -0.3661641478538513, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.0201223973828917e-06, |
|
"logits/chosen": -1.5945818424224854, |
|
"logits/rejected": -0.5884144902229309, |
|
"logps/chosen": -650.7221069335938, |
|
"logps/rejected": -1742.1322021484375, |
|
"loss": 0.1932, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.07876542955636978, |
|
"rewards/margins": 0.36041781306266785, |
|
"rewards/rejected": -0.43918323516845703, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.997305197135089e-06, |
|
"logits/chosen": -1.4727747440338135, |
|
"logits/rejected": -0.5500017404556274, |
|
"logps/chosen": -810.6484985351562, |
|
"logps/rejected": -1713.4674072265625, |
|
"loss": 0.1612, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.0999779924750328, |
|
"rewards/margins": 0.28804337978363037, |
|
"rewards/rejected": -0.3880213797092438, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9745315534350157e-06, |
|
"logits/chosen": -1.5009238719940186, |
|
"logits/rejected": -0.7104870080947876, |
|
"logps/chosen": -620.9692993164062, |
|
"logps/rejected": -1662.8277587890625, |
|
"loss": 0.128, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.0903405249118805, |
|
"rewards/margins": 0.30934035778045654, |
|
"rewards/rejected": -0.3996809124946594, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9518034395302413e-06, |
|
"logits/chosen": -1.17448091506958, |
|
"logits/rejected": -0.5054240822792053, |
|
"logps/chosen": -598.98046875, |
|
"logps/rejected": -1721.002197265625, |
|
"loss": 0.168, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.07766080647706985, |
|
"rewards/margins": 0.37883394956588745, |
|
"rewards/rejected": -0.4564947485923767, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9291228247233607e-06, |
|
"logits/chosen": -1.3139159679412842, |
|
"logits/rejected": -0.6714901924133301, |
|
"logps/chosen": -843.5751953125, |
|
"logps/rejected": -1910.8170166015625, |
|
"loss": 0.1071, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.16244825720787048, |
|
"rewards/margins": 0.3335781693458557, |
|
"rewards/rejected": -0.4960264265537262, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9064916742013515e-06, |
|
"logits/chosen": -1.3740966320037842, |
|
"logits/rejected": -0.3796101212501526, |
|
"logps/chosen": -911.6195068359375, |
|
"logps/rejected": -1917.3980712890625, |
|
"loss": 0.1879, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.19778266549110413, |
|
"rewards/margins": 0.35178643465042114, |
|
"rewards/rejected": -0.5495691895484924, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.883911948865306e-06, |
|
"logits/chosen": -1.4115228652954102, |
|
"logits/rejected": -0.3268095552921295, |
|
"logps/chosen": -669.3294677734375, |
|
"logps/rejected": -1735.1517333984375, |
|
"loss": 0.1672, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.11265420913696289, |
|
"rewards/margins": 0.36892735958099365, |
|
"rewards/rejected": -0.48158153891563416, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8613856051605242e-06, |
|
"logits/chosen": -1.6518300771713257, |
|
"logits/rejected": -0.6226130127906799, |
|
"logps/chosen": -585.2733154296875, |
|
"logps/rejected": -1588.8135986328125, |
|
"loss": 0.1303, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.09527496993541718, |
|
"rewards/margins": 0.36038875579833984, |
|
"rewards/rejected": -0.4556637406349182, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8389145949069953e-06, |
|
"logits/chosen": -1.3490536212921143, |
|
"logits/rejected": -0.4021245539188385, |
|
"logps/chosen": -672.3787231445312, |
|
"logps/rejected": -1490.810791015625, |
|
"loss": 0.2294, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.09898020327091217, |
|
"rewards/margins": 0.24909377098083496, |
|
"rewards/rejected": -0.34807395935058594, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.816500865130279e-06, |
|
"logits/chosen": -1.430107831954956, |
|
"logits/rejected": -0.26797226071357727, |
|
"logps/chosen": -658.622314453125, |
|
"logps/rejected": -1494.597900390625, |
|
"loss": 0.1936, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.08045925199985504, |
|
"rewards/margins": 0.2406737506389618, |
|
"rewards/rejected": -0.321133017539978, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.7941463578928088e-06, |
|
"logits/chosen": -1.6190364360809326, |
|
"logits/rejected": -0.5775830149650574, |
|
"logps/chosen": -652.179443359375, |
|
"logps/rejected": -1785.9398193359375, |
|
"loss": 0.1489, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.0696047991514206, |
|
"rewards/margins": 0.32390493154525757, |
|
"rewards/rejected": -0.39350971579551697, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.7718530101256115e-06, |
|
"logits/chosen": -1.432355284690857, |
|
"logits/rejected": -0.7160090208053589, |
|
"logps/chosen": -757.6654663085938, |
|
"logps/rejected": -1582.92333984375, |
|
"loss": 0.2207, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.10973949730396271, |
|
"rewards/margins": 0.2269624024629593, |
|
"rewards/rejected": -0.3367019295692444, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7496227534604859e-06, |
|
"logits/chosen": -1.6282392740249634, |
|
"logits/rejected": -0.9807308316230774, |
|
"logps/chosen": -681.4638671875, |
|
"logps/rejected": -1636.9273681640625, |
|
"loss": 0.1722, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.10274797677993774, |
|
"rewards/margins": 0.3626464009284973, |
|
"rewards/rejected": -0.46539440751075745, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7274575140626318e-06, |
|
"logits/chosen": -1.4671717882156372, |
|
"logits/rejected": -0.4975617825984955, |
|
"logps/chosen": -698.2318725585938, |
|
"logps/rejected": -1724.447509765625, |
|
"loss": 0.1602, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.11632432043552399, |
|
"rewards/margins": 0.3676297664642334, |
|
"rewards/rejected": -0.4839541018009186, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7053592124637557e-06, |
|
"logits/chosen": -1.5764734745025635, |
|
"logits/rejected": -0.617272675037384, |
|
"logps/chosen": -761.3939819335938, |
|
"logps/rejected": -1650.2503662109375, |
|
"loss": 0.2339, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.15402886271476746, |
|
"rewards/margins": 0.2574668228626251, |
|
"rewards/rejected": -0.4114956855773926, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6833297633956647e-06, |
|
"logits/chosen": -1.310483694076538, |
|
"logits/rejected": 0.16814152896404266, |
|
"logps/chosen": -694.213134765625, |
|
"logps/rejected": -1697.9111328125, |
|
"loss": 0.108, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.07603000104427338, |
|
"rewards/margins": 0.3201691210269928, |
|
"rewards/rejected": -0.396199107170105, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.661371075624363e-06, |
|
"logits/chosen": -1.422377347946167, |
|
"logits/rejected": -0.4117739796638489, |
|
"logps/chosen": -695.9147338867188, |
|
"logps/rejected": -1731.9359130859375, |
|
"loss": 0.1581, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.060897957533597946, |
|
"rewards/margins": 0.35658949613571167, |
|
"rewards/rejected": -0.4174874424934387, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6394850517846621e-06, |
|
"logits/chosen": -1.6102850437164307, |
|
"logits/rejected": -0.9350086450576782, |
|
"logps/chosen": -763.3278198242188, |
|
"logps/rejected": -1718.2047119140625, |
|
"loss": 0.1575, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1062902957201004, |
|
"rewards/margins": 0.3449605107307434, |
|
"rewards/rejected": -0.45125073194503784, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6176735882153284e-06, |
|
"logits/chosen": -1.4336785078048706, |
|
"logits/rejected": -0.12534983456134796, |
|
"logps/chosen": -599.1646728515625, |
|
"logps/rejected": -1452.4405517578125, |
|
"loss": 0.1354, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.06159447878599167, |
|
"rewards/margins": 0.3045565187931061, |
|
"rewards/rejected": -0.36615103483200073, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5959385747947697e-06, |
|
"logits/chosen": -1.4207260608673096, |
|
"logits/rejected": -0.777606189250946, |
|
"logps/chosen": -604.3650512695312, |
|
"logps/rejected": -1650.2216796875, |
|
"loss": 0.1083, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.06717022508382797, |
|
"rewards/margins": 0.36061280965805054, |
|
"rewards/rejected": -0.4277830123901367, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5742818947772875e-06, |
|
"logits/chosen": -1.5350821018218994, |
|
"logits/rejected": 0.10819880664348602, |
|
"logps/chosen": -758.2330322265625, |
|
"logps/rejected": -1617.786376953125, |
|
"loss": 0.1369, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.08228493481874466, |
|
"rewards/margins": 0.2714352011680603, |
|
"rewards/rejected": -0.35372015833854675, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.552705424629898e-06, |
|
"logits/chosen": -1.4091111421585083, |
|
"logits/rejected": -0.6021371483802795, |
|
"logps/chosen": -744.669189453125, |
|
"logps/rejected": -1612.1370849609375, |
|
"loss": 0.1492, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.08511866629123688, |
|
"rewards/margins": 0.2584208548069, |
|
"rewards/rejected": -0.3435395061969757, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5312110338697427e-06, |
|
"logits/chosen": -1.3926702737808228, |
|
"logits/rejected": -1.0168853998184204, |
|
"logps/chosen": -612.21044921875, |
|
"logps/rejected": -1413.4964599609375, |
|
"loss": 0.1469, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.064825639128685, |
|
"rewards/margins": 0.2570902705192566, |
|
"rewards/rejected": -0.3219159245491028, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.509800584902108e-06, |
|
"logits/chosen": -1.2503700256347656, |
|
"logits/rejected": -0.17403051257133484, |
|
"logps/chosen": -702.2501831054688, |
|
"logps/rejected": -1574.6552734375, |
|
"loss": 0.1228, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.06957344710826874, |
|
"rewards/margins": 0.29189419746398926, |
|
"rewards/rejected": -0.3614676594734192, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4884759328590476e-06, |
|
"logits/chosen": -1.7622737884521484, |
|
"logits/rejected": -0.49598488211631775, |
|
"logps/chosen": -769.4957885742188, |
|
"logps/rejected": -1723.9212646484375, |
|
"loss": 0.1593, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.12480386346578598, |
|
"rewards/margins": 0.3833233416080475, |
|
"rewards/rejected": -0.5081272125244141, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.467238925438646e-06, |
|
"logits/chosen": -1.5906703472137451, |
|
"logits/rejected": 0.46796292066574097, |
|
"logps/chosen": -865.5426025390625, |
|
"logps/rejected": -1904.509765625, |
|
"loss": 0.1402, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.1493154764175415, |
|
"rewards/margins": 0.37586697936058044, |
|
"rewards/rejected": -0.5251824259757996, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.446091402744923e-06, |
|
"logits/chosen": -1.2282660007476807, |
|
"logits/rejected": -0.4859045147895813, |
|
"logps/chosen": -703.8905639648438, |
|
"logps/rejected": -1641.694580078125, |
|
"loss": 0.147, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.10744025558233261, |
|
"rewards/margins": 0.3389386534690857, |
|
"rewards/rejected": -0.4463788866996765, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4250351971283937e-06, |
|
"logits/chosen": -1.564168930053711, |
|
"logits/rejected": 0.4942803382873535, |
|
"logps/chosen": -766.39599609375, |
|
"logps/rejected": -1616.6973876953125, |
|
"loss": 0.1396, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.11042846739292145, |
|
"rewards/margins": 0.29587405920028687, |
|
"rewards/rejected": -0.4063025414943695, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4040721330273063e-06, |
|
"logits/chosen": -1.4303256273269653, |
|
"logits/rejected": 0.4619303345680237, |
|
"logps/chosen": -751.2760009765625, |
|
"logps/rejected": -1650.994873046875, |
|
"loss": 0.1959, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.11452531814575195, |
|
"rewards/margins": 0.2743874192237854, |
|
"rewards/rejected": -0.38891273736953735, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.3832040268095589e-06, |
|
"logits/chosen": -1.2525991201400757, |
|
"logits/rejected": -0.3175821006298065, |
|
"logps/chosen": -702.5205688476562, |
|
"logps/rejected": -1794.9117431640625, |
|
"loss": 0.1277, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.09264467656612396, |
|
"rewards/margins": 0.3793013393878937, |
|
"rewards/rejected": -0.47194600105285645, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.362432686615316e-06, |
|
"logits/chosen": -1.4419841766357422, |
|
"logits/rejected": -0.390924870967865, |
|
"logps/chosen": -554.8194580078125, |
|
"logps/rejected": -1714.5797119140625, |
|
"loss": 0.1609, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.052808977663517, |
|
"rewards/margins": 0.37850111722946167, |
|
"rewards/rejected": -0.43131011724472046, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3417599122003464e-06, |
|
"logits/chosen": -1.4737399816513062, |
|
"logits/rejected": -0.35947731137275696, |
|
"logps/chosen": -654.5718994140625, |
|
"logps/rejected": -1695.5394287109375, |
|
"loss": 0.1034, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.082417331635952, |
|
"rewards/margins": 0.3580096364021301, |
|
"rewards/rejected": -0.44042691588401794, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3211874947800747e-06, |
|
"logits/chosen": -1.5177228450775146, |
|
"logits/rejected": -0.705475926399231, |
|
"logps/chosen": -662.635986328125, |
|
"logps/rejected": -1590.105224609375, |
|
"loss": 0.1596, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.08941694349050522, |
|
"rewards/margins": 0.25798147916793823, |
|
"rewards/rejected": -0.34739845991134644, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3007172168743854e-06, |
|
"logits/chosen": -1.3950124979019165, |
|
"logits/rejected": -0.012356835417449474, |
|
"logps/chosen": -656.9428100585938, |
|
"logps/rejected": -1578.4957275390625, |
|
"loss": 0.1501, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.07990637421607971, |
|
"rewards/margins": 0.3191567063331604, |
|
"rewards/rejected": -0.3990631103515625, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.280350852153168e-06, |
|
"logits/chosen": -1.3449013233184814, |
|
"logits/rejected": -0.16039177775382996, |
|
"logps/chosen": -759.0653076171875, |
|
"logps/rejected": -1695.5814208984375, |
|
"loss": 0.1461, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.15088282525539398, |
|
"rewards/margins": 0.29462313652038574, |
|
"rewards/rejected": -0.44550594687461853, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.260090165282645e-06, |
|
"logits/chosen": -1.3999792337417603, |
|
"logits/rejected": 0.3149392008781433, |
|
"logps/chosen": -689.9563598632812, |
|
"logps/rejected": -1669.5087890625, |
|
"loss": 0.1658, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1789911687374115, |
|
"rewards/margins": 0.27231132984161377, |
|
"rewards/rejected": -0.45130252838134766, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2399369117724582e-06, |
|
"logits/chosen": -1.4091784954071045, |
|
"logits/rejected": -0.4129961431026459, |
|
"logps/chosen": -786.0768432617188, |
|
"logps/rejected": -1682.849853515625, |
|
"loss": 0.1508, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.11276306957006454, |
|
"rewards/margins": 0.3028048872947693, |
|
"rewards/rejected": -0.41556796431541443, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2198928378235717e-06, |
|
"logits/chosen": -1.5089385509490967, |
|
"logits/rejected": 0.6765385866165161, |
|
"logps/chosen": -682.0382080078125, |
|
"logps/rejected": -1677.535888671875, |
|
"loss": 0.1625, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.07076996564865112, |
|
"rewards/margins": 0.3339851498603821, |
|
"rewards/rejected": -0.404755175113678, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1999596801769617e-06, |
|
"logits/chosen": -1.625929594039917, |
|
"logits/rejected": -0.4023094177246094, |
|
"logps/chosen": -682.7630615234375, |
|
"logps/rejected": -1596.6417236328125, |
|
"loss": 0.1467, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.07302750647068024, |
|
"rewards/margins": 0.2907186448574066, |
|
"rewards/rejected": -0.36374616622924805, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1801391659631423e-06, |
|
"logits/chosen": -1.5305818319320679, |
|
"logits/rejected": 0.7981144785881042, |
|
"logps/chosen": -645.7145385742188, |
|
"logps/rejected": -1584.2552490234375, |
|
"loss": 0.1174, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.05174557492136955, |
|
"rewards/margins": 0.31238168478012085, |
|
"rewards/rejected": -0.3641272485256195, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.160433012552508e-06, |
|
"logits/chosen": -1.474902629852295, |
|
"logits/rejected": -0.8396472930908203, |
|
"logps/chosen": -683.650146484375, |
|
"logps/rejected": -1486.7801513671875, |
|
"loss": 0.1884, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.06140195578336716, |
|
"rewards/margins": 0.2520787715911865, |
|
"rewards/rejected": -0.3134807050228119, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1408429274065418e-06, |
|
"logits/chosen": -1.3169395923614502, |
|
"logits/rejected": -0.6151926517486572, |
|
"logps/chosen": -565.2825927734375, |
|
"logps/rejected": -1585.185791015625, |
|
"loss": 0.1651, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.06299415230751038, |
|
"rewards/margins": 0.2877538800239563, |
|
"rewards/rejected": -0.35074806213378906, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1213706079298566e-06, |
|
"logits/chosen": -1.456714391708374, |
|
"logits/rejected": -0.23200377821922302, |
|
"logps/chosen": -555.6742553710938, |
|
"logps/rejected": -1574.927490234375, |
|
"loss": 0.1603, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.04447126016020775, |
|
"rewards/margins": 0.33361220359802246, |
|
"rewards/rejected": -0.3780834376811981, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1020177413231334e-06, |
|
"logits/chosen": -1.4803617000579834, |
|
"logits/rejected": -0.2165524661540985, |
|
"logps/chosen": -688.8192749023438, |
|
"logps/rejected": -1610.768310546875, |
|
"loss": 0.1745, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.08206529915332794, |
|
"rewards/margins": 0.2980625331401825, |
|
"rewards/rejected": -0.38012784719467163, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.0827860044369226e-06, |
|
"logits/chosen": -1.5613023042678833, |
|
"logits/rejected": -0.2233733832836151, |
|
"logps/chosen": -747.8840942382812, |
|
"logps/rejected": -1710.396240234375, |
|
"loss": 0.1608, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1048145517706871, |
|
"rewards/margins": 0.31736674904823303, |
|
"rewards/rejected": -0.42218121886253357, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.06367706362636e-06, |
|
"logits/chosen": -1.39828360080719, |
|
"logits/rejected": -0.848728358745575, |
|
"logps/chosen": -723.9393920898438, |
|
"logps/rejected": -1687.5172119140625, |
|
"loss": 0.1753, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.11115583032369614, |
|
"rewards/margins": 0.2940528988838196, |
|
"rewards/rejected": -0.4052087366580963, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0446925746067768e-06, |
|
"logits/chosen": -1.3547604084014893, |
|
"logits/rejected": -0.15690529346466064, |
|
"logps/chosen": -790.1058959960938, |
|
"logps/rejected": -1860.5738525390625, |
|
"loss": 0.1238, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.1280745416879654, |
|
"rewards/margins": 0.36689525842666626, |
|
"rewards/rejected": -0.49496984481811523, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0258341823102418e-06, |
|
"logits/chosen": -1.540259599685669, |
|
"logits/rejected": -0.36155903339385986, |
|
"logps/chosen": -741.6209106445312, |
|
"logps/rejected": -1832.474609375, |
|
"loss": 0.1492, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.13528260588645935, |
|
"rewards/margins": 0.39978405833244324, |
|
"rewards/rejected": -0.5350667238235474, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0071035207430352e-06, |
|
"logits/chosen": -1.6379410028457642, |
|
"logits/rejected": -0.6687275171279907, |
|
"logps/chosen": -783.6793212890625, |
|
"logps/rejected": -1837.771728515625, |
|
"loss": 0.1575, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.1449609398841858, |
|
"rewards/margins": 0.35221394896507263, |
|
"rewards/rejected": -0.4971748888492584, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.88502212844063e-07, |
|
"logits/chosen": -1.2955491542816162, |
|
"logits/rejected": -0.5987057685852051, |
|
"logps/chosen": -634.2613525390625, |
|
"logps/rejected": -1641.3394775390625, |
|
"loss": 0.1522, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.11327040195465088, |
|
"rewards/margins": 0.3325853943824768, |
|
"rewards/rejected": -0.44585585594177246, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.700318703442437e-07, |
|
"logits/chosen": -1.466076135635376, |
|
"logits/rejected": -0.3946886658668518, |
|
"logps/chosen": -745.1195068359375, |
|
"logps/rejected": -1823.8948974609375, |
|
"loss": 0.1474, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.09033511579036713, |
|
"rewards/margins": 0.41018062829971313, |
|
"rewards/rejected": -0.5005157589912415, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.516940936268504e-07, |
|
"logits/chosen": -1.3681772947311401, |
|
"logits/rejected": -0.35859739780426025, |
|
"logps/chosen": -648.45458984375, |
|
"logps/rejected": -1582.552978515625, |
|
"loss": 0.1233, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.09594032913446426, |
|
"rewards/margins": 0.2905716896057129, |
|
"rewards/rejected": -0.38651204109191895, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.334904715888496e-07, |
|
"logits/chosen": -1.5419670343399048, |
|
"logits/rejected": -0.12930986285209656, |
|
"logps/chosen": -640.37646484375, |
|
"logps/rejected": -1695.059814453125, |
|
"loss": 0.1712, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.07272680103778839, |
|
"rewards/margins": 0.3492538332939148, |
|
"rewards/rejected": -0.42198067903518677, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.154225815032242e-07, |
|
"logits/chosen": -1.4778330326080322, |
|
"logits/rejected": -0.7425899505615234, |
|
"logps/chosen": -604.2279052734375, |
|
"logps/rejected": -1645.4664306640625, |
|
"loss": 0.1527, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.060115180909633636, |
|
"rewards/margins": 0.3599664568901062, |
|
"rewards/rejected": -0.42008161544799805, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.974919888823164e-07, |
|
"logits/chosen": -1.4436981678009033, |
|
"logits/rejected": -0.3787620961666107, |
|
"logps/chosen": -735.7376708984375, |
|
"logps/rejected": -1799.7640380859375, |
|
"loss": 0.1705, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.08380059152841568, |
|
"rewards/margins": 0.35864678025245667, |
|
"rewards/rejected": -0.4424474239349365, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.797002473421729e-07, |
|
"logits/chosen": -1.4138429164886475, |
|
"logits/rejected": -0.46681833267211914, |
|
"logps/chosen": -517.6033935546875, |
|
"logps/rejected": -1498.3123779296875, |
|
"loss": 0.1961, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.05182959511876106, |
|
"rewards/margins": 0.31122758984565735, |
|
"rewards/rejected": -0.36305713653564453, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.620488984679378e-07, |
|
"logits/chosen": -1.698520302772522, |
|
"logits/rejected": -0.209329292178154, |
|
"logps/chosen": -629.7589721679688, |
|
"logps/rejected": -1619.1944580078125, |
|
"loss": 0.1404, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.04417989403009415, |
|
"rewards/margins": 0.3446957468986511, |
|
"rewards/rejected": -0.38887566328048706, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.445394716802754e-07, |
|
"logits/chosen": -1.4968388080596924, |
|
"logits/rejected": -0.649762749671936, |
|
"logps/chosen": -686.9845581054688, |
|
"logps/rejected": -1680.4127197265625, |
|
"loss": 0.1734, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.07763628661632538, |
|
"rewards/margins": 0.340093731880188, |
|
"rewards/rejected": -0.4177300035953522, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.271734841028553e-07, |
|
"logits/chosen": -1.3302339315414429, |
|
"logits/rejected": -0.4346889555454254, |
|
"logps/chosen": -617.5177612304688, |
|
"logps/rejected": -1592.0660400390625, |
|
"loss": 0.1696, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.06615222990512848, |
|
"rewards/margins": 0.3168949782848358, |
|
"rewards/rejected": -0.3830471932888031, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.099524404308948e-07, |
|
"logits/chosen": -1.4116344451904297, |
|
"logits/rejected": -0.4046458601951599, |
|
"logps/chosen": -718.5609130859375, |
|
"logps/rejected": -1903.5677490234375, |
|
"loss": 0.1009, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.0856105238199234, |
|
"rewards/margins": 0.3954885005950928, |
|
"rewards/rejected": -0.48109903931617737, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.928778328007918e-07, |
|
"logits/chosen": -1.6713218688964844, |
|
"logits/rejected": -0.07077087461948395, |
|
"logps/chosen": -666.365966796875, |
|
"logps/rejected": -1721.5794677734375, |
|
"loss": 0.1288, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.06950782239437103, |
|
"rewards/margins": 0.33922192454338074, |
|
"rewards/rejected": -0.4087297320365906, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.759511406608255e-07, |
|
"logits/chosen": -1.3845415115356445, |
|
"logits/rejected": -0.046902846544981, |
|
"logps/chosen": -711.4525146484375, |
|
"logps/rejected": -1648.327392578125, |
|
"loss": 0.1565, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.0852561965584755, |
|
"rewards/margins": 0.27144601941108704, |
|
"rewards/rejected": -0.35670217871665955, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.591738306429769e-07, |
|
"logits/chosen": -1.3664982318878174, |
|
"logits/rejected": -0.31685924530029297, |
|
"logps/chosen": -806.8741455078125, |
|
"logps/rejected": -1734.4677734375, |
|
"loss": 0.1123, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.08330997824668884, |
|
"rewards/margins": 0.3360075056552887, |
|
"rewards/rejected": -0.41931748390197754, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.425473564358457e-07, |
|
"logits/chosen": -1.2000247240066528, |
|
"logits/rejected": -0.19667713344097137, |
|
"logps/chosen": -858.8897705078125, |
|
"logps/rejected": -1800.777587890625, |
|
"loss": 0.2683, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1292639523744583, |
|
"rewards/margins": 0.33466023206710815, |
|
"rewards/rejected": -0.46392422914505005, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.260731586586983e-07, |
|
"logits/chosen": -1.5677512884140015, |
|
"logits/rejected": -0.5398620367050171, |
|
"logps/chosen": -503.47052001953125, |
|
"logps/rejected": -1553.2418212890625, |
|
"loss": 0.1237, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.04408533498644829, |
|
"rewards/margins": 0.34157511591911316, |
|
"rewards/rejected": -0.38566046953201294, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.097526647366379e-07, |
|
"logits/chosen": -1.7256578207015991, |
|
"logits/rejected": -0.6446534395217896, |
|
"logps/chosen": -649.2868041992188, |
|
"logps/rejected": -1570.2288818359375, |
|
"loss": 0.1441, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.05645834654569626, |
|
"rewards/margins": 0.304313600063324, |
|
"rewards/rejected": -0.3607719838619232, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.935872887769299e-07, |
|
"logits/chosen": -1.5906898975372314, |
|
"logits/rejected": -0.8133038282394409, |
|
"logps/chosen": -777.7838134765625, |
|
"logps/rejected": -1589.012451171875, |
|
"loss": 0.2021, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.09165602177381516, |
|
"rewards/margins": 0.2887498140335083, |
|
"rewards/rejected": -0.38040581345558167, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.775784314464717e-07, |
|
"logits/chosen": -1.3574354648590088, |
|
"logits/rejected": -0.9483124613761902, |
|
"logps/chosen": -623.078125, |
|
"logps/rejected": -1650.8558349609375, |
|
"loss": 0.164, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.06772687286138535, |
|
"rewards/margins": 0.29907676577568054, |
|
"rewards/rejected": -0.3668036460876465, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.617274798504286e-07, |
|
"logits/chosen": -1.6042282581329346, |
|
"logits/rejected": -0.3267243504524231, |
|
"logps/chosen": -772.8688354492188, |
|
"logps/rejected": -1827.7327880859375, |
|
"loss": 0.1381, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.07094523310661316, |
|
"rewards/margins": 0.36197200417518616, |
|
"rewards/rejected": -0.43291720747947693, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.460358074120518e-07, |
|
"logits/chosen": -1.5008442401885986, |
|
"logits/rejected": -0.9061881303787231, |
|
"logps/chosen": -710.754638671875, |
|
"logps/rejected": -1521.227783203125, |
|
"loss": 0.1967, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.06366866081953049, |
|
"rewards/margins": 0.2556148171424866, |
|
"rewards/rejected": -0.31928351521492004, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.305047737536707e-07, |
|
"logits/chosen": -1.52692449092865, |
|
"logits/rejected": -1.0452547073364258, |
|
"logps/chosen": -648.1273803710938, |
|
"logps/rejected": -1738.8756103515625, |
|
"loss": 0.1429, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.0617615282535553, |
|
"rewards/margins": 0.3802492022514343, |
|
"rewards/rejected": -0.4420107305049896, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.151357245788917e-07, |
|
"logits/chosen": -1.479436993598938, |
|
"logits/rejected": -0.5046942234039307, |
|
"logps/chosen": -704.0694580078125, |
|
"logps/rejected": -1901.0484619140625, |
|
"loss": 0.134, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.08424389362335205, |
|
"rewards/margins": 0.3907619118690491, |
|
"rewards/rejected": -0.47500577569007874, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.999299915559956e-07, |
|
"logits/chosen": -1.3773359060287476, |
|
"logits/rejected": -0.7278205156326294, |
|
"logps/chosen": -646.4566650390625, |
|
"logps/rejected": -1756.0074462890625, |
|
"loss": 0.1404, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.07592879235744476, |
|
"rewards/margins": 0.3527293801307678, |
|
"rewards/rejected": -0.4286581873893738, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.848888922025553e-07, |
|
"logits/chosen": -1.4301416873931885, |
|
"logits/rejected": -0.6595948338508606, |
|
"logps/chosen": -633.0948486328125, |
|
"logps/rejected": -1438.056396484375, |
|
"loss": 0.2039, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.0907018780708313, |
|
"rewards/margins": 0.2594781816005707, |
|
"rewards/rejected": -0.35018008947372437, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.700137297712749e-07, |
|
"logits/chosen": -1.5186036825180054, |
|
"logits/rejected": -0.2126600295305252, |
|
"logps/chosen": -719.3463134765625, |
|
"logps/rejected": -1743.2152099609375, |
|
"loss": 0.136, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.07820829004049301, |
|
"rewards/margins": 0.35745203495025635, |
|
"rewards/rejected": -0.43566030263900757, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.553057931370729e-07, |
|
"logits/chosen": -1.629778265953064, |
|
"logits/rejected": -0.40432286262512207, |
|
"logps/chosen": -629.0681762695312, |
|
"logps/rejected": -1662.3658447265625, |
|
"loss": 0.1704, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.07306727021932602, |
|
"rewards/margins": 0.336677610874176, |
|
"rewards/rejected": -0.40974488854408264, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.407663566854008e-07, |
|
"logits/chosen": -1.5414226055145264, |
|
"logits/rejected": -0.8694823980331421, |
|
"logps/chosen": -783.411376953125, |
|
"logps/rejected": -1741.0318603515625, |
|
"loss": 0.1431, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.08548710495233536, |
|
"rewards/margins": 0.32277002930641174, |
|
"rewards/rejected": -0.4082570970058441, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.263966802018275e-07, |
|
"logits/chosen": -1.6246259212493896, |
|
"logits/rejected": -0.6862791180610657, |
|
"logps/chosen": -805.5133056640625, |
|
"logps/rejected": -1652.5374755859375, |
|
"loss": 0.1632, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.08222351223230362, |
|
"rewards/margins": 0.30940836668014526, |
|
"rewards/rejected": -0.3916319012641907, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.121980087628802e-07, |
|
"logits/chosen": -1.5992133617401123, |
|
"logits/rejected": -0.5684345364570618, |
|
"logps/chosen": -639.3607177734375, |
|
"logps/rejected": -1503.9329833984375, |
|
"loss": 0.1916, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.042655039578676224, |
|
"rewards/margins": 0.3029300570487976, |
|
"rewards/rejected": -0.34558507800102234, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.981715726281666e-07, |
|
"logits/chosen": -1.3770325183868408, |
|
"logits/rejected": -0.07314357906579971, |
|
"logps/chosen": -626.600341796875, |
|
"logps/rejected": -1564.7174072265625, |
|
"loss": 0.1957, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.061516791582107544, |
|
"rewards/margins": 0.29265180230140686, |
|
"rewards/rejected": -0.3541685938835144, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.843185871337722e-07, |
|
"logits/chosen": -1.302022933959961, |
|
"logits/rejected": -0.5096440315246582, |
|
"logps/chosen": -616.6043701171875, |
|
"logps/rejected": -1739.4761962890625, |
|
"loss": 0.1331, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.04673675075173378, |
|
"rewards/margins": 0.33783185482025146, |
|
"rewards/rejected": -0.38456863164901733, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.706402525869633e-07, |
|
"logits/chosen": -1.244085431098938, |
|
"logits/rejected": -0.4011514186859131, |
|
"logps/chosen": -558.00341796875, |
|
"logps/rejected": -1766.441162109375, |
|
"loss": 0.135, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.03591493144631386, |
|
"rewards/margins": 0.3851375877857208, |
|
"rewards/rejected": -0.4210525155067444, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.5713775416217884e-07, |
|
"logits/chosen": -1.5269744396209717, |
|
"logits/rejected": -0.3547285795211792, |
|
"logps/chosen": -603.1836547851562, |
|
"logps/rejected": -1485.8277587890625, |
|
"loss": 0.1863, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.050064198672771454, |
|
"rewards/margins": 0.30074387788772583, |
|
"rewards/rejected": -0.3508080542087555, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.438122617983442e-07, |
|
"logits/chosen": -1.3431215286254883, |
|
"logits/rejected": -0.15092921257019043, |
|
"logps/chosen": -600.654296875, |
|
"logps/rejected": -1668.9749755859375, |
|
"loss": 0.1123, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.019012073054909706, |
|
"rewards/margins": 0.3798617422580719, |
|
"rewards/rejected": -0.39887386560440063, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.3066493009749853e-07, |
|
"logits/chosen": -1.651582956314087, |
|
"logits/rejected": -0.60547935962677, |
|
"logps/chosen": -783.2088623046875, |
|
"logps/rejected": -1752.2379150390625, |
|
"loss": 0.1943, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.07545865327119827, |
|
"rewards/margins": 0.28511419892311096, |
|
"rewards/rejected": -0.36057284474372864, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1769689822475147e-07, |
|
"logits/chosen": -1.4048668146133423, |
|
"logits/rejected": -0.61931312084198, |
|
"logps/chosen": -734.5963745117188, |
|
"logps/rejected": -1771.947509765625, |
|
"loss": 0.1742, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.06757534295320511, |
|
"rewards/margins": 0.3220224380493164, |
|
"rewards/rejected": -0.3895978033542633, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.049092898095816e-07, |
|
"logits/chosen": -1.2418572902679443, |
|
"logits/rejected": -0.5778509378433228, |
|
"logps/chosen": -649.421142578125, |
|
"logps/rejected": -1639.222412109375, |
|
"loss": 0.1488, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.06102794408798218, |
|
"rewards/margins": 0.2672490179538727, |
|
"rewards/rejected": -0.32827693223953247, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.9230321284847856e-07, |
|
"logits/chosen": -1.4714672565460205, |
|
"logits/rejected": -0.19613921642303467, |
|
"logps/chosen": -517.8187255859375, |
|
"logps/rejected": -1359.996337890625, |
|
"loss": 0.1738, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.015176740474998951, |
|
"rewards/margins": 0.27705565094947815, |
|
"rewards/rejected": -0.29223236441612244, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.798797596089351e-07, |
|
"logits/chosen": -1.4680472612380981, |
|
"logits/rejected": -1.0179380178451538, |
|
"logps/chosen": -649.7005615234375, |
|
"logps/rejected": -1706.8095703125, |
|
"loss": 0.1422, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.0649513378739357, |
|
"rewards/margins": 0.37964990735054016, |
|
"rewards/rejected": -0.44460123777389526, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.6764000653481263e-07, |
|
"logits/chosen": -1.530711054801941, |
|
"logits/rejected": -0.5328912734985352, |
|
"logps/chosen": -678.9803466796875, |
|
"logps/rejected": -1562.3223876953125, |
|
"loss": 0.1803, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.07924239337444305, |
|
"rewards/margins": 0.29753822088241577, |
|
"rewards/rejected": -0.37678059935569763, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.555850141530659e-07, |
|
"logits/chosen": -1.573302149772644, |
|
"logits/rejected": -0.6228225827217102, |
|
"logps/chosen": -720.739990234375, |
|
"logps/rejected": -1550.609619140625, |
|
"loss": 0.1381, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.05328356474637985, |
|
"rewards/margins": 0.3060484826564789, |
|
"rewards/rejected": -0.35933202505111694, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.4371582698185636e-07, |
|
"logits/chosen": -1.4535822868347168, |
|
"logits/rejected": -0.5261168479919434, |
|
"logps/chosen": -570.4685668945312, |
|
"logps/rejected": -1702.9281005859375, |
|
"loss": 0.1551, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.05110562592744827, |
|
"rewards/margins": 0.36743611097335815, |
|
"rewards/rejected": -0.4185417592525482, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.3203347344004737e-07, |
|
"logits/chosen": -1.2148401737213135, |
|
"logits/rejected": -0.12717768549919128, |
|
"logps/chosen": -689.377197265625, |
|
"logps/rejected": -1628.9788818359375, |
|
"loss": 0.1693, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1245916485786438, |
|
"rewards/margins": 0.2877344489097595, |
|
"rewards/rejected": -0.41232603788375854, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.2053896575809426e-07, |
|
"logits/chosen": -1.3511433601379395, |
|
"logits/rejected": -0.6393106579780579, |
|
"logps/chosen": -607.8463745117188, |
|
"logps/rejected": -1700.497802734375, |
|
"loss": 0.1006, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.07283592969179153, |
|
"rewards/margins": 0.3661297857761383, |
|
"rewards/rejected": -0.43896573781967163, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.092332998903416e-07, |
|
"logits/chosen": -1.3332850933074951, |
|
"logits/rejected": -0.6272661685943604, |
|
"logps/chosen": -599.9690551757812, |
|
"logps/rejected": -1547.851318359375, |
|
"loss": 0.1517, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.061964135617017746, |
|
"rewards/margins": 0.3017534613609314, |
|
"rewards/rejected": -0.36371761560440063, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.981174554287239e-07, |
|
"logits/chosen": -1.3444700241088867, |
|
"logits/rejected": -0.7092281579971313, |
|
"logps/chosen": -702.991455078125, |
|
"logps/rejected": -1928.198974609375, |
|
"loss": 0.134, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.06982138007879257, |
|
"rewards/margins": 0.3849565088748932, |
|
"rewards/rejected": -0.45477789640426636, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.871923955178918e-07, |
|
"logits/chosen": -1.1920301914215088, |
|
"logits/rejected": -0.5040346384048462, |
|
"logps/chosen": -689.6361083984375, |
|
"logps/rejected": -1791.2109375, |
|
"loss": 0.1159, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.08395170420408249, |
|
"rewards/margins": 0.38336819410324097, |
|
"rewards/rejected": -0.46731987595558167, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.764590667717562e-07, |
|
"logits/chosen": -1.4373012781143188, |
|
"logits/rejected": -0.292427122592926, |
|
"logps/chosen": -607.8994140625, |
|
"logps/rejected": -1627.1224365234375, |
|
"loss": 0.179, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.0747733861207962, |
|
"rewards/margins": 0.31044524908065796, |
|
"rewards/rejected": -0.38521862030029297, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.6591839919146963e-07, |
|
"logits/chosen": -1.4667880535125732, |
|
"logits/rejected": 0.36761537194252014, |
|
"logps/chosen": -681.8417358398438, |
|
"logps/rejected": -1700.8343505859375, |
|
"loss": 0.1297, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.07614819705486298, |
|
"rewards/margins": 0.3499642312526703, |
|
"rewards/rejected": -0.42611247301101685, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.555713060848433e-07, |
|
"logits/chosen": -1.3263441324234009, |
|
"logits/rejected": -0.007822990417480469, |
|
"logps/chosen": -645.5946044921875, |
|
"logps/rejected": -1641.087646484375, |
|
"loss": 0.1576, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.07578223943710327, |
|
"rewards/margins": 0.32165446877479553, |
|
"rewards/rejected": -0.3974367678165436, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.454186839872158e-07, |
|
"logits/chosen": -1.598440408706665, |
|
"logits/rejected": -0.6168525815010071, |
|
"logps/chosen": -796.4778442382812, |
|
"logps/rejected": -1850.823974609375, |
|
"loss": 0.1374, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.11286751180887222, |
|
"rewards/margins": 0.3862745761871338, |
|
"rewards/rejected": -0.4991420805454254, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.3546141258376786e-07, |
|
"logits/chosen": -1.3171125650405884, |
|
"logits/rejected": -0.3912467360496521, |
|
"logps/chosen": -731.4490966796875, |
|
"logps/rejected": -1646.4290771484375, |
|
"loss": 0.1371, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.11279473453760147, |
|
"rewards/margins": 0.3294528126716614, |
|
"rewards/rejected": -0.4422474801540375, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.257003546333042e-07, |
|
"logits/chosen": -1.4132746458053589, |
|
"logits/rejected": -0.49065130949020386, |
|
"logps/chosen": -738.8904418945312, |
|
"logps/rejected": -1917.914306640625, |
|
"loss": 0.1661, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1128961443901062, |
|
"rewards/margins": 0.3549092411994934, |
|
"rewards/rejected": -0.46780532598495483, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1613635589349756e-07, |
|
"logits/chosen": -1.6147540807724, |
|
"logits/rejected": -0.7865034341812134, |
|
"logps/chosen": -561.64306640625, |
|
"logps/rejected": -1595.516357421875, |
|
"loss": 0.1442, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.05281323194503784, |
|
"rewards/margins": 0.32325616478919983, |
|
"rewards/rejected": -0.3760693669319153, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.0677024504760752e-07, |
|
"logits/chosen": -1.467961311340332, |
|
"logits/rejected": -0.4320656359195709, |
|
"logps/chosen": -556.8984375, |
|
"logps/rejected": -1729.6136474609375, |
|
"loss": 0.1361, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.03827287629246712, |
|
"rewards/margins": 0.37058061361312866, |
|
"rewards/rejected": -0.4088534712791443, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.9760283363267684e-07, |
|
"logits/chosen": -1.6456931829452515, |
|
"logits/rejected": -0.19730210304260254, |
|
"logps/chosen": -729.8663940429688, |
|
"logps/rejected": -1706.380615234375, |
|
"loss": 0.1453, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.07686268538236618, |
|
"rewards/margins": 0.34378868341445923, |
|
"rewards/rejected": -0.4206513464450836, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.8863491596921745e-07, |
|
"logits/chosen": -1.275883436203003, |
|
"logits/rejected": -0.10841438919305801, |
|
"logps/chosen": -601.3389892578125, |
|
"logps/rejected": -1545.714111328125, |
|
"loss": 0.1123, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.06958813965320587, |
|
"rewards/margins": 0.32690125703811646, |
|
"rewards/rejected": -0.3964894115924835, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.798672690923828e-07, |
|
"logits/chosen": -1.5245708227157593, |
|
"logits/rejected": -0.7873867750167847, |
|
"logps/chosen": -622.5413818359375, |
|
"logps/rejected": -1778.4840087890625, |
|
"loss": 0.0966, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.0721798688173294, |
|
"rewards/margins": 0.36624765396118164, |
|
"rewards/rejected": -0.43842750787734985, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.713006526846439e-07, |
|
"logits/chosen": -1.507137417793274, |
|
"logits/rejected": -0.2042998969554901, |
|
"logps/chosen": -680.6546630859375, |
|
"logps/rejected": -1971.2806396484375, |
|
"loss": 0.0794, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.06454543769359589, |
|
"rewards/margins": 0.4576357901096344, |
|
"rewards/rejected": -0.5221812129020691, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.629358090099639e-07, |
|
"logits/chosen": -1.551574468612671, |
|
"logits/rejected": -0.49993395805358887, |
|
"logps/chosen": -704.3600463867188, |
|
"logps/rejected": -1691.518310546875, |
|
"loss": 0.1888, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.08099500089883804, |
|
"rewards/margins": 0.33148878812789917, |
|
"rewards/rejected": -0.4124837815761566, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.5477346284948292e-07, |
|
"logits/chosen": -1.5370080471038818, |
|
"logits/rejected": -0.8450511693954468, |
|
"logps/chosen": -749.2708740234375, |
|
"logps/rejected": -1634.89453125, |
|
"loss": 0.1592, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.09636793285608292, |
|
"rewards/margins": 0.2845004200935364, |
|
"rewards/rejected": -0.3808683753013611, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.4681432143872133e-07, |
|
"logits/chosen": -1.2466710805892944, |
|
"logits/rejected": -0.42571744322776794, |
|
"logps/chosen": -763.4384155273438, |
|
"logps/rejected": -1733.5638427734375, |
|
"loss": 0.177, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.09244507551193237, |
|
"rewards/margins": 0.330522358417511, |
|
"rewards/rejected": -0.42296743392944336, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.3905907440629752e-07, |
|
"logits/chosen": -1.5868967771530151, |
|
"logits/rejected": -0.42538732290267944, |
|
"logps/chosen": -704.2333374023438, |
|
"logps/rejected": -1660.2496337890625, |
|
"loss": 0.2384, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.09102825075387955, |
|
"rewards/margins": 0.27777737379074097, |
|
"rewards/rejected": -0.3688056468963623, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.31508393714177e-07, |
|
"logits/chosen": -1.5377957820892334, |
|
"logits/rejected": -0.9308739900588989, |
|
"logps/chosen": -690.0589599609375, |
|
"logps/rejected": -1625.2911376953125, |
|
"loss": 0.1416, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.07044301927089691, |
|
"rewards/margins": 0.33037734031677246, |
|
"rewards/rejected": -0.4008203446865082, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.241629335994471e-07, |
|
"logits/chosen": -1.2723309993743896, |
|
"logits/rejected": -0.2303469479084015, |
|
"logps/chosen": -564.4718017578125, |
|
"logps/rejected": -1448.8857421875, |
|
"loss": 0.1991, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.06368915736675262, |
|
"rewards/margins": 0.297661155462265, |
|
"rewards/rejected": -0.36135026812553406, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1702333051763271e-07, |
|
"logits/chosen": -1.5919114351272583, |
|
"logits/rejected": -0.7112639546394348, |
|
"logps/chosen": -713.6360473632812, |
|
"logps/rejected": -1638.255126953125, |
|
"loss": 0.139, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.09833192080259323, |
|
"rewards/margins": 0.317642480134964, |
|
"rewards/rejected": -0.4159744381904602, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1009020308754587e-07, |
|
"logits/chosen": -1.3826844692230225, |
|
"logits/rejected": -0.3817708492279053, |
|
"logps/chosen": -922.4486083984375, |
|
"logps/rejected": -1803.8795166015625, |
|
"loss": 0.2071, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1334574669599533, |
|
"rewards/margins": 0.3121677041053772, |
|
"rewards/rejected": -0.4456251263618469, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0336415203768962e-07, |
|
"logits/chosen": -1.4255292415618896, |
|
"logits/rejected": 0.35872507095336914, |
|
"logps/chosen": -776.8897705078125, |
|
"logps/rejected": -1673.2662353515625, |
|
"loss": 0.1536, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.0705815777182579, |
|
"rewards/margins": 0.3355250358581543, |
|
"rewards/rejected": -0.4061066210269928, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.684576015420277e-08, |
|
"logits/chosen": -1.356412649154663, |
|
"logits/rejected": -0.2489086389541626, |
|
"logps/chosen": -556.0355224609375, |
|
"logps/rejected": -1530.9036865234375, |
|
"loss": 0.1951, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.07743863016366959, |
|
"rewards/margins": 0.2904726266860962, |
|
"rewards/rejected": -0.36791130900382996, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.053559223036746e-08, |
|
"logits/chosen": -1.3430372476577759, |
|
"logits/rejected": -0.30119800567626953, |
|
"logps/chosen": -748.9754638671875, |
|
"logps/rejected": -1545.953125, |
|
"loss": 0.1491, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.07480500638484955, |
|
"rewards/margins": 0.2932848632335663, |
|
"rewards/rejected": -0.368089884519577, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 8.44341950176683e-08, |
|
"logits/chosen": -1.5457831621170044, |
|
"logits/rejected": 0.028019333258271217, |
|
"logps/chosen": -562.9949340820312, |
|
"logps/rejected": -1467.437744140625, |
|
"loss": 0.1406, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.03476322442293167, |
|
"rewards/margins": 0.30484968423843384, |
|
"rewards/rejected": -0.3396129012107849, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.854209717842231e-08, |
|
"logits/chosen": -1.5118751525878906, |
|
"logits/rejected": -0.8683287501335144, |
|
"logps/chosen": -623.8234252929688, |
|
"logps/rejected": -1705.7734375, |
|
"loss": 0.0768, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.04193173721432686, |
|
"rewards/margins": 0.3931104838848114, |
|
"rewards/rejected": -0.43504220247268677, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.285980923996989e-08, |
|
"logits/chosen": -1.451690435409546, |
|
"logits/rejected": -0.4384433627128601, |
|
"logps/chosen": -644.318603515625, |
|
"logps/rejected": -1636.7640380859375, |
|
"loss": 0.1159, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.048310182988643646, |
|
"rewards/margins": 0.3499411344528198, |
|
"rewards/rejected": -0.39825132489204407, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.738782355044048e-08, |
|
"logits/chosen": -1.5255537033081055, |
|
"logits/rejected": -0.24880623817443848, |
|
"logps/chosen": -594.3384399414062, |
|
"logps/rejected": -1581.1204833984375, |
|
"loss": 0.1277, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.042657919228076935, |
|
"rewards/margins": 0.32654836773872375, |
|
"rewards/rejected": -0.3692063093185425, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.212661423609184e-08, |
|
"logits/chosen": -1.2601430416107178, |
|
"logits/rejected": -0.05472012236714363, |
|
"logps/chosen": -635.506103515625, |
|
"logps/rejected": -1533.6993408203125, |
|
"loss": 0.1826, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.04515351355075836, |
|
"rewards/margins": 0.28451424837112427, |
|
"rewards/rejected": -0.3296678066253662, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.707663716023021e-08, |
|
"logits/chosen": -1.5181065797805786, |
|
"logits/rejected": -0.2227887660264969, |
|
"logps/chosen": -702.5686645507812, |
|
"logps/rejected": -1479.894775390625, |
|
"loss": 0.1602, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.06307029724121094, |
|
"rewards/margins": 0.28211596608161926, |
|
"rewards/rejected": -0.3451862931251526, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.22383298837098e-08, |
|
"logits/chosen": -1.5388044118881226, |
|
"logits/rejected": 0.21133394539356232, |
|
"logps/chosen": -800.4866943359375, |
|
"logps/rejected": -1580.938232421875, |
|
"loss": 0.0983, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.08334775269031525, |
|
"rewards/margins": 0.29393917322158813, |
|
"rewards/rejected": -0.3772869110107422, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.761211162702117e-08, |
|
"logits/chosen": -1.441207766532898, |
|
"logits/rejected": -0.5496786236763, |
|
"logps/chosen": -713.4179077148438, |
|
"logps/rejected": -1426.01171875, |
|
"loss": 0.188, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.10053922981023788, |
|
"rewards/margins": 0.22569382190704346, |
|
"rewards/rejected": -0.32623302936553955, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.319838323396691e-08, |
|
"logits/chosen": -1.4728825092315674, |
|
"logits/rejected": -0.8214455842971802, |
|
"logps/chosen": -647.0958862304688, |
|
"logps/rejected": -1621.828369140625, |
|
"loss": 0.1727, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.06476866453886032, |
|
"rewards/margins": 0.32229509949684143, |
|
"rewards/rejected": -0.38706374168395996, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.8997527136930004e-08, |
|
"logits/chosen": -1.3555724620819092, |
|
"logits/rejected": -0.9047378301620483, |
|
"logps/chosen": -717.7271118164062, |
|
"logps/rejected": -1752.408203125, |
|
"loss": 0.1412, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.11167830228805542, |
|
"rewards/margins": 0.3086579442024231, |
|
"rewards/rejected": -0.4203362464904785, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.5009907323737826e-08, |
|
"logits/chosen": -1.3440874814987183, |
|
"logits/rejected": -0.38815242052078247, |
|
"logps/chosen": -572.4334716796875, |
|
"logps/rejected": -1522.956787109375, |
|
"loss": 0.1838, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.04414026811718941, |
|
"rewards/margins": 0.2903124988079071, |
|
"rewards/rejected": -0.3344528079032898, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.1235869306123766e-08, |
|
"logits/chosen": -1.7093461751937866, |
|
"logits/rejected": -0.7086327075958252, |
|
"logps/chosen": -623.2698364257812, |
|
"logps/rejected": -1764.114013671875, |
|
"loss": 0.109, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.05702175572514534, |
|
"rewards/margins": 0.3699186444282532, |
|
"rewards/rejected": -0.426940381526947, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.767574008979007e-08, |
|
"logits/chosen": -1.5890508890151978, |
|
"logits/rejected": -0.45929059386253357, |
|
"logps/chosen": -709.0275268554688, |
|
"logps/rejected": -1698.400146484375, |
|
"loss": 0.1108, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.06950731575489044, |
|
"rewards/margins": 0.3168235123157501, |
|
"rewards/rejected": -0.386330783367157, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.4329828146074096e-08, |
|
"logits/chosen": -1.4923101663589478, |
|
"logits/rejected": -0.3146892189979553, |
|
"logps/chosen": -752.1887817382812, |
|
"logps/rejected": -1786.089111328125, |
|
"loss": 0.1122, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.06624925881624222, |
|
"rewards/margins": 0.3419404923915863, |
|
"rewards/rejected": -0.4081897735595703, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.1198423385220822e-08, |
|
"logits/chosen": -1.4802709817886353, |
|
"logits/rejected": -0.7376705408096313, |
|
"logps/chosen": -644.2679443359375, |
|
"logps/rejected": -1632.2701416015625, |
|
"loss": 0.1783, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.042442698031663895, |
|
"rewards/margins": 0.3448942005634308, |
|
"rewards/rejected": -0.3873369097709656, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.82817971312621e-08, |
|
"logits/chosen": -1.607860803604126, |
|
"logits/rejected": -0.5319154858589172, |
|
"logps/chosen": -662.9993896484375, |
|
"logps/rejected": -1706.1949462890625, |
|
"loss": 0.1668, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.06334526836872101, |
|
"rewards/margins": 0.3336828351020813, |
|
"rewards/rejected": -0.3970281183719635, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.5580202098509078e-08, |
|
"logits/chosen": -1.5640172958374023, |
|
"logits/rejected": -0.2071477621793747, |
|
"logps/chosen": -634.8540649414062, |
|
"logps/rejected": -1508.478515625, |
|
"loss": 0.1485, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.06261152774095535, |
|
"rewards/margins": 0.30838096141815186, |
|
"rewards/rejected": -0.3709924817085266, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.3093872369654148e-08, |
|
"logits/chosen": -1.425930380821228, |
|
"logits/rejected": 0.06674868613481522, |
|
"logps/chosen": -671.8314819335938, |
|
"logps/rejected": -1613.8541259765625, |
|
"loss": 0.2111, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.0743933692574501, |
|
"rewards/margins": 0.2954915761947632, |
|
"rewards/rejected": -0.3698849081993103, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.0823023375489128e-08, |
|
"logits/chosen": -1.6556713581085205, |
|
"logits/rejected": -0.5192909836769104, |
|
"logps/chosen": -614.2623291015625, |
|
"logps/rejected": -1719.060791015625, |
|
"loss": 0.1069, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.052135247737169266, |
|
"rewards/margins": 0.3784480690956116, |
|
"rewards/rejected": -0.43058329820632935, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.767851876239075e-09, |
|
"logits/chosen": -1.2060736417770386, |
|
"logits/rejected": -0.42587900161743164, |
|
"logps/chosen": -745.9373168945312, |
|
"logps/rejected": -1563.926513671875, |
|
"loss": 0.1715, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.0904444083571434, |
|
"rewards/margins": 0.29221174120903015, |
|
"rewards/rejected": -0.38265615701675415, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.9285359445145366e-09, |
|
"logits/chosen": -1.5781863927841187, |
|
"logits/rejected": -0.46057629585266113, |
|
"logps/chosen": -630.7301025390625, |
|
"logps/rejected": -1631.5906982421875, |
|
"loss": 0.1287, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.030789542943239212, |
|
"rewards/margins": 0.3398864269256592, |
|
"rewards/rejected": -0.3706759810447693, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.305234949880001e-09, |
|
"logits/chosen": -1.4181653261184692, |
|
"logits/rejected": -0.7784129977226257, |
|
"logps/chosen": -598.7530517578125, |
|
"logps/rejected": -1537.0283203125, |
|
"loss": 0.2052, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.052745379507541656, |
|
"rewards/margins": 0.27565133571624756, |
|
"rewards/rejected": -0.328396737575531, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.8980895450474455e-09, |
|
"logits/chosen": -1.4237000942230225, |
|
"logits/rejected": -0.025372039526700974, |
|
"logps/chosen": -612.72998046875, |
|
"logps/rejected": -1505.1551513671875, |
|
"loss": 0.1556, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.048105791211128235, |
|
"rewards/margins": 0.317216157913208, |
|
"rewards/rejected": -0.36532193422317505, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.7072216536885855e-09, |
|
"logits/chosen": -1.537367582321167, |
|
"logits/rejected": -0.594673752784729, |
|
"logps/chosen": -647.0428466796875, |
|
"logps/rejected": -1649.4918212890625, |
|
"loss": 0.0992, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.06700156629085541, |
|
"rewards/margins": 0.32940348982810974, |
|
"rewards/rejected": -0.39640507102012634, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.7327344598702667e-09, |
|
"logits/chosen": -1.3739880323410034, |
|
"logits/rejected": -0.8361124992370605, |
|
"logps/chosen": -633.2244262695312, |
|
"logps/rejected": -1885.47265625, |
|
"loss": 0.0874, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.052168119698762894, |
|
"rewards/margins": 0.41237759590148926, |
|
"rewards/rejected": -0.46454566717147827, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 9.747123991141193e-10, |
|
"logits/chosen": -1.3567665815353394, |
|
"logits/rejected": -0.42118167877197266, |
|
"logps/chosen": -696.8218994140625, |
|
"logps/rejected": -1684.748779296875, |
|
"loss": 0.1024, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.07300444692373276, |
|
"rewards/margins": 0.34567874670028687, |
|
"rewards/rejected": -0.4186832010746002, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.332211510807427e-10, |
|
"logits/chosen": -1.3411390781402588, |
|
"logits/rejected": -0.13048940896987915, |
|
"logps/chosen": -543.6580810546875, |
|
"logps/rejected": -1574.061279296875, |
|
"loss": 0.1457, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.04192924499511719, |
|
"rewards/margins": 0.32620683312416077, |
|
"rewards/rejected": -0.36813604831695557, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.0830763387897902e-10, |
|
"logits/chosen": -1.376807451248169, |
|
"logits/rejected": 0.7155303955078125, |
|
"logps/chosen": -710.9319458007812, |
|
"logps/rejected": -1784.8050537109375, |
|
"loss": 0.0957, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.06954208761453629, |
|
"rewards/margins": 0.3634827136993408, |
|
"rewards/rejected": -0.4330248236656189, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -1.697113275527954, |
|
"logits/rejected": -0.9965224266052246, |
|
"logps/chosen": -698.7452392578125, |
|
"logps/rejected": -1657.3492431640625, |
|
"loss": 0.1618, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.07053602486848831, |
|
"rewards/margins": 0.3382786512374878, |
|
"rewards/rejected": -0.4088146686553955, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 3750, |
|
"total_flos": 0.0, |
|
"train_loss": 0.1831410455385844, |
|
"train_runtime": 17876.9578, |
|
"train_samples_per_second": 0.839, |
|
"train_steps_per_second": 0.21 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3750, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|