|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.985279685966634, |
|
"eval_steps": 100, |
|
"global_step": 1270, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.734375, |
|
"learning_rate": 3.9370078740157486e-08, |
|
"logits/chosen": -2.356706142425537, |
|
"logits/rejected": -2.3367161750793457, |
|
"logps/chosen": -287.937255859375, |
|
"logps/rejected": -266.50421142578125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 106.5, |
|
"learning_rate": 3.937007874015748e-07, |
|
"logits/chosen": -2.3996241092681885, |
|
"logits/rejected": -2.353182554244995, |
|
"logps/chosen": -273.30889892578125, |
|
"logps/rejected": -240.43850708007812, |
|
"loss": 0.118, |
|
"rewards/accuracies": 0.46388885378837585, |
|
"rewards/chosen": 0.00026795046869665384, |
|
"rewards/margins": 0.00038285815389826894, |
|
"rewards/rejected": -0.00011490769247757271, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 129.0, |
|
"learning_rate": 7.874015748031496e-07, |
|
"logits/chosen": -2.38779878616333, |
|
"logits/rejected": -2.3378489017486572, |
|
"logps/chosen": -266.8452453613281, |
|
"logps/rejected": -258.0478515625, |
|
"loss": 0.1346, |
|
"rewards/accuracies": 0.5525000095367432, |
|
"rewards/chosen": 0.0013033099239692092, |
|
"rewards/margins": 0.0008336402243003249, |
|
"rewards/rejected": 0.0004696696996688843, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 108.0, |
|
"learning_rate": 1.1811023622047246e-06, |
|
"logits/chosen": -2.43515682220459, |
|
"logits/rejected": -2.382305145263672, |
|
"logps/chosen": -299.69366455078125, |
|
"logps/rejected": -271.0403747558594, |
|
"loss": 0.1585, |
|
"rewards/accuracies": 0.4999999403953552, |
|
"rewards/chosen": 0.002506708027794957, |
|
"rewards/margins": 0.0005399176734499633, |
|
"rewards/rejected": 0.0019667900633066893, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 95.0, |
|
"learning_rate": 1.5748031496062992e-06, |
|
"logits/chosen": -2.3607640266418457, |
|
"logits/rejected": -2.313088893890381, |
|
"logps/chosen": -288.7026672363281, |
|
"logps/rejected": -253.4452667236328, |
|
"loss": 0.1404, |
|
"rewards/accuracies": 0.5774999856948853, |
|
"rewards/chosen": 0.002137306611984968, |
|
"rewards/margins": 0.0012014020467177033, |
|
"rewards/rejected": 0.0009359045652672648, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 109.0, |
|
"learning_rate": 1.968503937007874e-06, |
|
"logits/chosen": -2.407731533050537, |
|
"logits/rejected": -2.3856372833251953, |
|
"logps/chosen": -267.39801025390625, |
|
"logps/rejected": -264.8341064453125, |
|
"loss": 0.1726, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": 0.0022850066889077425, |
|
"rewards/margins": 0.0014212832320481539, |
|
"rewards/rejected": 0.0008637232822366059, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 135.0, |
|
"learning_rate": 2.362204724409449e-06, |
|
"logits/chosen": -2.3825833797454834, |
|
"logits/rejected": -2.3412365913391113, |
|
"logps/chosen": -272.9526672363281, |
|
"logps/rejected": -239.8430633544922, |
|
"loss": 0.1314, |
|
"rewards/accuracies": 0.6150000095367432, |
|
"rewards/chosen": 0.002347386907786131, |
|
"rewards/margins": 0.001956633059307933, |
|
"rewards/rejected": 0.00039075379027053714, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 89.5, |
|
"learning_rate": 2.755905511811024e-06, |
|
"logits/chosen": -2.382422924041748, |
|
"logits/rejected": -2.3432180881500244, |
|
"logps/chosen": -273.59881591796875, |
|
"logps/rejected": -257.91790771484375, |
|
"loss": 0.1777, |
|
"rewards/accuracies": 0.6025000214576721, |
|
"rewards/chosen": 0.002635209821164608, |
|
"rewards/margins": 0.0014770927373319864, |
|
"rewards/rejected": 0.0011581169674172997, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 125.0, |
|
"learning_rate": 3.1496062992125985e-06, |
|
"logits/chosen": -2.394813060760498, |
|
"logits/rejected": -2.3611693382263184, |
|
"logps/chosen": -279.48638916015625, |
|
"logps/rejected": -260.6809997558594, |
|
"loss": 0.1944, |
|
"rewards/accuracies": 0.6100000739097595, |
|
"rewards/chosen": 0.0013221392873674631, |
|
"rewards/margins": 0.0018236342584714293, |
|
"rewards/rejected": -0.0005014949128963053, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 146.0, |
|
"learning_rate": 3.5433070866141735e-06, |
|
"logits/chosen": -2.3927130699157715, |
|
"logits/rejected": -2.3439621925354004, |
|
"logps/chosen": -264.44195556640625, |
|
"logps/rejected": -236.5146942138672, |
|
"loss": 0.2032, |
|
"rewards/accuracies": 0.5475000143051147, |
|
"rewards/chosen": 0.001684651942923665, |
|
"rewards/margins": 0.0011215232079848647, |
|
"rewards/rejected": 0.0005631285603158176, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 145.0, |
|
"learning_rate": 3.937007874015748e-06, |
|
"logits/chosen": -2.4192750453948975, |
|
"logits/rejected": -2.3637657165527344, |
|
"logps/chosen": -280.88238525390625, |
|
"logps/rejected": -253.8561553955078, |
|
"loss": 0.2536, |
|
"rewards/accuracies": 0.5900000333786011, |
|
"rewards/chosen": 0.003346907440572977, |
|
"rewards/margins": 0.002142687328159809, |
|
"rewards/rejected": 0.001204220112413168, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_logits/chosen": -2.412179470062256, |
|
"eval_logits/rejected": -2.3735485076904297, |
|
"eval_logps/chosen": -267.4339904785156, |
|
"eval_logps/rejected": -242.3385467529297, |
|
"eval_loss": 0.27917271852493286, |
|
"eval_rewards/accuracies": 0.6041666865348816, |
|
"eval_rewards/chosen": 0.002446565078571439, |
|
"eval_rewards/margins": 0.0018827051389962435, |
|
"eval_rewards/rejected": 0.0005638597067445517, |
|
"eval_runtime": 124.6972, |
|
"eval_samples_per_second": 16.039, |
|
"eval_steps_per_second": 0.337, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 196.0, |
|
"learning_rate": 4.330708661417324e-06, |
|
"logits/chosen": -2.383650064468384, |
|
"logits/rejected": -2.345914125442505, |
|
"logps/chosen": -298.79864501953125, |
|
"logps/rejected": -272.7557373046875, |
|
"loss": 0.2924, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.004891454242169857, |
|
"rewards/margins": 0.002777325687929988, |
|
"rewards/rejected": 0.0021141283214092255, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 138.0, |
|
"learning_rate": 4.724409448818898e-06, |
|
"logits/chosen": -2.3592543601989746, |
|
"logits/rejected": -2.29665470123291, |
|
"logps/chosen": -285.70538330078125, |
|
"logps/rejected": -253.1526336669922, |
|
"loss": 0.3272, |
|
"rewards/accuracies": 0.6274999380111694, |
|
"rewards/chosen": 0.005152740981429815, |
|
"rewards/margins": 0.0025280567351728678, |
|
"rewards/rejected": 0.002624684479087591, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 227.0, |
|
"learning_rate": 4.999915012051437e-06, |
|
"logits/chosen": -2.3975679874420166, |
|
"logits/rejected": -2.3674397468566895, |
|
"logps/chosen": -261.0414733886719, |
|
"logps/rejected": -249.3390350341797, |
|
"loss": 0.3621, |
|
"rewards/accuracies": 0.5925000309944153, |
|
"rewards/chosen": 0.004668754059821367, |
|
"rewards/margins": 0.0027659868355840445, |
|
"rewards/rejected": 0.0019027665257453918, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 226.0, |
|
"learning_rate": 4.9984042759305375e-06, |
|
"logits/chosen": -2.4002552032470703, |
|
"logits/rejected": -2.34761381149292, |
|
"logps/chosen": -273.1309509277344, |
|
"logps/rejected": -247.2257843017578, |
|
"loss": 0.387, |
|
"rewards/accuracies": 0.5449999570846558, |
|
"rewards/chosen": 0.0018628574907779694, |
|
"rewards/margins": 0.0016136768972501159, |
|
"rewards/rejected": 0.00024918062263168395, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 196.0, |
|
"learning_rate": 4.9950062323425556e-06, |
|
"logits/chosen": -2.3986167907714844, |
|
"logits/rejected": -2.358851194381714, |
|
"logps/chosen": -271.7406921386719, |
|
"logps/rejected": -249.7233428955078, |
|
"loss": 0.3955, |
|
"rewards/accuracies": 0.6075000166893005, |
|
"rewards/chosen": 0.00026552577037364244, |
|
"rewards/margins": 0.0028056656010448933, |
|
"rewards/rejected": -0.002540139714255929, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 190.0, |
|
"learning_rate": 4.989723448187132e-06, |
|
"logits/chosen": -2.3923754692077637, |
|
"logits/rejected": -2.3684566020965576, |
|
"logps/chosen": -285.33184814453125, |
|
"logps/rejected": -282.5887756347656, |
|
"loss": 0.4099, |
|
"rewards/accuracies": 0.5924999713897705, |
|
"rewards/chosen": 0.0046120877377688885, |
|
"rewards/margins": 0.0034965365193784237, |
|
"rewards/rejected": 0.0011155509855598211, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 222.0, |
|
"learning_rate": 4.982559914106645e-06, |
|
"logits/chosen": -2.416792392730713, |
|
"logits/rejected": -2.3636841773986816, |
|
"logps/chosen": -297.1885681152344, |
|
"logps/rejected": -281.99481201171875, |
|
"loss": 0.5527, |
|
"rewards/accuracies": 0.6299999952316284, |
|
"rewards/chosen": 0.004418404307216406, |
|
"rewards/margins": 0.0038651120848953724, |
|
"rewards/rejected": 0.0005532926879823208, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 189.0, |
|
"learning_rate": 4.973521041471662e-06, |
|
"logits/chosen": -2.443068504333496, |
|
"logits/rejected": -2.4043824672698975, |
|
"logps/chosen": -284.85546875, |
|
"logps/rejected": -246.3272705078125, |
|
"loss": 0.467, |
|
"rewards/accuracies": 0.5925000309944153, |
|
"rewards/chosen": 0.003813292132690549, |
|
"rewards/margins": 0.0034597956109791994, |
|
"rewards/rejected": 0.00035349628888070583, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 186.0, |
|
"learning_rate": 4.962613658293158e-06, |
|
"logits/chosen": -2.364473581314087, |
|
"logits/rejected": -2.336862802505493, |
|
"logps/chosen": -260.23297119140625, |
|
"logps/rejected": -244.9732666015625, |
|
"loss": 0.4326, |
|
"rewards/accuracies": 0.6074999570846558, |
|
"rewards/chosen": 0.0008181848679669201, |
|
"rewards/margins": 0.003228846937417984, |
|
"rewards/rejected": -0.0024106616619974375, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 196.0, |
|
"learning_rate": 4.949846004064605e-06, |
|
"logits/chosen": -2.414769411087036, |
|
"logits/rejected": -2.394395351409912, |
|
"logps/chosen": -281.43670654296875, |
|
"logps/rejected": -265.92974853515625, |
|
"loss": 0.5352, |
|
"rewards/accuracies": 0.5974999666213989, |
|
"rewards/chosen": 0.00015548830560874194, |
|
"rewards/margins": 0.003786542685702443, |
|
"rewards/rejected": -0.003631054190918803, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_logits/chosen": -2.4014458656311035, |
|
"eval_logits/rejected": -2.362912893295288, |
|
"eval_logps/chosen": -267.5639953613281, |
|
"eval_logps/rejected": -242.58323669433594, |
|
"eval_loss": 0.5010271072387695, |
|
"eval_rewards/accuracies": 0.574404776096344, |
|
"eval_rewards/chosen": 0.0011464261915534735, |
|
"eval_rewards/margins": 0.003029454033821821, |
|
"eval_rewards/rejected": -0.001883027609437704, |
|
"eval_runtime": 123.1297, |
|
"eval_samples_per_second": 16.243, |
|
"eval_steps_per_second": 0.341, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 254.0, |
|
"learning_rate": 4.935227723537811e-06, |
|
"logits/chosen": -2.406309127807617, |
|
"logits/rejected": -2.360525131225586, |
|
"logps/chosen": -296.66070556640625, |
|
"logps/rejected": -266.5440979003906, |
|
"loss": 0.5154, |
|
"rewards/accuracies": 0.6399999856948853, |
|
"rewards/chosen": 0.0017778485780581832, |
|
"rewards/margins": 0.004943528212606907, |
|
"rewards/rejected": -0.0031656797509640455, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 288.0, |
|
"learning_rate": 4.918769859437233e-06, |
|
"logits/chosen": -2.3826467990875244, |
|
"logits/rejected": -2.3224892616271973, |
|
"logps/chosen": -274.14825439453125, |
|
"logps/rejected": -252.51400756835938, |
|
"loss": 0.5475, |
|
"rewards/accuracies": 0.6050000190734863, |
|
"rewards/chosen": 0.004731293302029371, |
|
"rewards/margins": 0.0043090214021503925, |
|
"rewards/rejected": 0.0004222726565785706, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 182.0, |
|
"learning_rate": 4.900484844118235e-06, |
|
"logits/chosen": -2.3914456367492676, |
|
"logits/rejected": -2.3371217250823975, |
|
"logps/chosen": -280.33282470703125, |
|
"logps/rejected": -240.6001739501953, |
|
"loss": 0.561, |
|
"rewards/accuracies": 0.6175000071525574, |
|
"rewards/chosen": 0.002294857520610094, |
|
"rewards/margins": 0.0033697611652314663, |
|
"rewards/rejected": -0.0010749038774520159, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 196.0, |
|
"learning_rate": 4.880386490175634e-06, |
|
"logits/chosen": -2.359574794769287, |
|
"logits/rejected": -2.327416181564331, |
|
"logps/chosen": -290.989013671875, |
|
"logps/rejected": -268.49395751953125, |
|
"loss": 0.5661, |
|
"rewards/accuracies": 0.5975000262260437, |
|
"rewards/chosen": -0.00016075666644610465, |
|
"rewards/margins": 0.003780897008255124, |
|
"rewards/rejected": -0.003941653296351433, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 232.0, |
|
"learning_rate": 4.8584899800095865e-06, |
|
"logits/chosen": -2.4217326641082764, |
|
"logits/rejected": -2.3503971099853516, |
|
"logps/chosen": -288.73858642578125, |
|
"logps/rejected": -258.19317626953125, |
|
"loss": 0.5762, |
|
"rewards/accuracies": 0.6399999856948853, |
|
"rewards/chosen": 0.00230691721662879, |
|
"rewards/margins": 0.004124250262975693, |
|
"rewards/rejected": -0.0018173331627622247, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 162.0, |
|
"learning_rate": 4.834811854356729e-06, |
|
"logits/chosen": -2.406905174255371, |
|
"logits/rejected": -2.3617682456970215, |
|
"logps/chosen": -266.1622619628906, |
|
"logps/rejected": -245.754150390625, |
|
"loss": 0.4313, |
|
"rewards/accuracies": 0.6574999690055847, |
|
"rewards/chosen": 0.006862832698971033, |
|
"rewards/margins": 0.010333456099033356, |
|
"rewards/rejected": -0.0034706243313848972, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 119.0, |
|
"learning_rate": 4.809369999795219e-06, |
|
"logits/chosen": -2.367124080657959, |
|
"logits/rejected": -2.352210760116577, |
|
"logps/chosen": -271.3493957519531, |
|
"logps/rejected": -274.4390563964844, |
|
"loss": 0.2246, |
|
"rewards/accuracies": 0.7775000333786011, |
|
"rewards/chosen": 0.011581487953662872, |
|
"rewards/margins": 0.02090141549706459, |
|
"rewards/rejected": -0.009319926612079144, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 152.0, |
|
"learning_rate": 4.7821836352331235e-06, |
|
"logits/chosen": -2.4189422130584717, |
|
"logits/rejected": -2.362922430038452, |
|
"logps/chosen": -276.532470703125, |
|
"logps/rejected": -254.9628143310547, |
|
"loss": 0.3068, |
|
"rewards/accuracies": 0.7899999618530273, |
|
"rewards/chosen": 0.010409007780253887, |
|
"rewards/margins": 0.019158251583576202, |
|
"rewards/rejected": -0.008749241940677166, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 122.5, |
|
"learning_rate": 4.7532732973903525e-06, |
|
"logits/chosen": -2.392087936401367, |
|
"logits/rejected": -2.3331363201141357, |
|
"logps/chosen": -281.2344970703125, |
|
"logps/rejected": -266.2068176269531, |
|
"loss": 0.2544, |
|
"rewards/accuracies": 0.7375000715255737, |
|
"rewards/chosen": 0.01263010036200285, |
|
"rewards/margins": 0.01806234009563923, |
|
"rewards/rejected": -0.0054322415962815285, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 177.0, |
|
"learning_rate": 4.722660825285122e-06, |
|
"logits/chosen": -2.413367509841919, |
|
"logits/rejected": -2.3771374225616455, |
|
"logps/chosen": -278.57904052734375, |
|
"logps/rejected": -270.5982360839844, |
|
"loss": 0.3676, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.014171945862472057, |
|
"rewards/margins": 0.020834611728787422, |
|
"rewards/rejected": -0.0066626654006540775, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_logits/chosen": -2.416761636734009, |
|
"eval_logits/rejected": -2.3788468837738037, |
|
"eval_logps/chosen": -266.88555908203125, |
|
"eval_logps/rejected": -242.12106323242188, |
|
"eval_loss": 0.8293091654777527, |
|
"eval_rewards/accuracies": 0.5982142686843872, |
|
"eval_rewards/chosen": 0.00793052464723587, |
|
"eval_rewards/margins": 0.005191552918404341, |
|
"eval_rewards/rejected": 0.0027389726601541042, |
|
"eval_runtime": 123.1487, |
|
"eval_samples_per_second": 16.241, |
|
"eval_steps_per_second": 0.341, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 140.0, |
|
"learning_rate": 4.690369343736637e-06, |
|
"logits/chosen": -2.4030745029449463, |
|
"logits/rejected": -2.368807077407837, |
|
"logps/chosen": -278.1623229980469, |
|
"logps/rejected": -264.98541259765625, |
|
"loss": 0.3085, |
|
"rewards/accuracies": 0.7649999856948853, |
|
"rewards/chosen": 0.01418610941618681, |
|
"rewards/margins": 0.01857883669435978, |
|
"rewards/rejected": -0.004392724949866533, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 185.0, |
|
"learning_rate": 4.656423245896494e-06, |
|
"logits/chosen": -2.4111828804016113, |
|
"logits/rejected": -2.3645176887512207, |
|
"logps/chosen": -270.58477783203125, |
|
"logps/rejected": -256.71099853515625, |
|
"loss": 0.2759, |
|
"rewards/accuracies": 0.7575000524520874, |
|
"rewards/chosen": 0.008453365415334702, |
|
"rewards/margins": 0.01515539176762104, |
|
"rewards/rejected": -0.0067020258866250515, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 163.0, |
|
"learning_rate": 4.6208481748219645e-06, |
|
"logits/chosen": -2.392019271850586, |
|
"logits/rejected": -2.367377519607544, |
|
"logps/chosen": -271.10015869140625, |
|
"logps/rejected": -256.1971435546875, |
|
"loss": 0.2666, |
|
"rewards/accuracies": 0.7675000429153442, |
|
"rewards/chosen": 0.007386817596852779, |
|
"rewards/margins": 0.019648974761366844, |
|
"rewards/rejected": -0.01226215623319149, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 142.0, |
|
"learning_rate": 4.583671004105096e-06, |
|
"logits/chosen": -2.3817129135131836, |
|
"logits/rejected": -2.342694044113159, |
|
"logps/chosen": -275.9081115722656, |
|
"logps/rejected": -251.59848022460938, |
|
"loss": 0.2849, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.0086748655885458, |
|
"rewards/margins": 0.01924619823694229, |
|
"rewards/rejected": -0.010571330785751343, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 113.0, |
|
"learning_rate": 4.544919817572262e-06, |
|
"logits/chosen": -2.3859992027282715, |
|
"logits/rejected": -2.325930118560791, |
|
"logps/chosen": -272.119873046875, |
|
"logps/rejected": -247.908935546875, |
|
"loss": 0.2871, |
|
"rewards/accuracies": 0.7649999856948853, |
|
"rewards/chosen": 0.012000922113656998, |
|
"rewards/margins": 0.019705070182681084, |
|
"rewards/rejected": -0.007704148534685373, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 154.0, |
|
"learning_rate": 4.504623888069497e-06, |
|
"logits/chosen": -2.397146701812744, |
|
"logits/rejected": -2.3492813110351562, |
|
"logps/chosen": -271.32171630859375, |
|
"logps/rejected": -247.8098907470703, |
|
"loss": 0.3405, |
|
"rewards/accuracies": 0.7600000500679016, |
|
"rewards/chosen": 0.013489668257534504, |
|
"rewards/margins": 0.017752837389707565, |
|
"rewards/rejected": -0.0042631677351891994, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 139.0, |
|
"learning_rate": 4.462813655349637e-06, |
|
"logits/chosen": -2.372323751449585, |
|
"logits/rejected": -2.3170628547668457, |
|
"logps/chosen": -268.582275390625, |
|
"logps/rejected": -245.34097290039062, |
|
"loss": 0.3015, |
|
"rewards/accuracies": 0.7275000214576721, |
|
"rewards/chosen": 0.013011058792471886, |
|
"rewards/margins": 0.018547596409916878, |
|
"rewards/rejected": -0.005536535754799843, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 195.0, |
|
"learning_rate": 4.419520703077975e-06, |
|
"logits/chosen": -2.3980116844177246, |
|
"logits/rejected": -2.3060975074768066, |
|
"logps/chosen": -284.5008544921875, |
|
"logps/rejected": -232.28515625, |
|
"loss": 0.2953, |
|
"rewards/accuracies": 0.7300000190734863, |
|
"rewards/chosen": 0.009602969512343407, |
|
"rewards/margins": 0.01879434660077095, |
|
"rewards/rejected": -0.00919137429445982, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"grad_norm": 184.0, |
|
"learning_rate": 4.3747777349737905e-06, |
|
"logits/chosen": -2.394030809402466, |
|
"logits/rejected": -2.3477187156677246, |
|
"logps/chosen": -299.9769592285156, |
|
"logps/rejected": -268.77301025390625, |
|
"loss": 0.318, |
|
"rewards/accuracies": 0.7575000524520874, |
|
"rewards/chosen": 0.011008193716406822, |
|
"rewards/margins": 0.020197119563817978, |
|
"rewards/rejected": -0.009188923053443432, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 153.0, |
|
"learning_rate": 4.328618550105802e-06, |
|
"logits/chosen": -2.3696258068084717, |
|
"logits/rejected": -2.341409206390381, |
|
"logps/chosen": -271.8193664550781, |
|
"logps/rejected": -264.6459045410156, |
|
"loss": 0.366, |
|
"rewards/accuracies": 0.7400000691413879, |
|
"rewards/chosen": 0.012812617234885693, |
|
"rewards/margins": 0.018871381878852844, |
|
"rewards/rejected": -0.006058765109628439, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_logits/chosen": -2.4146392345428467, |
|
"eval_logits/rejected": -2.377370595932007, |
|
"eval_logps/chosen": -267.025634765625, |
|
"eval_logps/rejected": -242.3221435546875, |
|
"eval_loss": 0.8238700032234192, |
|
"eval_rewards/accuracies": 0.6398809552192688, |
|
"eval_rewards/chosen": 0.006530104670673609, |
|
"eval_rewards/margins": 0.005802116356790066, |
|
"eval_rewards/rejected": 0.0007279877318069339, |
|
"eval_runtime": 123.0966, |
|
"eval_samples_per_second": 16.247, |
|
"eval_steps_per_second": 0.341, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 91.0, |
|
"learning_rate": 4.2810780173601675e-06, |
|
"logits/chosen": -2.3998053073883057, |
|
"logits/rejected": -2.341407060623169, |
|
"logps/chosen": -285.62054443359375, |
|
"logps/rejected": -247.3552703857422, |
|
"loss": 0.3234, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.008955768309533596, |
|
"rewards/margins": 0.020532304421067238, |
|
"rewards/rejected": -0.011576534248888493, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 174.0, |
|
"learning_rate": 4.232192049100351e-06, |
|
"logits/chosen": -2.411689043045044, |
|
"logits/rejected": -2.384003162384033, |
|
"logps/chosen": -242.8949737548828, |
|
"logps/rejected": -236.754150390625, |
|
"loss": 0.292, |
|
"rewards/accuracies": 0.747499942779541, |
|
"rewards/chosen": 0.005577466916292906, |
|
"rewards/margins": 0.016019560396671295, |
|
"rewards/rejected": -0.010442093946039677, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 110.0, |
|
"learning_rate": 4.1819975740387406e-06, |
|
"logits/chosen": -2.4044318199157715, |
|
"logits/rejected": -2.3681979179382324, |
|
"logps/chosen": -276.5671691894531, |
|
"logps/rejected": -259.7832336425781, |
|
"loss": 0.3016, |
|
"rewards/accuracies": 0.7475000619888306, |
|
"rewards/chosen": 0.011015561409294605, |
|
"rewards/margins": 0.02647540345788002, |
|
"rewards/rejected": -0.01545984111726284, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 158.0, |
|
"learning_rate": 4.1305325093405045e-06, |
|
"logits/chosen": -2.4186065196990967, |
|
"logits/rejected": -2.406249523162842, |
|
"logps/chosen": -295.4107971191406, |
|
"logps/rejected": -280.9339294433594, |
|
"loss": 0.4026, |
|
"rewards/accuracies": 0.7675000429153442, |
|
"rewards/chosen": 0.010993788950145245, |
|
"rewards/margins": 0.020805999636650085, |
|
"rewards/rejected": -0.00981221068650484, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 175.0, |
|
"learning_rate": 4.077835731980775e-06, |
|
"logits/chosen": -2.416654348373413, |
|
"logits/rejected": -2.368619203567505, |
|
"logps/chosen": -279.9720764160156, |
|
"logps/rejected": -245.94345092773438, |
|
"loss": 0.3414, |
|
"rewards/accuracies": 0.7575000524520874, |
|
"rewards/chosen": 0.006698101758956909, |
|
"rewards/margins": 0.016826082020998, |
|
"rewards/rejected": -0.010127981193363667, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"grad_norm": 175.0, |
|
"learning_rate": 4.02394704937677e-06, |
|
"logits/chosen": -2.3919434547424316, |
|
"logits/rejected": -2.3505940437316895, |
|
"logps/chosen": -280.6643981933594, |
|
"logps/rejected": -252.3192901611328, |
|
"loss": 0.3603, |
|
"rewards/accuracies": 0.7575000524520874, |
|
"rewards/chosen": 0.00701780105009675, |
|
"rewards/margins": 0.01795104146003723, |
|
"rewards/rejected": -0.010933240875601768, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 176.0, |
|
"learning_rate": 3.96890716931708e-06, |
|
"logits/chosen": -2.381404399871826, |
|
"logits/rejected": -2.369319438934326, |
|
"logps/chosen": -251.976806640625, |
|
"logps/rejected": -239.9644775390625, |
|
"loss": 0.3975, |
|
"rewards/accuracies": 0.7425000071525574, |
|
"rewards/chosen": 0.00673043355345726, |
|
"rewards/margins": 0.01468564011156559, |
|
"rewards/rejected": -0.00795520469546318, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 129.0, |
|
"learning_rate": 3.912757669210783e-06, |
|
"logits/chosen": -2.4172403812408447, |
|
"logits/rejected": -2.354468584060669, |
|
"logps/chosen": -258.93780517578125, |
|
"logps/rejected": -234.1327362060547, |
|
"loss": 0.354, |
|
"rewards/accuracies": 0.7425000071525574, |
|
"rewards/chosen": 0.015040628612041473, |
|
"rewards/margins": 0.020659491419792175, |
|
"rewards/rejected": -0.005618864204734564, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 127.5, |
|
"learning_rate": 3.855540964679658e-06, |
|
"logits/chosen": -2.3677306175231934, |
|
"logits/rejected": -2.323366641998291, |
|
"logps/chosen": -239.04776000976562, |
|
"logps/rejected": -228.19580078125, |
|
"loss": 0.2687, |
|
"rewards/accuracies": 0.7850000262260437, |
|
"rewards/chosen": 0.0076850662007927895, |
|
"rewards/margins": 0.01919684186577797, |
|
"rewards/rejected": -0.01151177566498518, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 100.5, |
|
"learning_rate": 3.797300277517212e-06, |
|
"logits/chosen": -2.412917137145996, |
|
"logits/rejected": -2.38498592376709, |
|
"logps/chosen": -285.268310546875, |
|
"logps/rejected": -264.0386962890625, |
|
"loss": 0.292, |
|
"rewards/accuracies": 0.7850000262260437, |
|
"rewards/chosen": 0.010482062585651875, |
|
"rewards/margins": 0.020365219563245773, |
|
"rewards/rejected": -0.009883158840239048, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_logits/chosen": -2.4342868328094482, |
|
"eval_logits/rejected": -2.3977575302124023, |
|
"eval_logps/chosen": -267.17938232421875, |
|
"eval_logps/rejected": -242.4461669921875, |
|
"eval_loss": 0.8145859837532043, |
|
"eval_rewards/accuracies": 0.6398809552192688, |
|
"eval_rewards/chosen": 0.004992412868887186, |
|
"eval_rewards/margins": 0.005504657980054617, |
|
"eval_rewards/rejected": -0.0005122453439980745, |
|
"eval_runtime": 123.1687, |
|
"eval_samples_per_second": 16.238, |
|
"eval_steps_per_second": 0.341, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 150.0, |
|
"learning_rate": 3.7380796030387035e-06, |
|
"logits/chosen": -2.4117255210876465, |
|
"logits/rejected": -2.3580873012542725, |
|
"logps/chosen": -288.262451171875, |
|
"logps/rejected": -250.64486694335938, |
|
"loss": 0.2919, |
|
"rewards/accuracies": 0.7850000262260437, |
|
"rewards/chosen": 0.012420935556292534, |
|
"rewards/margins": 0.022166112437844276, |
|
"rewards/rejected": -0.009745175018906593, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 72.0, |
|
"learning_rate": 3.6779236768468647e-06, |
|
"logits/chosen": -2.416080951690674, |
|
"logits/rejected": -2.3825385570526123, |
|
"logps/chosen": -266.83453369140625, |
|
"logps/rejected": -257.94635009765625, |
|
"loss": 0.0944, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.017391610890626907, |
|
"rewards/margins": 0.030041953548789024, |
|
"rewards/rejected": -0.012650340795516968, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 40.75, |
|
"learning_rate": 3.6168779410383905e-06, |
|
"logits/chosen": -2.4022631645202637, |
|
"logits/rejected": -2.366995334625244, |
|
"logps/chosen": -274.7615661621094, |
|
"logps/rejected": -253.46835327148438, |
|
"loss": 0.0944, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.014151136390864849, |
|
"rewards/margins": 0.029283767566084862, |
|
"rewards/rejected": -0.015132628381252289, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"grad_norm": 109.0, |
|
"learning_rate": 3.554988509876747e-06, |
|
"logits/chosen": -2.411635637283325, |
|
"logits/rejected": -2.379657030105591, |
|
"logps/chosen": -264.20758056640625, |
|
"logps/rejected": -248.7351837158203, |
|
"loss": 0.1176, |
|
"rewards/accuracies": 0.8475000262260437, |
|
"rewards/chosen": 0.01760762929916382, |
|
"rewards/margins": 0.03199198096990585, |
|
"rewards/rejected": -0.014384354464709759, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"grad_norm": 162.0, |
|
"learning_rate": 3.4923021349572183e-06, |
|
"logits/chosen": -2.4204351902008057, |
|
"logits/rejected": -2.342064619064331, |
|
"logps/chosen": -293.338623046875, |
|
"logps/rejected": -249.83984375, |
|
"loss": 0.1199, |
|
"rewards/accuracies": 0.8449999690055847, |
|
"rewards/chosen": 0.016306212171912193, |
|
"rewards/margins": 0.03437874838709831, |
|
"rewards/rejected": -0.01807253621518612, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 156.0, |
|
"learning_rate": 3.428866169890511e-06, |
|
"logits/chosen": -2.4187042713165283, |
|
"logits/rejected": -2.3788833618164062, |
|
"logps/chosen": -280.169921875, |
|
"logps/rejected": -266.49139404296875, |
|
"loss": 0.1396, |
|
"rewards/accuracies": 0.8550000190734863, |
|
"rewards/chosen": 0.020975306630134583, |
|
"rewards/margins": 0.0341680608689785, |
|
"rewards/rejected": -0.013192756101489067, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"grad_norm": 52.5, |
|
"learning_rate": 3.3647285345315933e-06, |
|
"logits/chosen": -2.426948308944702, |
|
"logits/rejected": -2.3513236045837402, |
|
"logps/chosen": -301.64703369140625, |
|
"logps/rejected": -252.04910278320312, |
|
"loss": 0.1179, |
|
"rewards/accuracies": 0.8324999809265137, |
|
"rewards/chosen": 0.02232900820672512, |
|
"rewards/margins": 0.03726055473089218, |
|
"rewards/rejected": -0.01493154652416706, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 63.0, |
|
"learning_rate": 3.299937678780786e-06, |
|
"logits/chosen": -2.3919901847839355, |
|
"logits/rejected": -2.376873016357422, |
|
"logps/chosen": -270.5113830566406, |
|
"logps/rejected": -262.92120361328125, |
|
"loss": 0.1103, |
|
"rewards/accuracies": 0.8525000810623169, |
|
"rewards/chosen": 0.01391147542744875, |
|
"rewards/margins": 0.03057609498500824, |
|
"rewards/rejected": -0.016664620488882065, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"grad_norm": 38.25, |
|
"learning_rate": 3.234542545984464e-06, |
|
"logits/chosen": -2.3860366344451904, |
|
"logits/rejected": -2.3532588481903076, |
|
"logps/chosen": -279.0345764160156, |
|
"logps/rejected": -268.3638000488281, |
|
"loss": 0.113, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.013167209923267365, |
|
"rewards/margins": 0.03231758996844292, |
|
"rewards/rejected": -0.0191503819078207, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"grad_norm": 95.5, |
|
"learning_rate": 3.1685925359629928e-06, |
|
"logits/chosen": -2.382845401763916, |
|
"logits/rejected": -2.345613479614258, |
|
"logps/chosen": -270.888427734375, |
|
"logps/rejected": -262.33251953125, |
|
"loss": 0.1355, |
|
"rewards/accuracies": 0.8575000762939453, |
|
"rewards/chosen": 0.01834903098642826, |
|
"rewards/margins": 0.034947365522384644, |
|
"rewards/rejected": -0.016598336398601532, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_logits/chosen": -2.4177558422088623, |
|
"eval_logits/rejected": -2.3796002864837646, |
|
"eval_logps/chosen": -267.20611572265625, |
|
"eval_logps/rejected": -242.52117919921875, |
|
"eval_loss": 0.9650812745094299, |
|
"eval_rewards/accuracies": 0.6160714030265808, |
|
"eval_rewards/chosen": 0.004725400358438492, |
|
"eval_rewards/margins": 0.00598777923732996, |
|
"eval_rewards/rejected": -0.0012623785296455026, |
|
"eval_runtime": 123.1103, |
|
"eval_samples_per_second": 16.246, |
|
"eval_steps_per_second": 0.341, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"grad_norm": 79.5, |
|
"learning_rate": 3.102137467693858e-06, |
|
"logits/chosen": -2.3922505378723145, |
|
"logits/rejected": -2.3382246494293213, |
|
"logps/chosen": -273.4150390625, |
|
"logps/rejected": -258.9840393066406, |
|
"loss": 0.252, |
|
"rewards/accuracies": 0.8725000619888306, |
|
"rewards/chosen": 0.01791740581393242, |
|
"rewards/margins": 0.032240770757198334, |
|
"rewards/rejected": -0.014323368668556213, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"grad_norm": 120.0, |
|
"learning_rate": 3.0352275416781465e-06, |
|
"logits/chosen": -2.416335344314575, |
|
"logits/rejected": -2.379333019256592, |
|
"logps/chosen": -273.5201110839844, |
|
"logps/rejected": -258.6203918457031, |
|
"loss": 0.1567, |
|
"rewards/accuracies": 0.8400000333786011, |
|
"rewards/chosen": 0.02479313686490059, |
|
"rewards/margins": 0.0343189537525177, |
|
"rewards/rejected": -0.00952581875026226, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"grad_norm": 106.0, |
|
"learning_rate": 2.96791330201883e-06, |
|
"logits/chosen": -2.421025514602661, |
|
"logits/rejected": -2.3913843631744385, |
|
"logps/chosen": -266.0569763183594, |
|
"logps/rejected": -255.9671173095703, |
|
"loss": 0.1255, |
|
"rewards/accuracies": 0.8274999856948853, |
|
"rewards/chosen": 0.021568376570940018, |
|
"rewards/margins": 0.03504693880677223, |
|
"rewards/rejected": -0.013478565029799938, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"grad_norm": 94.5, |
|
"learning_rate": 2.9002455982394946e-06, |
|
"logits/chosen": -2.3834731578826904, |
|
"logits/rejected": -2.3404629230499268, |
|
"logps/chosen": -279.171630859375, |
|
"logps/rejected": -251.27841186523438, |
|
"loss": 0.1115, |
|
"rewards/accuracies": 0.8674999475479126, |
|
"rewards/chosen": 0.020785773172974586, |
|
"rewards/margins": 0.03259057179093361, |
|
"rewards/rejected": -0.011804800480604172, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"grad_norm": 61.0, |
|
"learning_rate": 2.832275546872339e-06, |
|
"logits/chosen": -2.401367664337158, |
|
"logits/rejected": -2.3691532611846924, |
|
"logps/chosen": -261.18377685546875, |
|
"logps/rejected": -267.6328125, |
|
"loss": 0.0953, |
|
"rewards/accuracies": 0.8850000500679016, |
|
"rewards/chosen": 0.018077706918120384, |
|
"rewards/margins": 0.03268102556467056, |
|
"rewards/rejected": -0.014603319577872753, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"grad_norm": 130.0, |
|
"learning_rate": 2.7640544928444927e-06, |
|
"logits/chosen": -2.418788194656372, |
|
"logits/rejected": -2.3343942165374756, |
|
"logps/chosen": -288.7831726074219, |
|
"logps/rejected": -252.0687713623047, |
|
"loss": 0.1191, |
|
"rewards/accuracies": 0.8675000071525574, |
|
"rewards/chosen": 0.020471712574362755, |
|
"rewards/margins": 0.03600749000906944, |
|
"rewards/rejected": -0.015535781159996986, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"grad_norm": 86.0, |
|
"learning_rate": 2.695633970691786e-06, |
|
"logits/chosen": -2.3701846599578857, |
|
"logits/rejected": -2.351933240890503, |
|
"logps/chosen": -257.39752197265625, |
|
"logps/rejected": -252.69287109375, |
|
"loss": 0.0849, |
|
"rewards/accuracies": 0.8550000190734863, |
|
"rewards/chosen": 0.019604947417974472, |
|
"rewards/margins": 0.031910307705402374, |
|
"rewards/rejected": -0.012305359356105328, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"grad_norm": 101.5, |
|
"learning_rate": 2.6270656656293007e-06, |
|
"logits/chosen": -2.394273281097412, |
|
"logits/rejected": -2.348475694656372, |
|
"logps/chosen": -264.9925537109375, |
|
"logps/rejected": -248.74368286132812, |
|
"loss": 0.0955, |
|
"rewards/accuracies": 0.8574999570846558, |
|
"rewards/chosen": 0.020610950887203217, |
|
"rewards/margins": 0.03306712210178375, |
|
"rewards/rejected": -0.012456170283257961, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"grad_norm": 63.25, |
|
"learning_rate": 2.558401374508089e-06, |
|
"logits/chosen": -2.402439594268799, |
|
"logits/rejected": -2.3409905433654785, |
|
"logps/chosen": -276.2132873535156, |
|
"logps/rejected": -251.0520782470703, |
|
"loss": 0.0996, |
|
"rewards/accuracies": 0.8650000691413879, |
|
"rewards/chosen": 0.02115057222545147, |
|
"rewards/margins": 0.030901487916707993, |
|
"rewards/rejected": -0.009750919416546822, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"grad_norm": 57.75, |
|
"learning_rate": 2.4896929666875665e-06, |
|
"logits/chosen": -2.4019179344177246, |
|
"logits/rejected": -2.3663971424102783, |
|
"logps/chosen": -274.6024475097656, |
|
"logps/rejected": -264.2455139160156, |
|
"loss": 0.1327, |
|
"rewards/accuracies": 0.8574999570846558, |
|
"rewards/chosen": 0.016096513718366623, |
|
"rewards/margins": 0.03081604465842247, |
|
"rewards/rejected": -0.014719529077410698, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"eval_logits/chosen": -2.4065868854522705, |
|
"eval_logits/rejected": -2.369014263153076, |
|
"eval_logps/chosen": -267.2229919433594, |
|
"eval_logps/rejected": -242.58834838867188, |
|
"eval_loss": 0.9984952211380005, |
|
"eval_rewards/accuracies": 0.6339285969734192, |
|
"eval_rewards/chosen": 0.004556288011372089, |
|
"eval_rewards/margins": 0.006490407045930624, |
|
"eval_rewards/rejected": -0.001934119500219822, |
|
"eval_runtime": 122.9942, |
|
"eval_samples_per_second": 16.261, |
|
"eval_steps_per_second": 0.341, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"grad_norm": 104.0, |
|
"learning_rate": 2.420992344853132e-06, |
|
"logits/chosen": -2.4031834602355957, |
|
"logits/rejected": -2.380056142807007, |
|
"logps/chosen": -276.49700927734375, |
|
"logps/rejected": -262.06341552734375, |
|
"loss": 0.1394, |
|
"rewards/accuracies": 0.8399999737739563, |
|
"rewards/chosen": 0.019898083060979843, |
|
"rewards/margins": 0.033728718757629395, |
|
"rewards/rejected": -0.013830636627972126, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"grad_norm": 111.0, |
|
"learning_rate": 2.3523514058086093e-06, |
|
"logits/chosen": -2.410182237625122, |
|
"logits/rejected": -2.326798915863037, |
|
"logps/chosen": -288.55609130859375, |
|
"logps/rejected": -250.171630859375, |
|
"loss": 0.1191, |
|
"rewards/accuracies": 0.8675000071525574, |
|
"rewards/chosen": 0.01868962123990059, |
|
"rewards/margins": 0.02993825078010559, |
|
"rewards/rejected": -0.011248626746237278, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"grad_norm": 63.5, |
|
"learning_rate": 2.2838220012731365e-06, |
|
"logits/chosen": -2.3818917274475098, |
|
"logits/rejected": -2.3685965538024902, |
|
"logps/chosen": -270.9535217285156, |
|
"logps/rejected": -267.3423767089844, |
|
"loss": 0.1279, |
|
"rewards/accuracies": 0.8825000524520874, |
|
"rewards/chosen": 0.023929597809910774, |
|
"rewards/margins": 0.041905276477336884, |
|
"rewards/rejected": -0.01797567494213581, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"grad_norm": 136.0, |
|
"learning_rate": 2.2154558987121054e-06, |
|
"logits/chosen": -2.3983840942382812, |
|
"logits/rejected": -2.3515264987945557, |
|
"logps/chosen": -274.8974609375, |
|
"logps/rejected": -253.7775115966797, |
|
"loss": 0.1044, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.025471847504377365, |
|
"rewards/margins": 0.03200577199459076, |
|
"rewards/rejected": -0.006533923093229532, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"grad_norm": 208.0, |
|
"learning_rate": 2.147304742231758e-06, |
|
"logits/chosen": -2.3778913021087646, |
|
"logits/rejected": -2.3485236167907715, |
|
"logps/chosen": -254.48635864257812, |
|
"logps/rejected": -267.0797119140625, |
|
"loss": 0.1637, |
|
"rewards/accuracies": 0.8550001382827759, |
|
"rewards/chosen": 0.018020575866103172, |
|
"rewards/margins": 0.030409198254346848, |
|
"rewards/rejected": -0.012388622388243675, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"grad_norm": 62.5, |
|
"learning_rate": 2.0794200135669586e-06, |
|
"logits/chosen": -2.399770498275757, |
|
"logits/rejected": -2.364065647125244, |
|
"logps/chosen": -277.4521484375, |
|
"logps/rejected": -267.6507568359375, |
|
"loss": 0.1466, |
|
"rewards/accuracies": 0.8574999570846558, |
|
"rewards/chosen": 0.021130980923771858, |
|
"rewards/margins": 0.036186493933200836, |
|
"rewards/rejected": -0.015055513009428978, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"grad_norm": 43.25, |
|
"learning_rate": 2.011852993191625e-06, |
|
"logits/chosen": -2.3711681365966797, |
|
"logits/rejected": -2.331266403198242, |
|
"logps/chosen": -284.1138916015625, |
|
"logps/rejected": -270.9895324707031, |
|
"loss": 0.0563, |
|
"rewards/accuracies": 0.9100000262260437, |
|
"rewards/chosen": 0.02099769189953804, |
|
"rewards/margins": 0.03560823202133179, |
|
"rewards/rejected": -0.014610541984438896, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"grad_norm": 91.5, |
|
"learning_rate": 1.944654721581196e-06, |
|
"logits/chosen": -2.3276844024658203, |
|
"logits/rejected": -2.3009562492370605, |
|
"logps/chosen": -260.98626708984375, |
|
"logps/rejected": -247.24685668945312, |
|
"loss": 0.0353, |
|
"rewards/accuracies": 0.9300001263618469, |
|
"rewards/chosen": 0.024031776934862137, |
|
"rewards/margins": 0.04021488502621651, |
|
"rewards/rejected": -0.01618310809135437, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"grad_norm": 29.5, |
|
"learning_rate": 1.877875960656394e-06, |
|
"logits/chosen": -2.3537003993988037, |
|
"logits/rejected": -2.3234972953796387, |
|
"logps/chosen": -275.46453857421875, |
|
"logps/rejected": -260.1392517089844, |
|
"loss": 0.0298, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.021290091797709465, |
|
"rewards/margins": 0.032937195152044296, |
|
"rewards/rejected": -0.011647104285657406, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"grad_norm": 42.5, |
|
"learning_rate": 1.8115671554374067e-06, |
|
"logits/chosen": -2.399651050567627, |
|
"logits/rejected": -2.3859167098999023, |
|
"logps/chosen": -268.92803955078125, |
|
"logps/rejected": -275.9405822753906, |
|
"loss": 0.0389, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.025255614891648293, |
|
"rewards/margins": 0.04190623760223389, |
|
"rewards/rejected": -0.016650624573230743, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"eval_logits/chosen": -2.3946948051452637, |
|
"eval_logits/rejected": -2.3562896251678467, |
|
"eval_logps/chosen": -266.8748474121094, |
|
"eval_logps/rejected": -242.36962890625, |
|
"eval_loss": 0.8932417035102844, |
|
"eval_rewards/accuracies": 0.6517857313156128, |
|
"eval_rewards/chosen": 0.008037895895540714, |
|
"eval_rewards/margins": 0.007784782908856869, |
|
"eval_rewards/rejected": 0.00025311243371106684, |
|
"eval_runtime": 123.1142, |
|
"eval_samples_per_second": 16.245, |
|
"eval_steps_per_second": 0.341, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"grad_norm": 19.25, |
|
"learning_rate": 1.7457783959374585e-06, |
|
"logits/chosen": -2.404486894607544, |
|
"logits/rejected": -2.3604061603546143, |
|
"logps/chosen": -278.95855712890625, |
|
"logps/rejected": -251.1371307373047, |
|
"loss": 0.0348, |
|
"rewards/accuracies": 0.9275000691413879, |
|
"rewards/chosen": 0.025924455374479294, |
|
"rewards/margins": 0.037114791572093964, |
|
"rewards/rejected": -0.011190338991582394, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"grad_norm": 32.25, |
|
"learning_rate": 1.680559379324558e-06, |
|
"logits/chosen": -2.390227794647217, |
|
"logits/rejected": -2.3385822772979736, |
|
"logps/chosen": -292.6279296875, |
|
"logps/rejected": -254.8878173828125, |
|
"loss": 0.0299, |
|
"rewards/accuracies": 0.9099999666213989, |
|
"rewards/chosen": 0.02545427717268467, |
|
"rewards/margins": 0.03745580464601517, |
|
"rewards/rejected": -0.012001526542007923, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"grad_norm": 60.5, |
|
"learning_rate": 1.6159593723800013e-06, |
|
"logits/chosen": -2.4059481620788574, |
|
"logits/rejected": -2.3442025184631348, |
|
"logps/chosen": -264.06060791015625, |
|
"logps/rejected": -246.68521118164062, |
|
"loss": 0.0307, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.0275394506752491, |
|
"rewards/margins": 0.036764778196811676, |
|
"rewards/rejected": -0.00922533217817545, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"grad_norm": 49.75, |
|
"learning_rate": 1.5520271742819883e-06, |
|
"logits/chosen": -2.389446973800659, |
|
"logits/rejected": -2.3524551391601562, |
|
"logps/chosen": -271.63641357421875, |
|
"logps/rejected": -255.3404998779297, |
|
"loss": 0.0301, |
|
"rewards/accuracies": 0.877500057220459, |
|
"rewards/chosen": 0.02547125145792961, |
|
"rewards/margins": 0.03725877031683922, |
|
"rewards/rejected": -0.01178752165287733, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"grad_norm": 24.625, |
|
"learning_rate": 1.4888110797424783e-06, |
|
"logits/chosen": -2.4469919204711914, |
|
"logits/rejected": -2.3791050910949707, |
|
"logps/chosen": -315.74066162109375, |
|
"logps/rejected": -277.9239501953125, |
|
"loss": 0.0354, |
|
"rewards/accuracies": 0.9025000333786011, |
|
"rewards/chosen": 0.03072303533554077, |
|
"rewards/margins": 0.048735830932855606, |
|
"rewards/rejected": -0.018012793734669685, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"grad_norm": 54.0, |
|
"learning_rate": 1.4263588425251052e-06, |
|
"logits/chosen": -2.4028658866882324, |
|
"logits/rejected": -2.3509697914123535, |
|
"logps/chosen": -289.78485107421875, |
|
"logps/rejected": -251.5, |
|
"loss": 0.0267, |
|
"rewards/accuracies": 0.9375001192092896, |
|
"rewards/chosen": 0.028129303827881813, |
|
"rewards/margins": 0.04094386473298073, |
|
"rewards/rejected": -0.01281456183642149, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"grad_norm": 25.625, |
|
"learning_rate": 1.3647176393717509e-06, |
|
"logits/chosen": -2.4022791385650635, |
|
"logits/rejected": -2.3641726970672607, |
|
"logps/chosen": -278.85333251953125, |
|
"logps/rejected": -270.0050964355469, |
|
"loss": 0.0217, |
|
"rewards/accuracies": 0.9199999570846558, |
|
"rewards/chosen": 0.025644132867455482, |
|
"rewards/margins": 0.03728828951716423, |
|
"rewards/rejected": -0.011644158512353897, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"grad_norm": 30.875, |
|
"learning_rate": 1.303934034364983e-06, |
|
"logits/chosen": -2.3777599334716797, |
|
"logits/rejected": -2.3236684799194336, |
|
"logps/chosen": -261.53216552734375, |
|
"logps/rejected": -239.6273651123047, |
|
"loss": 0.0216, |
|
"rewards/accuracies": 0.9025000333786011, |
|
"rewards/chosen": 0.024678941816091537, |
|
"rewards/margins": 0.03661385923624039, |
|
"rewards/rejected": -0.011934916488826275, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"grad_norm": 87.0, |
|
"learning_rate": 1.2440539437533075e-06, |
|
"logits/chosen": -2.352806806564331, |
|
"logits/rejected": -2.3354265689849854, |
|
"logps/chosen": -269.5535583496094, |
|
"logps/rejected": -268.3061218261719, |
|
"loss": 0.0273, |
|
"rewards/accuracies": 0.9300000071525574, |
|
"rewards/chosen": 0.022415757179260254, |
|
"rewards/margins": 0.036310791969299316, |
|
"rewards/rejected": -0.013895031996071339, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"grad_norm": 33.25, |
|
"learning_rate": 1.1851226012658015e-06, |
|
"logits/chosen": -2.366988182067871, |
|
"logits/rejected": -2.323378562927246, |
|
"logps/chosen": -264.4871520996094, |
|
"logps/rejected": -251.3701934814453, |
|
"loss": 0.029, |
|
"rewards/accuracies": 0.9175000190734863, |
|
"rewards/chosen": 0.023964881896972656, |
|
"rewards/margins": 0.039505355060100555, |
|
"rewards/rejected": -0.015540470369160175, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"eval_logits/chosen": -2.411829948425293, |
|
"eval_logits/rejected": -2.3752424716949463, |
|
"eval_logps/chosen": -266.7797546386719, |
|
"eval_logps/rejected": -242.3114013671875, |
|
"eval_loss": 0.9391952157020569, |
|
"eval_rewards/accuracies": 0.6577380895614624, |
|
"eval_rewards/chosen": 0.008988723158836365, |
|
"eval_rewards/margins": 0.008153370581567287, |
|
"eval_rewards/rejected": 0.0008353526936843991, |
|
"eval_runtime": 123.1579, |
|
"eval_samples_per_second": 16.239, |
|
"eval_steps_per_second": 0.341, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"grad_norm": 16.5, |
|
"learning_rate": 1.1271845239423196e-06, |
|
"logits/chosen": -2.4022092819213867, |
|
"logits/rejected": -2.357339382171631, |
|
"logps/chosen": -289.2544250488281, |
|
"logps/rejected": -258.82379150390625, |
|
"loss": 0.0269, |
|
"rewards/accuracies": 0.9175000190734863, |
|
"rewards/chosen": 0.022589916363358498, |
|
"rewards/margins": 0.03589435666799545, |
|
"rewards/rejected": -0.013304440304636955, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"grad_norm": 24.25, |
|
"learning_rate": 1.0702834785050893e-06, |
|
"logits/chosen": -2.3661084175109863, |
|
"logits/rejected": -2.3483455181121826, |
|
"logps/chosen": -276.6420593261719, |
|
"logps/rejected": -278.82080078125, |
|
"loss": 0.033, |
|
"rewards/accuracies": 0.9250000715255737, |
|
"rewards/chosen": 0.021236615255475044, |
|
"rewards/margins": 0.038433950394392014, |
|
"rewards/rejected": -0.017197338864207268, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"grad_norm": 40.5, |
|
"learning_rate": 1.0144624482971082e-06, |
|
"logits/chosen": -2.4388625621795654, |
|
"logits/rejected": -2.380392551422119, |
|
"logps/chosen": -271.2420349121094, |
|
"logps/rejected": -256.2777404785156, |
|
"loss": 0.0275, |
|
"rewards/accuracies": 0.9350000619888306, |
|
"rewards/chosen": 0.022215455770492554, |
|
"rewards/margins": 0.03798101097345352, |
|
"rewards/rejected": -0.01576555334031582, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"grad_norm": 14.0, |
|
"learning_rate": 9.597636008123052e-07, |
|
"logits/chosen": -2.4123058319091797, |
|
"logits/rejected": -2.3628923892974854, |
|
"logps/chosen": -308.3174743652344, |
|
"logps/rejected": -278.7733459472656, |
|
"loss": 0.0278, |
|
"rewards/accuracies": 0.942500114440918, |
|
"rewards/chosen": 0.024744439870119095, |
|
"rewards/margins": 0.038902923464775085, |
|
"rewards/rejected": -0.014158482663333416, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"grad_norm": 20.375, |
|
"learning_rate": 9.06228255841991e-07, |
|
"logits/chosen": -2.382359504699707, |
|
"logits/rejected": -2.3492817878723145, |
|
"logps/chosen": -264.2523193359375, |
|
"logps/rejected": -256.5533752441406, |
|
"loss": 0.0279, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.022538714110851288, |
|
"rewards/margins": 0.03617415204644203, |
|
"rewards/rejected": -0.013635434210300446, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"grad_norm": 36.25, |
|
"learning_rate": 8.538968542616846e-07, |
|
"logits/chosen": -2.41325044631958, |
|
"logits/rejected": -2.375253677368164, |
|
"logps/chosen": -281.48248291015625, |
|
"logps/rejected": -265.0972900390625, |
|
"loss": 0.0179, |
|
"rewards/accuracies": 0.9075000882148743, |
|
"rewards/chosen": 0.025317683815956116, |
|
"rewards/margins": 0.038605697453022, |
|
"rewards/rejected": -0.013288016431033611, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"grad_norm": 29.625, |
|
"learning_rate": 8.028089274818624e-07, |
|
"logits/chosen": -2.4157140254974365, |
|
"logits/rejected": -2.377472162246704, |
|
"logps/chosen": -278.72564697265625, |
|
"logps/rejected": -257.7362365722656, |
|
"loss": 0.034, |
|
"rewards/accuracies": 0.9274999499320984, |
|
"rewards/chosen": 0.023796474561095238, |
|
"rewards/margins": 0.03968465328216553, |
|
"rewards/rejected": -0.01588817685842514, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"grad_norm": 24.5, |
|
"learning_rate": 7.530030675857252e-07, |
|
"logits/chosen": -2.371452569961548, |
|
"logits/rejected": -2.33616304397583, |
|
"logps/chosen": -278.2137756347656, |
|
"logps/rejected": -254.2333984375, |
|
"loss": 0.0245, |
|
"rewards/accuracies": 0.9350000619888306, |
|
"rewards/chosen": 0.028386935591697693, |
|
"rewards/margins": 0.044838014990091324, |
|
"rewards/rejected": -0.01645107939839363, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"grad_norm": 31.5, |
|
"learning_rate": 7.045168981765427e-07, |
|
"logits/chosen": -2.4061717987060547, |
|
"logits/rejected": -2.367501735687256, |
|
"logps/chosen": -277.0457763671875, |
|
"logps/rejected": -248.8456268310547, |
|
"loss": 0.0248, |
|
"rewards/accuracies": 0.9325000047683716, |
|
"rewards/chosen": 0.025927498936653137, |
|
"rewards/margins": 0.03844950348138809, |
|
"rewards/rejected": -0.012522002682089806, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"grad_norm": 12.875, |
|
"learning_rate": 6.573870459565907e-07, |
|
"logits/chosen": -2.381437301635742, |
|
"logits/rejected": -2.3325188159942627, |
|
"logps/chosen": -293.3424377441406, |
|
"logps/rejected": -262.5635681152344, |
|
"loss": 0.0198, |
|
"rewards/accuracies": 0.9225000143051147, |
|
"rewards/chosen": 0.02644011378288269, |
|
"rewards/margins": 0.04168093949556351, |
|
"rewards/rejected": -0.015240825712680817, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"eval_logits/chosen": -2.4145004749298096, |
|
"eval_logits/rejected": -2.3780155181884766, |
|
"eval_logps/chosen": -266.8046875, |
|
"eval_logps/rejected": -242.29171752929688, |
|
"eval_loss": 0.820038914680481, |
|
"eval_rewards/accuracies": 0.6577380895614624, |
|
"eval_rewards/chosen": 0.008739516139030457, |
|
"eval_rewards/margins": 0.007707077078521252, |
|
"eval_rewards/rejected": 0.0010324395261704922, |
|
"eval_runtime": 123.2067, |
|
"eval_samples_per_second": 16.233, |
|
"eval_steps_per_second": 0.341, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"grad_norm": 33.75, |
|
"learning_rate": 6.116491130591478e-07, |
|
"logits/chosen": -2.410226821899414, |
|
"logits/rejected": -2.366502285003662, |
|
"logps/chosen": -279.39166259765625, |
|
"logps/rejected": -251.89364624023438, |
|
"loss": 0.0233, |
|
"rewards/accuracies": 0.9175001382827759, |
|
"rewards/chosen": 0.024083226919174194, |
|
"rewards/margins": 0.03586304560303688, |
|
"rewards/rejected": -0.011779818683862686, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 3.4375, |
|
"learning_rate": 5.673376501544641e-07, |
|
"logits/chosen": -2.41102933883667, |
|
"logits/rejected": -2.36838960647583, |
|
"logps/chosen": -274.3736877441406, |
|
"logps/rejected": -243.4824981689453, |
|
"loss": 0.017, |
|
"rewards/accuracies": 0.9175000190734863, |
|
"rewards/chosen": 0.023644987493753433, |
|
"rewards/margins": 0.0368778295814991, |
|
"rewards/rejected": -0.013232842087745667, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"grad_norm": 20.5, |
|
"learning_rate": 5.244861303500026e-07, |
|
"logits/chosen": -2.413541793823242, |
|
"logits/rejected": -2.362837314605713, |
|
"logps/chosen": -272.99359130859375, |
|
"logps/rejected": -239.32608032226562, |
|
"loss": 0.008, |
|
"rewards/accuracies": 0.9325000643730164, |
|
"rewards/chosen": 0.021824661642313004, |
|
"rewards/margins": 0.0353974774479866, |
|
"rewards/rejected": -0.013572819530963898, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"grad_norm": 3.765625, |
|
"learning_rate": 4.831269239046851e-07, |
|
"logits/chosen": -2.4089815616607666, |
|
"logits/rejected": -2.366555690765381, |
|
"logps/chosen": -266.49798583984375, |
|
"logps/rejected": -252.52145385742188, |
|
"loss": 0.0043, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.025229623541235924, |
|
"rewards/margins": 0.03910643607378006, |
|
"rewards/rejected": -0.013876812532544136, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"grad_norm": 18.75, |
|
"learning_rate": 4.4329127377623127e-07, |
|
"logits/chosen": -2.380474328994751, |
|
"logits/rejected": -2.3557381629943848, |
|
"logps/chosen": -275.6029968261719, |
|
"logps/rejected": -260.39776611328125, |
|
"loss": 0.0054, |
|
"rewards/accuracies": 0.9550000429153442, |
|
"rewards/chosen": 0.025818094611167908, |
|
"rewards/margins": 0.038945622742176056, |
|
"rewards/rejected": -0.013127523474395275, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"grad_norm": 14.8125, |
|
"learning_rate": 4.050092720200638e-07, |
|
"logits/chosen": -2.384019374847412, |
|
"logits/rejected": -2.338085889816284, |
|
"logps/chosen": -280.4701232910156, |
|
"logps/rejected": -250.2019805908203, |
|
"loss": 0.0052, |
|
"rewards/accuracies": 0.9575001001358032, |
|
"rewards/chosen": 0.028378132730722427, |
|
"rewards/margins": 0.042529620230197906, |
|
"rewards/rejected": -0.014151493087410927, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"grad_norm": 8.375, |
|
"learning_rate": 3.683098370576196e-07, |
|
"logits/chosen": -2.406979560852051, |
|
"logits/rejected": -2.3714377880096436, |
|
"logps/chosen": -287.4407958984375, |
|
"logps/rejected": -259.043212890625, |
|
"loss": 0.0062, |
|
"rewards/accuracies": 0.9449999928474426, |
|
"rewards/chosen": 0.024499880149960518, |
|
"rewards/margins": 0.03765181452035904, |
|
"rewards/rejected": -0.013151939027011395, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"grad_norm": 11.6875, |
|
"learning_rate": 3.3322069183122253e-07, |
|
"logits/chosen": -2.415555477142334, |
|
"logits/rejected": -2.3626112937927246, |
|
"logps/chosen": -270.87249755859375, |
|
"logps/rejected": -251.2595977783203, |
|
"loss": 0.0049, |
|
"rewards/accuracies": 0.9399999380111694, |
|
"rewards/chosen": 0.028504956513643265, |
|
"rewards/margins": 0.043677303940057755, |
|
"rewards/rejected": -0.015172350220382214, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"grad_norm": 60.75, |
|
"learning_rate": 2.997683428620296e-07, |
|
"logits/chosen": -2.4119620323181152, |
|
"logits/rejected": -2.3438758850097656, |
|
"logps/chosen": -288.2101745605469, |
|
"logps/rejected": -261.5445251464844, |
|
"loss": 0.0101, |
|
"rewards/accuracies": 0.9550000429153442, |
|
"rewards/chosen": 0.026679161936044693, |
|
"rewards/margins": 0.042851291596889496, |
|
"rewards/rejected": -0.016172129660844803, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"grad_norm": 23.25, |
|
"learning_rate": 2.6797806022686835e-07, |
|
"logits/chosen": -2.3794476985931396, |
|
"logits/rejected": -2.350893974304199, |
|
"logps/chosen": -262.9915771484375, |
|
"logps/rejected": -261.4454650878906, |
|
"loss": 0.0059, |
|
"rewards/accuracies": 0.9449998736381531, |
|
"rewards/chosen": 0.024703029543161392, |
|
"rewards/margins": 0.04311930388212204, |
|
"rewards/rejected": -0.018416276201605797, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"eval_logits/chosen": -2.410806894302368, |
|
"eval_logits/rejected": -2.3743793964385986, |
|
"eval_logps/chosen": -266.8759765625, |
|
"eval_logps/rejected": -242.3739471435547, |
|
"eval_loss": 0.8903548717498779, |
|
"eval_rewards/accuracies": 0.6577380895614624, |
|
"eval_rewards/chosen": 0.008026321418583393, |
|
"eval_rewards/margins": 0.007816384546458721, |
|
"eval_rewards/rejected": 0.00020993576617911458, |
|
"eval_runtime": 123.126, |
|
"eval_samples_per_second": 16.244, |
|
"eval_steps_per_second": 0.341, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"grad_norm": 33.5, |
|
"learning_rate": 2.378738584690926e-07, |
|
"logits/chosen": -2.3809187412261963, |
|
"logits/rejected": -2.345142126083374, |
|
"logps/chosen": -274.513916015625, |
|
"logps/rejected": -259.05767822265625, |
|
"loss": 0.0073, |
|
"rewards/accuracies": 0.9474999308586121, |
|
"rewards/chosen": 0.024536920711398125, |
|
"rewards/margins": 0.0396781824529171, |
|
"rewards/rejected": -0.015141261741518974, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"grad_norm": 6.09375, |
|
"learning_rate": 2.0947847845787073e-07, |
|
"logits/chosen": -2.380807638168335, |
|
"logits/rejected": -2.3750548362731934, |
|
"logps/chosen": -273.5718688964844, |
|
"logps/rejected": -271.99774169921875, |
|
"loss": 0.0056, |
|
"rewards/accuracies": 0.9699999690055847, |
|
"rewards/chosen": 0.025182534009218216, |
|
"rewards/margins": 0.04230727255344391, |
|
"rewards/rejected": -0.017124736681580544, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"grad_norm": 12.75, |
|
"learning_rate": 1.828133702096152e-07, |
|
"logits/chosen": -2.4007954597473145, |
|
"logits/rejected": -2.3510959148406982, |
|
"logps/chosen": -297.80023193359375, |
|
"logps/rejected": -267.09271240234375, |
|
"loss": 0.0058, |
|
"rewards/accuracies": 0.9350000619888306, |
|
"rewards/chosen": 0.027246862649917603, |
|
"rewards/margins": 0.04566134512424469, |
|
"rewards/rejected": -0.01841447874903679, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"grad_norm": 6.8125, |
|
"learning_rate": 1.5789867668453224e-07, |
|
"logits/chosen": -2.359222650527954, |
|
"logits/rejected": -2.316779613494873, |
|
"logps/chosen": -256.35321044921875, |
|
"logps/rejected": -244.2194366455078, |
|
"loss": 0.005, |
|
"rewards/accuracies": 0.940000057220459, |
|
"rewards/chosen": 0.02366521954536438, |
|
"rewards/margins": 0.03908833488821983, |
|
"rewards/rejected": -0.015423113480210304, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"grad_norm": 10.625, |
|
"learning_rate": 1.3475321857052387e-07, |
|
"logits/chosen": -2.3972084522247314, |
|
"logits/rejected": -2.3675570487976074, |
|
"logps/chosen": -278.0527038574219, |
|
"logps/rejected": -256.3372497558594, |
|
"loss": 0.0053, |
|
"rewards/accuracies": 0.9575001001358032, |
|
"rewards/chosen": 0.025035608559846878, |
|
"rewards/margins": 0.039394162595272064, |
|
"rewards/rejected": -0.014358552172780037, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"grad_norm": 24.5, |
|
"learning_rate": 1.1339448006594284e-07, |
|
"logits/chosen": -2.3742661476135254, |
|
"logits/rejected": -2.3620691299438477, |
|
"logps/chosen": -269.88934326171875, |
|
"logps/rejected": -263.372314453125, |
|
"loss": 0.0056, |
|
"rewards/accuracies": 0.9574999809265137, |
|
"rewards/chosen": 0.02747185155749321, |
|
"rewards/margins": 0.04529104381799698, |
|
"rewards/rejected": -0.01781919226050377, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"grad_norm": 15.125, |
|
"learning_rate": 9.383859567194148e-08, |
|
"logits/chosen": -2.4092886447906494, |
|
"logits/rejected": -2.3820230960845947, |
|
"logps/chosen": -292.05755615234375, |
|
"logps/rejected": -274.00262451171875, |
|
"loss": 0.0076, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": 0.02904806099832058, |
|
"rewards/margins": 0.04705999046564102, |
|
"rewards/rejected": -0.018011927604675293, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"grad_norm": 6.125, |
|
"learning_rate": 7.610033800438343e-08, |
|
"logits/chosen": -2.404353141784668, |
|
"logits/rejected": -2.3535656929016113, |
|
"logps/chosen": -279.27001953125, |
|
"logps/rejected": -261.3420104980469, |
|
"loss": 0.0036, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.023024918511509895, |
|
"rewards/margins": 0.038382213562726974, |
|
"rewards/rejected": -0.015357298776507378, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"grad_norm": 15.3125, |
|
"learning_rate": 6.019310663453654e-08, |
|
"logits/chosen": -2.379361867904663, |
|
"logits/rejected": -2.348564624786377, |
|
"logps/chosen": -272.57965087890625, |
|
"logps/rejected": -280.6869201660156, |
|
"loss": 0.0045, |
|
"rewards/accuracies": 0.9474999308586121, |
|
"rewards/chosen": 0.02704106830060482, |
|
"rewards/margins": 0.04761399328708649, |
|
"rewards/rejected": -0.02057291939854622, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"grad_norm": 6.5625, |
|
"learning_rate": 4.6128917966964394e-08, |
|
"logits/chosen": -2.3975164890289307, |
|
"logits/rejected": -2.354431390762329, |
|
"logps/chosen": -264.81683349609375, |
|
"logps/rejected": -240.6919403076172, |
|
"loss": 0.0042, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.023663988336920738, |
|
"rewards/margins": 0.037333834916353226, |
|
"rewards/rejected": -0.013669842854142189, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"eval_logits/chosen": -2.4119250774383545, |
|
"eval_logits/rejected": -2.375300407409668, |
|
"eval_logps/chosen": -266.87713623046875, |
|
"eval_logps/rejected": -242.38916015625, |
|
"eval_loss": 0.8778771162033081, |
|
"eval_rewards/accuracies": 0.6517857313156128, |
|
"eval_rewards/chosen": 0.00801478698849678, |
|
"eval_rewards/margins": 0.007957086898386478, |
|
"eval_rewards/rejected": 5.7699922763276845e-05, |
|
"eval_runtime": 123.014, |
|
"eval_samples_per_second": 16.258, |
|
"eval_steps_per_second": 0.341, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"grad_norm": 27.0, |
|
"learning_rate": 3.3918396162275214e-08, |
|
"logits/chosen": -2.429567337036133, |
|
"logits/rejected": -2.401862621307373, |
|
"logps/chosen": -265.3700866699219, |
|
"logps/rejected": -255.75082397460938, |
|
"loss": 0.0077, |
|
"rewards/accuracies": 0.9475001096725464, |
|
"rewards/chosen": 0.02286478877067566, |
|
"rewards/margins": 0.037350136786699295, |
|
"rewards/rejected": -0.01448534894734621, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"grad_norm": 29.625, |
|
"learning_rate": 2.3570765111574357e-08, |
|
"logits/chosen": -2.420926570892334, |
|
"logits/rejected": -2.3869175910949707, |
|
"logps/chosen": -275.02593994140625, |
|
"logps/rejected": -250.9207000732422, |
|
"loss": 0.0076, |
|
"rewards/accuracies": 0.9325000643730164, |
|
"rewards/chosen": 0.025391753762960434, |
|
"rewards/margins": 0.04114841669797897, |
|
"rewards/rejected": -0.01575666293501854, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"grad_norm": 12.8125, |
|
"learning_rate": 1.5093841468690473e-08, |
|
"logits/chosen": -2.378108501434326, |
|
"logits/rejected": -2.3375566005706787, |
|
"logps/chosen": -278.16180419921875, |
|
"logps/rejected": -249.80557250976562, |
|
"loss": 0.0043, |
|
"rewards/accuracies": 0.9550000429153442, |
|
"rewards/chosen": 0.02722669579088688, |
|
"rewards/margins": 0.042906779795885086, |
|
"rewards/rejected": -0.015680085867643356, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"grad_norm": 6.65625, |
|
"learning_rate": 8.494028745434368e-09, |
|
"logits/chosen": -2.4071974754333496, |
|
"logits/rejected": -2.3638107776641846, |
|
"logps/chosen": -272.5426940917969, |
|
"logps/rejected": -254.20095825195312, |
|
"loss": 0.0031, |
|
"rewards/accuracies": 0.9424999952316284, |
|
"rewards/chosen": 0.024697447195649147, |
|
"rewards/margins": 0.0481376014649868, |
|
"rewards/rejected": -0.023440156131982803, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"grad_norm": 13.625, |
|
"learning_rate": 3.776312474353394e-09, |
|
"logits/chosen": -2.382949113845825, |
|
"logits/rejected": -2.3320465087890625, |
|
"logps/chosen": -262.14154052734375, |
|
"logps/rejected": -247.5254669189453, |
|
"loss": 0.0039, |
|
"rewards/accuracies": 0.9550000429153442, |
|
"rewards/chosen": 0.022636910900473595, |
|
"rewards/margins": 0.03510580584406853, |
|
"rewards/rejected": -0.012468894943594933, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"grad_norm": 6.78125, |
|
"learning_rate": 9.442564426342949e-10, |
|
"logits/chosen": -2.3739681243896484, |
|
"logits/rejected": -2.3734383583068848, |
|
"logps/chosen": -248.2006072998047, |
|
"logps/rejected": -255.94161987304688, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 0.9275000691413879, |
|
"rewards/chosen": 0.025133823975920677, |
|
"rewards/margins": 0.04240426793694496, |
|
"rewards/rejected": -0.017270449548959732, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"grad_norm": 13.1875, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -2.393690586090088, |
|
"logits/rejected": -2.3561182022094727, |
|
"logps/chosen": -263.6891174316406, |
|
"logps/rejected": -243.8829345703125, |
|
"loss": 0.0063, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.024886978790163994, |
|
"rewards/margins": 0.0397338829934597, |
|
"rewards/rejected": -0.014846903271973133, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"step": 1270, |
|
"total_flos": 0.0, |
|
"train_loss": 0.164279118501603, |
|
"train_runtime": 43545.4617, |
|
"train_samples_per_second": 7.02, |
|
"train_steps_per_second": 0.029 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1270, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 10, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|