|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 476, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01050420168067227, |
|
"grad_norm": 18.321366845625462, |
|
"learning_rate": 3.125e-08, |
|
"logits/chosen": -2.9222915172576904, |
|
"logits/rejected": -2.8865013122558594, |
|
"logps/chosen": -0.9845348596572876, |
|
"logps/rejected": -1.163271427154541, |
|
"loss": 1.6281, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -1.9690697193145752, |
|
"rewards/margins": 0.35747313499450684, |
|
"rewards/rejected": -2.326542854309082, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02100840336134454, |
|
"grad_norm": 17.6534655125861, |
|
"learning_rate": 6.25e-08, |
|
"logits/chosen": -2.9073705673217773, |
|
"logits/rejected": -2.8619837760925293, |
|
"logps/chosen": -0.9123918414115906, |
|
"logps/rejected": -1.1516292095184326, |
|
"loss": 1.5762, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.8247836828231812, |
|
"rewards/margins": 0.47847509384155273, |
|
"rewards/rejected": -2.3032584190368652, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.031512605042016806, |
|
"grad_norm": 19.44309460886479, |
|
"learning_rate": 9.375e-08, |
|
"logits/chosen": -2.939253807067871, |
|
"logits/rejected": -2.871269941329956, |
|
"logps/chosen": -0.9964561462402344, |
|
"logps/rejected": -1.157931923866272, |
|
"loss": 1.6292, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.9929122924804688, |
|
"rewards/margins": 0.32295167446136475, |
|
"rewards/rejected": -2.315863847732544, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04201680672268908, |
|
"grad_norm": 23.00550320924175, |
|
"learning_rate": 1.25e-07, |
|
"logits/chosen": -2.8980793952941895, |
|
"logits/rejected": -2.8317883014678955, |
|
"logps/chosen": -1.0304123163223267, |
|
"logps/rejected": -1.2014151811599731, |
|
"loss": 1.598, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.0608246326446533, |
|
"rewards/margins": 0.34200599789619446, |
|
"rewards/rejected": -2.4028303623199463, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.052521008403361345, |
|
"grad_norm": 25.91201580448508, |
|
"learning_rate": 1.5625e-07, |
|
"logits/chosen": -2.89921236038208, |
|
"logits/rejected": -2.838594913482666, |
|
"logps/chosen": -0.9657201766967773, |
|
"logps/rejected": -1.170414686203003, |
|
"loss": 1.6399, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.9314403533935547, |
|
"rewards/margins": 0.40938907861709595, |
|
"rewards/rejected": -2.340829372406006, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06302521008403361, |
|
"grad_norm": 19.053951631856187, |
|
"learning_rate": 1.875e-07, |
|
"logits/chosen": -2.915055513381958, |
|
"logits/rejected": -2.8307695388793945, |
|
"logps/chosen": -1.031659722328186, |
|
"logps/rejected": -1.2121422290802002, |
|
"loss": 1.5382, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.063319444656372, |
|
"rewards/margins": 0.3609650731086731, |
|
"rewards/rejected": -2.4242844581604004, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07352941176470588, |
|
"grad_norm": 22.225870405405676, |
|
"learning_rate": 2.1874999999999997e-07, |
|
"logits/chosen": -2.8420331478118896, |
|
"logits/rejected": -2.8062918186187744, |
|
"logps/chosen": -1.0356570482254028, |
|
"logps/rejected": -1.2093064785003662, |
|
"loss": 1.5637, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.0713140964508057, |
|
"rewards/margins": 0.34729865193367004, |
|
"rewards/rejected": -2.4186129570007324, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08403361344537816, |
|
"grad_norm": 25.66800900270909, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": -2.845728635787964, |
|
"logits/rejected": -2.8214545249938965, |
|
"logps/chosen": -1.0431854724884033, |
|
"logps/rejected": -1.3399583101272583, |
|
"loss": 1.5204, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -2.0863709449768066, |
|
"rewards/margins": 0.5935453176498413, |
|
"rewards/rejected": -2.6799166202545166, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09453781512605042, |
|
"grad_norm": 18.254417500947117, |
|
"learning_rate": 2.8125e-07, |
|
"logits/chosen": -2.8101553916931152, |
|
"logits/rejected": -2.773531436920166, |
|
"logps/chosen": -1.061798334121704, |
|
"logps/rejected": -1.3759087324142456, |
|
"loss": 1.501, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.123596668243408, |
|
"rewards/margins": 0.6282207369804382, |
|
"rewards/rejected": -2.751817464828491, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10504201680672269, |
|
"grad_norm": 20.430861520566957, |
|
"learning_rate": 2.999838368626891e-07, |
|
"logits/chosen": -2.9204559326171875, |
|
"logits/rejected": -2.878157615661621, |
|
"logps/chosen": -1.0430495738983154, |
|
"logps/rejected": -1.2767090797424316, |
|
"loss": 1.5858, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.086099147796631, |
|
"rewards/margins": 0.4673191010951996, |
|
"rewards/rejected": -2.5534181594848633, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11554621848739496, |
|
"grad_norm": 19.914448467924856, |
|
"learning_rate": 2.9980204156901854e-07, |
|
"logits/chosen": -2.7936322689056396, |
|
"logits/rejected": -2.7450051307678223, |
|
"logps/chosen": -1.1547470092773438, |
|
"logps/rejected": -1.436762809753418, |
|
"loss": 1.5254, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.3094940185546875, |
|
"rewards/margins": 0.5640314817428589, |
|
"rewards/rejected": -2.873525619506836, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.12605042016806722, |
|
"grad_norm": 27.25108493191, |
|
"learning_rate": 2.994184927185504e-07, |
|
"logits/chosen": -2.8165132999420166, |
|
"logits/rejected": -2.765676736831665, |
|
"logps/chosen": -1.178091287612915, |
|
"logps/rejected": -1.3924609422683716, |
|
"loss": 1.5556, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.35618257522583, |
|
"rewards/margins": 0.428739458322525, |
|
"rewards/rejected": -2.784921884536743, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.13655462184873948, |
|
"grad_norm": 25.118665709906168, |
|
"learning_rate": 2.9883370687530456e-07, |
|
"logits/chosen": -2.8244755268096924, |
|
"logits/rejected": -2.7773241996765137, |
|
"logps/chosen": -1.1520100831985474, |
|
"logps/rejected": -1.447547197341919, |
|
"loss": 1.451, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.3040201663970947, |
|
"rewards/margins": 0.5910741090774536, |
|
"rewards/rejected": -2.895094394683838, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.14705882352941177, |
|
"grad_norm": 29.16487182636346, |
|
"learning_rate": 2.980484716295075e-07, |
|
"logits/chosen": -2.787673234939575, |
|
"logits/rejected": -2.726388692855835, |
|
"logps/chosen": -1.0457687377929688, |
|
"logps/rejected": -1.5030543804168701, |
|
"loss": 1.4511, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.0915374755859375, |
|
"rewards/margins": 0.9145712852478027, |
|
"rewards/rejected": -3.0061087608337402, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.15756302521008403, |
|
"grad_norm": 26.07757243320597, |
|
"learning_rate": 2.970638445368648e-07, |
|
"logits/chosen": -2.776176929473877, |
|
"logits/rejected": -2.7326908111572266, |
|
"logps/chosen": -1.0123913288116455, |
|
"logps/rejected": -1.404775619506836, |
|
"loss": 1.4303, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.024782657623291, |
|
"rewards/margins": 0.78476881980896, |
|
"rewards/rejected": -2.809551239013672, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.16806722689075632, |
|
"grad_norm": 35.195975635749924, |
|
"learning_rate": 2.958811516942438e-07, |
|
"logits/chosen": -2.767622470855713, |
|
"logits/rejected": -2.7111330032348633, |
|
"logps/chosen": -1.1310784816741943, |
|
"logps/rejected": -1.712956428527832, |
|
"loss": 1.3445, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.2621569633483887, |
|
"rewards/margins": 1.1637558937072754, |
|
"rewards/rejected": -3.425912857055664, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.17857142857142858, |
|
"grad_norm": 29.558096882428416, |
|
"learning_rate": 2.9450198595368514e-07, |
|
"logits/chosen": -2.7697668075561523, |
|
"logits/rejected": -2.7279648780822754, |
|
"logps/chosen": -1.150879979133606, |
|
"logps/rejected": -1.5715720653533936, |
|
"loss": 1.3627, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.301759958267212, |
|
"rewards/margins": 0.8413840532302856, |
|
"rewards/rejected": -3.143144130706787, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.18907563025210083, |
|
"grad_norm": 31.18138106236945, |
|
"learning_rate": 2.929282047771477e-07, |
|
"logits/chosen": -2.696549892425537, |
|
"logits/rejected": -2.6848576068878174, |
|
"logps/chosen": -1.1329095363616943, |
|
"logps/rejected": -1.585242509841919, |
|
"loss": 1.3747, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.2658190727233887, |
|
"rewards/margins": 0.9046661257743835, |
|
"rewards/rejected": -3.170485019683838, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.19957983193277312, |
|
"grad_norm": 91.23116963300726, |
|
"learning_rate": 2.9116192773487665e-07, |
|
"logits/chosen": -2.682312488555908, |
|
"logits/rejected": -2.673649549484253, |
|
"logps/chosen": -1.3071677684783936, |
|
"logps/rejected": -1.7945388555526733, |
|
"loss": 1.4405, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.614335536956787, |
|
"rewards/margins": 0.9747417569160461, |
|
"rewards/rejected": -3.5890777111053467, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.21008403361344538, |
|
"grad_norm": 38.910751298944, |
|
"learning_rate": 2.892055336507641e-07, |
|
"logits/chosen": -2.6822099685668945, |
|
"logits/rejected": -2.6384642124176025, |
|
"logps/chosen": -1.2206847667694092, |
|
"logps/rejected": -1.8117921352386475, |
|
"loss": 1.3468, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.4413695335388184, |
|
"rewards/margins": 1.1822149753570557, |
|
"rewards/rejected": -3.623584270477295, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22058823529411764, |
|
"grad_norm": 27.439545713989038, |
|
"learning_rate": 2.8706165739854637e-07, |
|
"logits/chosen": -2.684013605117798, |
|
"logits/rejected": -2.660853147506714, |
|
"logps/chosen": -1.1910176277160645, |
|
"logps/rejected": -1.6350994110107422, |
|
"loss": 1.3852, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -2.382035255432129, |
|
"rewards/margins": 0.8881640434265137, |
|
"rewards/rejected": -3.2701988220214844, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.23109243697478993, |
|
"grad_norm": 29.807019016962876, |
|
"learning_rate": 2.847331863531529e-07, |
|
"logits/chosen": -2.6825053691864014, |
|
"logits/rejected": -2.6679558753967285, |
|
"logps/chosen": -1.1532232761383057, |
|
"logps/rejected": -1.7548431158065796, |
|
"loss": 1.2615, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.3064465522766113, |
|
"rewards/margins": 1.203240156173706, |
|
"rewards/rejected": -3.509686231613159, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2415966386554622, |
|
"grad_norm": 47.6414807939217, |
|
"learning_rate": 2.8222325650198677e-07, |
|
"logits/chosen": -2.676471471786499, |
|
"logits/rejected": -2.6575491428375244, |
|
"logps/chosen": -1.2915210723876953, |
|
"logps/rejected": -1.9804328680038452, |
|
"loss": 1.3405, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.5830421447753906, |
|
"rewards/margins": 1.377823829650879, |
|
"rewards/rejected": -3.9608657360076904, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.25210084033613445, |
|
"grad_norm": 33.68771160542956, |
|
"learning_rate": 2.7953524822137317e-07, |
|
"logits/chosen": -2.6282732486724854, |
|
"logits/rejected": -2.6111860275268555, |
|
"logps/chosen": -1.2532024383544922, |
|
"logps/rejected": -2.1360292434692383, |
|
"loss": 1.2154, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.5064048767089844, |
|
"rewards/margins": 1.7656539678573608, |
|
"rewards/rejected": -4.272058486938477, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.26260504201680673, |
|
"grad_norm": 36.94049761692212, |
|
"learning_rate": 2.766727817238648e-07, |
|
"logits/chosen": -2.625383138656616, |
|
"logits/rejected": -2.5985493659973145, |
|
"logps/chosen": -1.3159258365631104, |
|
"logps/rejected": -1.8669437170028687, |
|
"loss": 1.3794, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.6318516731262207, |
|
"rewards/margins": 1.1020352840423584, |
|
"rewards/rejected": -3.7338874340057373, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.27310924369747897, |
|
"grad_norm": 44.2795876444211, |
|
"learning_rate": 2.7363971218253573e-07, |
|
"logits/chosen": -2.585216760635376, |
|
"logits/rejected": -2.5424036979675293, |
|
"logps/chosen": -1.410796046257019, |
|
"logps/rejected": -2.0416605472564697, |
|
"loss": 1.3051, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.821592092514038, |
|
"rewards/margins": 1.261729121208191, |
|
"rewards/rejected": -4.0833210945129395, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.28361344537815125, |
|
"grad_norm": 41.62676495102148, |
|
"learning_rate": 2.7044012453882974e-07, |
|
"logits/chosen": -2.5913612842559814, |
|
"logits/rejected": -2.554213047027588, |
|
"logps/chosen": -1.5970208644866943, |
|
"logps/rejected": -2.28006649017334, |
|
"loss": 1.2034, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -3.1940417289733887, |
|
"rewards/margins": 1.3660913705825806, |
|
"rewards/rejected": -4.56013298034668, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.29411764705882354, |
|
"grad_norm": 36.45682514602446, |
|
"learning_rate": 2.670783280009569e-07, |
|
"logits/chosen": -2.583467960357666, |
|
"logits/rejected": -2.563615083694458, |
|
"logps/chosen": -1.3852840662002563, |
|
"logps/rejected": -1.976252794265747, |
|
"loss": 1.2209, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -2.7705681324005127, |
|
"rewards/margins": 1.1819374561309814, |
|
"rewards/rejected": -3.952505588531494, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.30462184873949577, |
|
"grad_norm": 32.90514134094626, |
|
"learning_rate": 2.635588502402468e-07, |
|
"logits/chosen": -2.6025681495666504, |
|
"logits/rejected": -2.5791728496551514, |
|
"logps/chosen": -1.444962978363037, |
|
"logps/rejected": -2.082648515701294, |
|
"loss": 1.2251, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.889925956726074, |
|
"rewards/margins": 1.2753708362579346, |
|
"rewards/rejected": -4.165297031402588, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.31512605042016806, |
|
"grad_norm": 46.925189207028446, |
|
"learning_rate": 2.598864312932762e-07, |
|
"logits/chosen": -2.5708370208740234, |
|
"logits/rejected": -2.5425729751586914, |
|
"logps/chosen": -1.558255910873413, |
|
"logps/rejected": -2.360576629638672, |
|
"loss": 1.2404, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -3.116511821746826, |
|
"rewards/margins": 1.6046416759490967, |
|
"rewards/rejected": -4.721153259277344, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.32563025210084034, |
|
"grad_norm": 44.68173396497493, |
|
"learning_rate": 2.560660171779821e-07, |
|
"logits/chosen": -2.5237948894500732, |
|
"logits/rejected": -2.5131349563598633, |
|
"logps/chosen": -1.7005817890167236, |
|
"logps/rejected": -2.477543592453003, |
|
"loss": 1.2383, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -3.4011635780334473, |
|
"rewards/margins": 1.5539240837097168, |
|
"rewards/rejected": -4.955087184906006, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.33613445378151263, |
|
"grad_norm": 42.56897964236879, |
|
"learning_rate": 2.521027532323594e-07, |
|
"logits/chosen": -2.50708270072937, |
|
"logits/rejected": -2.4973719120025635, |
|
"logps/chosen": -1.5736862421035767, |
|
"logps/rejected": -2.4314279556274414, |
|
"loss": 1.2177, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -3.1473724842071533, |
|
"rewards/margins": 1.7154836654663086, |
|
"rewards/rejected": -4.862855911254883, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.34663865546218486, |
|
"grad_norm": 42.67514136639567, |
|
"learning_rate": 2.480019771847139e-07, |
|
"logits/chosen": -2.4965438842773438, |
|
"logits/rejected": -2.5141289234161377, |
|
"logps/chosen": -1.6085281372070312, |
|
"logps/rejected": -2.5046117305755615, |
|
"loss": 1.1715, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -3.2170562744140625, |
|
"rewards/margins": 1.79216730594635, |
|
"rewards/rejected": -5.009223461151123, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.35714285714285715, |
|
"grad_norm": 56.3843788509327, |
|
"learning_rate": 2.4376921196480405e-07, |
|
"logits/chosen": -2.4241461753845215, |
|
"logits/rejected": -2.4171204566955566, |
|
"logps/chosen": -1.8740981817245483, |
|
"logps/rejected": -2.842223644256592, |
|
"loss": 1.1553, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -3.7481963634490967, |
|
"rewards/margins": 1.9362504482269287, |
|
"rewards/rejected": -5.684447288513184, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.36764705882352944, |
|
"grad_norm": 58.35243830598972, |
|
"learning_rate": 2.3941015826555265e-07, |
|
"logits/chosen": -2.433060646057129, |
|
"logits/rejected": -2.4348819255828857, |
|
"logps/chosen": -2.003147840499878, |
|
"logps/rejected": -2.907435894012451, |
|
"loss": 1.2262, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -4.006295680999756, |
|
"rewards/margins": 1.808576226234436, |
|
"rewards/rejected": -5.814871788024902, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.37815126050420167, |
|
"grad_norm": 62.00858329659252, |
|
"learning_rate": 2.3493068686534757e-07, |
|
"logits/chosen": -2.4191861152648926, |
|
"logits/rejected": -2.4209141731262207, |
|
"logps/chosen": -2.0410985946655273, |
|
"logps/rejected": -3.1209053993225098, |
|
"loss": 1.2189, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -4.082197189331055, |
|
"rewards/margins": 2.159613847732544, |
|
"rewards/rejected": -6.2418107986450195, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.38865546218487396, |
|
"grad_norm": 52.62029016306216, |
|
"learning_rate": 2.3033683072127066e-07, |
|
"logits/chosen": -2.4004642963409424, |
|
"logits/rejected": -2.3723645210266113, |
|
"logps/chosen": -1.9122893810272217, |
|
"logps/rejected": -3.104297161102295, |
|
"loss": 1.1119, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -3.8245787620544434, |
|
"rewards/margins": 2.38401460647583, |
|
"rewards/rejected": -6.20859432220459, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.39915966386554624, |
|
"grad_norm": 49.57165162916381, |
|
"learning_rate": 2.2563477684390454e-07, |
|
"logits/chosen": -2.394556999206543, |
|
"logits/rejected": -2.4077131748199463, |
|
"logps/chosen": -1.9445598125457764, |
|
"logps/rejected": -3.2773900032043457, |
|
"loss": 1.0746, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -3.8891196250915527, |
|
"rewards/margins": 2.6656596660614014, |
|
"rewards/rejected": -6.554780006408691, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.4096638655462185, |
|
"grad_norm": 42.22482180826213, |
|
"learning_rate": 2.2083085796465976e-07, |
|
"logits/chosen": -2.3444042205810547, |
|
"logits/rejected": -2.3371148109436035, |
|
"logps/chosen": -2.0608248710632324, |
|
"logps/rejected": -2.9502105712890625, |
|
"loss": 1.1684, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -4.121649742126465, |
|
"rewards/margins": 1.7787716388702393, |
|
"rewards/rejected": -5.900421142578125, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.42016806722689076, |
|
"grad_norm": 62.069592428442725, |
|
"learning_rate": 2.1593154400684523e-07, |
|
"logits/chosen": -2.3920085430145264, |
|
"logits/rejected": -2.3790066242218018, |
|
"logps/chosen": -2.172396183013916, |
|
"logps/rejected": -3.3875110149383545, |
|
"loss": 1.1134, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -4.344792366027832, |
|
"rewards/margins": 2.430229663848877, |
|
"rewards/rejected": -6.775022029876709, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.43067226890756305, |
|
"grad_norm": 63.80548454611886, |
|
"learning_rate": 2.1094343337196797e-07, |
|
"logits/chosen": -2.2799956798553467, |
|
"logits/rejected": -2.3044838905334473, |
|
"logps/chosen": -2.1241445541381836, |
|
"logps/rejected": -3.2871341705322266, |
|
"loss": 1.074, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -4.248289108276367, |
|
"rewards/margins": 2.325979471206665, |
|
"rewards/rejected": -6.574268341064453, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.4411764705882353, |
|
"grad_norm": 60.76644197865358, |
|
"learning_rate": 2.058732440529989e-07, |
|
"logits/chosen": -2.369267225265503, |
|
"logits/rejected": -2.3428282737731934, |
|
"logps/chosen": -2.2345564365386963, |
|
"logps/rejected": -3.428501844406128, |
|
"loss": 1.0777, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -4.469112873077393, |
|
"rewards/margins": 2.3878910541534424, |
|
"rewards/rejected": -6.857003688812256, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.45168067226890757, |
|
"grad_norm": 49.5591416904311, |
|
"learning_rate": 2.0072780458657222e-07, |
|
"logits/chosen": -2.3571441173553467, |
|
"logits/rejected": -2.3563666343688965, |
|
"logps/chosen": -2.1674928665161133, |
|
"logps/rejected": -3.2230000495910645, |
|
"loss": 1.0862, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -4.334985733032227, |
|
"rewards/margins": 2.1110141277313232, |
|
"rewards/rejected": -6.446000099182129, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.46218487394957986, |
|
"grad_norm": 53.25790647881489, |
|
"learning_rate": 1.9551404485630487e-07, |
|
"logits/chosen": -2.3252339363098145, |
|
"logits/rejected": -2.3368701934814453, |
|
"logps/chosen": -2.3293991088867188, |
|
"logps/rejected": -3.515172243118286, |
|
"loss": 1.113, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -4.6587982177734375, |
|
"rewards/margins": 2.371546506881714, |
|
"rewards/rejected": -7.030344486236572, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.4726890756302521, |
|
"grad_norm": 107.94133477979558, |
|
"learning_rate": 1.9023898675962123e-07, |
|
"logits/chosen": -2.2349350452423096, |
|
"logits/rejected": -2.270430088043213, |
|
"logps/chosen": -2.319396495819092, |
|
"logps/rejected": -3.6063385009765625, |
|
"loss": 1.0598, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -4.638792991638184, |
|
"rewards/margins": 2.573883533477783, |
|
"rewards/rejected": -7.212677001953125, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.4831932773109244, |
|
"grad_norm": 51.80093777317445, |
|
"learning_rate": 1.8490973475065407e-07, |
|
"logits/chosen": -2.2946877479553223, |
|
"logits/rejected": -2.2905642986297607, |
|
"logps/chosen": -2.3950748443603516, |
|
"logps/rejected": -3.634678602218628, |
|
"loss": 1.0982, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -4.790149688720703, |
|
"rewards/margins": 2.4792075157165527, |
|
"rewards/rejected": -7.269357204437256, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.49369747899159666, |
|
"grad_norm": 72.76258850252798, |
|
"learning_rate": 1.795334662719576e-07, |
|
"logits/chosen": -2.278480052947998, |
|
"logits/rejected": -2.299923896789551, |
|
"logps/chosen": -2.357292652130127, |
|
"logps/rejected": -3.7696902751922607, |
|
"loss": 1.0057, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -4.714585304260254, |
|
"rewards/margins": 2.8247950077056885, |
|
"rewards/rejected": -7.5393805503845215, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.5042016806722689, |
|
"grad_norm": 64.28632501194514, |
|
"learning_rate": 1.7411742208792024e-07, |
|
"logits/chosen": -2.2843871116638184, |
|
"logits/rejected": -2.300901412963867, |
|
"logps/chosen": -2.508634090423584, |
|
"logps/rejected": -3.8370189666748047, |
|
"loss": 1.033, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -5.017268180847168, |
|
"rewards/margins": 2.6567699909210205, |
|
"rewards/rejected": -7.674037933349609, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5147058823529411, |
|
"grad_norm": 56.78201656922531, |
|
"learning_rate": 1.686688965328944e-07, |
|
"logits/chosen": -2.2179243564605713, |
|
"logits/rejected": -2.2388010025024414, |
|
"logps/chosen": -2.3462517261505127, |
|
"logps/rejected": -3.506201982498169, |
|
"loss": 0.9703, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -4.692503452301025, |
|
"rewards/margins": 2.3199009895324707, |
|
"rewards/rejected": -7.012403964996338, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.5252100840336135, |
|
"grad_norm": 66.31368878059381, |
|
"learning_rate": 1.6319522768717944e-07, |
|
"logits/chosen": -2.254875421524048, |
|
"logits/rejected": -2.2779059410095215, |
|
"logps/chosen": -2.398496150970459, |
|
"logps/rejected": -3.7779440879821777, |
|
"loss": 1.0355, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -4.796992301940918, |
|
"rewards/margins": 2.758897542953491, |
|
"rewards/rejected": -7.5558881759643555, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5357142857142857, |
|
"grad_norm": 56.3335721813079, |
|
"learning_rate": 1.5770378749408654e-07, |
|
"logits/chosen": -2.2989799976348877, |
|
"logits/rejected": -2.2941720485687256, |
|
"logps/chosen": -2.581568479537964, |
|
"logps/rejected": -3.853482723236084, |
|
"loss": 1.0114, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -5.163136959075928, |
|
"rewards/margins": 2.5438289642333984, |
|
"rewards/rejected": -7.706965446472168, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5462184873949579, |
|
"grad_norm": 64.04241236117856, |
|
"learning_rate": 1.522019718313975e-07, |
|
"logits/chosen": -2.2507102489471436, |
|
"logits/rejected": -2.272916316986084, |
|
"logps/chosen": -2.6012022495269775, |
|
"logps/rejected": -4.0311384201049805, |
|
"loss": 0.992, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -5.202404499053955, |
|
"rewards/margins": 2.859873056411743, |
|
"rewards/rejected": -8.062276840209961, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5567226890756303, |
|
"grad_norm": 59.88114738443522, |
|
"learning_rate": 1.4669719055058805e-07, |
|
"logits/chosen": -2.2266743183135986, |
|
"logits/rejected": -2.2351810932159424, |
|
"logps/chosen": -2.7907989025115967, |
|
"logps/rejected": -3.9706473350524902, |
|
"loss": 1.0608, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -5.581597805023193, |
|
"rewards/margins": 2.35969614982605, |
|
"rewards/rejected": -7.9412946701049805, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5672268907563025, |
|
"grad_norm": 63.37030995368488, |
|
"learning_rate": 1.411968574972317e-07, |
|
"logits/chosen": -2.230888843536377, |
|
"logits/rejected": -2.2535951137542725, |
|
"logps/chosen": -2.7027249336242676, |
|
"logps/rejected": -4.1824774742126465, |
|
"loss": 0.8988, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -5.405449867248535, |
|
"rewards/margins": 2.9595046043395996, |
|
"rewards/rejected": -8.364954948425293, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5777310924369747, |
|
"grad_norm": 69.41737055216304, |
|
"learning_rate": 1.357083805260243e-07, |
|
"logits/chosen": -2.2285051345825195, |
|
"logits/rejected": -2.2328968048095703, |
|
"logps/chosen": -2.7089076042175293, |
|
"logps/rejected": -3.9290478229522705, |
|
"loss": 0.969, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -5.417815208435059, |
|
"rewards/margins": 2.440279483795166, |
|
"rewards/rejected": -7.858095645904541, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5882352941176471, |
|
"grad_norm": 63.48615863862009, |
|
"learning_rate": 1.302391515238772e-07, |
|
"logits/chosen": -2.2015397548675537, |
|
"logits/rejected": -2.2215192317962646, |
|
"logps/chosen": -2.722857713699341, |
|
"logps/rejected": -4.155056953430176, |
|
"loss": 0.9593, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -5.445715427398682, |
|
"rewards/margins": 2.86439847946167, |
|
"rewards/rejected": -8.310113906860352, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.5987394957983193, |
|
"grad_norm": 87.6726372411929, |
|
"learning_rate": 1.247965364545152e-07, |
|
"logits/chosen": -2.1690385341644287, |
|
"logits/rejected": -2.1941065788269043, |
|
"logps/chosen": -2.697335720062256, |
|
"logps/rejected": -4.129209995269775, |
|
"loss": 1.0182, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -5.394671440124512, |
|
"rewards/margins": 2.8637471199035645, |
|
"rewards/rejected": -8.25841999053955, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.6092436974789915, |
|
"grad_norm": 54.49746884782157, |
|
"learning_rate": 1.193878654379889e-07, |
|
"logits/chosen": -2.1245057582855225, |
|
"logits/rejected": -2.1610589027404785, |
|
"logps/chosen": -2.6949501037597656, |
|
"logps/rejected": -4.0747246742248535, |
|
"loss": 1.0182, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -5.389900207519531, |
|
"rewards/margins": 2.759549617767334, |
|
"rewards/rejected": -8.149449348449707, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6197478991596639, |
|
"grad_norm": 49.136356343546524, |
|
"learning_rate": 1.1402042287846068e-07, |
|
"logits/chosen": -2.1676132678985596, |
|
"logits/rejected": -2.1930439472198486, |
|
"logps/chosen": -2.85373592376709, |
|
"logps/rejected": -4.212955951690674, |
|
"loss": 1.0398, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -5.70747184753418, |
|
"rewards/margins": 2.7184391021728516, |
|
"rewards/rejected": -8.425911903381348, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.6302521008403361, |
|
"grad_norm": 56.2186810691314, |
|
"learning_rate": 1.0870143765356105e-07, |
|
"logits/chosen": -2.1709885597229004, |
|
"logits/rejected": -2.1842150688171387, |
|
"logps/chosen": -2.9935240745544434, |
|
"logps/rejected": -4.36973762512207, |
|
"loss": 1.0064, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -5.987048149108887, |
|
"rewards/margins": 2.7524266242980957, |
|
"rewards/rejected": -8.73947525024414, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6407563025210085, |
|
"grad_norm": 74.55055606717697, |
|
"learning_rate": 1.0343807337852794e-07, |
|
"logits/chosen": -2.1351749897003174, |
|
"logits/rejected": -2.1373703479766846, |
|
"logps/chosen": -2.965303897857666, |
|
"logps/rejected": -4.419961929321289, |
|
"loss": 1.0268, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -5.930607795715332, |
|
"rewards/margins": 2.9093151092529297, |
|
"rewards/rejected": -8.839923858642578, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6512605042016807, |
|
"grad_norm": 53.97579171817796, |
|
"learning_rate": 9.82374187582421e-08, |
|
"logits/chosen": -2.1092991828918457, |
|
"logits/rejected": -2.133781909942627, |
|
"logps/chosen": -2.9700093269348145, |
|
"logps/rejected": -4.346618175506592, |
|
"loss": 0.9648, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -5.940018653869629, |
|
"rewards/margins": 2.753218650817871, |
|
"rewards/rejected": -8.693236351013184, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6617647058823529, |
|
"grad_norm": 66.28146153490614, |
|
"learning_rate": 9.310647804015124e-08, |
|
"logits/chosen": -2.133643627166748, |
|
"logits/rejected": -2.160266637802124, |
|
"logps/chosen": -2.9957821369171143, |
|
"logps/rejected": -4.556756973266602, |
|
"loss": 0.937, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -5.9915642738342285, |
|
"rewards/margins": 3.1219494342803955, |
|
"rewards/rejected": -9.113513946533203, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6722689075630253, |
|
"grad_norm": 49.303213418937055, |
|
"learning_rate": 8.805216158094177e-08, |
|
"logits/chosen": -2.076920986175537, |
|
"logits/rejected": -2.103963851928711, |
|
"logps/chosen": -2.907010555267334, |
|
"logps/rejected": -4.666647911071777, |
|
"loss": 0.9387, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -5.814021110534668, |
|
"rewards/margins": 3.5192761421203613, |
|
"rewards/rejected": -9.333295822143555, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6827731092436975, |
|
"grad_norm": 67.32319494946066, |
|
"learning_rate": 8.308127653966262e-08, |
|
"logits/chosen": -2.0415196418762207, |
|
"logits/rejected": -2.0577666759490967, |
|
"logps/chosen": -3.1487503051757812, |
|
"logps/rejected": -4.704668045043945, |
|
"loss": 0.9346, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -6.2975006103515625, |
|
"rewards/margins": 3.111835241317749, |
|
"rewards/rejected": -9.40933609008789, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.6932773109243697, |
|
"grad_norm": 60.93426199203996, |
|
"learning_rate": 7.820051770983612e-08, |
|
"logits/chosen": -2.0549426078796387, |
|
"logits/rejected": -2.080475330352783, |
|
"logps/chosen": -3.1458420753479004, |
|
"logps/rejected": -4.8635969161987305, |
|
"loss": 0.966, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -6.291684150695801, |
|
"rewards/margins": 3.4355111122131348, |
|
"rewards/rejected": -9.727193832397461, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7037815126050421, |
|
"grad_norm": 72.28419657503075, |
|
"learning_rate": 7.341645850290216e-08, |
|
"logits/chosen": -2.1288955211639404, |
|
"logits/rejected": -2.1594443321228027, |
|
"logps/chosen": -3.1346468925476074, |
|
"logps/rejected": -4.768304347991943, |
|
"loss": 1.019, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -6.269293785095215, |
|
"rewards/margins": 3.267315626144409, |
|
"rewards/rejected": -9.536608695983887, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.7142857142857143, |
|
"grad_norm": 60.72644174180833, |
|
"learning_rate": 6.873554209514085e-08, |
|
"logits/chosen": -2.0705599784851074, |
|
"logits/rejected": -2.0726349353790283, |
|
"logps/chosen": -2.935683488845825, |
|
"logps/rejected": -4.3867692947387695, |
|
"loss": 0.9702, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -5.87136697769165, |
|
"rewards/margins": 2.9021708965301514, |
|
"rewards/rejected": -8.773538589477539, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7247899159663865, |
|
"grad_norm": 52.6099555735741, |
|
"learning_rate": 6.416407274999497e-08, |
|
"logits/chosen": -2.113405227661133, |
|
"logits/rejected": -2.1457953453063965, |
|
"logps/chosen": -3.0049102306365967, |
|
"logps/rejected": -4.615386962890625, |
|
"loss": 0.9687, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -6.009820461273193, |
|
"rewards/margins": 3.2209534645080566, |
|
"rewards/rejected": -9.23077392578125, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.7352941176470589, |
|
"grad_norm": 69.6143506053754, |
|
"learning_rate": 5.970820732748143e-08, |
|
"logits/chosen": -2.145555257797241, |
|
"logits/rejected": -2.155163288116455, |
|
"logps/chosen": -2.938427209854126, |
|
"logps/rejected": -4.6191511154174805, |
|
"loss": 0.878, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -5.876854419708252, |
|
"rewards/margins": 3.3614463806152344, |
|
"rewards/rejected": -9.238302230834961, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7457983193277311, |
|
"grad_norm": 70.71307640111154, |
|
"learning_rate": 5.537394699212498e-08, |
|
"logits/chosen": -2.1382346153259277, |
|
"logits/rejected": -2.163740634918213, |
|
"logps/chosen": -2.980686664581299, |
|
"logps/rejected": -4.480741500854492, |
|
"loss": 0.9898, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -5.961373329162598, |
|
"rewards/margins": 3.0001087188720703, |
|
"rewards/rejected": -8.961483001708984, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7563025210084033, |
|
"grad_norm": 73.19945321147338, |
|
"learning_rate": 5.1167129130583346e-08, |
|
"logits/chosen": -2.109528064727783, |
|
"logits/rejected": -2.1514618396759033, |
|
"logps/chosen": -2.996703624725342, |
|
"logps/rejected": -4.683353900909424, |
|
"loss": 1.0311, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -5.993407249450684, |
|
"rewards/margins": 3.373300075531006, |
|
"rewards/rejected": -9.366707801818848, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7668067226890757, |
|
"grad_norm": 70.68128938841156, |
|
"learning_rate": 4.709341948984809e-08, |
|
"logits/chosen": -2.0933072566986084, |
|
"logits/rejected": -2.1408255100250244, |
|
"logps/chosen": -2.9475154876708984, |
|
"logps/rejected": -4.628712177276611, |
|
"loss": 1.0051, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -5.895030975341797, |
|
"rewards/margins": 3.3623931407928467, |
|
"rewards/rejected": -9.257424354553223, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.7773109243697479, |
|
"grad_norm": 64.71452548748283, |
|
"learning_rate": 4.315830454661059e-08, |
|
"logits/chosen": -2.086402654647827, |
|
"logits/rejected": -2.1012749671936035, |
|
"logps/chosen": -2.9121134281158447, |
|
"logps/rejected": -4.349917888641357, |
|
"loss": 0.9727, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -5.8242268562316895, |
|
"rewards/margins": 2.8756089210510254, |
|
"rewards/rejected": -8.699835777282715, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7878151260504201, |
|
"grad_norm": 71.60834624596436, |
|
"learning_rate": 3.936708411806887e-08, |
|
"logits/chosen": -2.124846935272217, |
|
"logits/rejected": -2.1803550720214844, |
|
"logps/chosen": -2.9349002838134766, |
|
"logps/rejected": -4.718347549438477, |
|
"loss": 0.9764, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -5.869800567626953, |
|
"rewards/margins": 3.566895008087158, |
|
"rewards/rejected": -9.436695098876953, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.7983193277310925, |
|
"grad_norm": 55.835007766843376, |
|
"learning_rate": 3.572486422412786e-08, |
|
"logits/chosen": -2.104611873626709, |
|
"logits/rejected": -2.1398825645446777, |
|
"logps/chosen": -2.874159336090088, |
|
"logps/rejected": -4.522528648376465, |
|
"loss": 0.9513, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -5.748318672180176, |
|
"rewards/margins": 3.296739101409912, |
|
"rewards/rejected": -9.04505729675293, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8088235294117647, |
|
"grad_norm": 54.54718274731096, |
|
"learning_rate": 3.2236550210606293e-08, |
|
"logits/chosen": -2.13325834274292, |
|
"logits/rejected": -2.1514346599578857, |
|
"logps/chosen": -2.728529691696167, |
|
"logps/rejected": -4.492846488952637, |
|
"loss": 0.9402, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -5.457059383392334, |
|
"rewards/margins": 3.5286338329315186, |
|
"rewards/rejected": -8.985692977905273, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.819327731092437, |
|
"grad_norm": 64.73590798684994, |
|
"learning_rate": 2.8906840142711338e-08, |
|
"logits/chosen": -2.0870397090911865, |
|
"logits/rejected": -2.1221370697021484, |
|
"logps/chosen": -2.9295685291290283, |
|
"logps/rejected": -4.712892055511475, |
|
"loss": 0.9203, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -5.859137058258057, |
|
"rewards/margins": 3.5666465759277344, |
|
"rewards/rejected": -9.42578411102295, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8298319327731093, |
|
"grad_norm": 56.24812000405815, |
|
"learning_rate": 2.5740218477679143e-08, |
|
"logits/chosen": -2.076784610748291, |
|
"logits/rejected": -2.0827224254608154, |
|
"logps/chosen": -2.910884141921997, |
|
"logps/rejected": -4.398539066314697, |
|
"loss": 0.8926, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -5.821768283843994, |
|
"rewards/margins": 2.975309371948242, |
|
"rewards/rejected": -8.797078132629395, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8403361344537815, |
|
"grad_norm": 65.02327391971039, |
|
"learning_rate": 2.2740950025102763e-08, |
|
"logits/chosen": -2.0536999702453613, |
|
"logits/rejected": -2.058232545852661, |
|
"logps/chosen": -3.009183883666992, |
|
"logps/rejected": -4.569349765777588, |
|
"loss": 0.9758, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -6.018367767333984, |
|
"rewards/margins": 3.1203320026397705, |
|
"rewards/rejected": -9.138699531555176, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8508403361344538, |
|
"grad_norm": 71.60344245483444, |
|
"learning_rate": 1.9913074203082053e-08, |
|
"logits/chosen": -2.0714104175567627, |
|
"logits/rejected": -2.0895228385925293, |
|
"logps/chosen": -3.0680434703826904, |
|
"logps/rejected": -4.809669494628906, |
|
"loss": 1.002, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -6.136086940765381, |
|
"rewards/margins": 3.483250856399536, |
|
"rewards/rejected": -9.619338989257812, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8613445378151261, |
|
"grad_norm": 65.02582256297173, |
|
"learning_rate": 1.726039959793059e-08, |
|
"logits/chosen": -2.0531625747680664, |
|
"logits/rejected": -2.0893194675445557, |
|
"logps/chosen": -3.2407803535461426, |
|
"logps/rejected": -4.729245185852051, |
|
"loss": 0.9391, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -6.481560707092285, |
|
"rewards/margins": 2.9769301414489746, |
|
"rewards/rejected": -9.458490371704102, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8718487394957983, |
|
"grad_norm": 66.60722999226081, |
|
"learning_rate": 1.4786498834767618e-08, |
|
"logits/chosen": -1.971679449081421, |
|
"logits/rejected": -2.0226242542266846, |
|
"logps/chosen": -2.956986427307129, |
|
"logps/rejected": -4.357911109924316, |
|
"loss": 0.9793, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -5.913972854614258, |
|
"rewards/margins": 2.8018486499786377, |
|
"rewards/rejected": -8.715822219848633, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.8823529411764706, |
|
"grad_norm": 67.46172075980118, |
|
"learning_rate": 1.2494703765902337e-08, |
|
"logits/chosen": -2.0839121341705322, |
|
"logits/rejected": -2.104898452758789, |
|
"logps/chosen": -3.1962718963623047, |
|
"logps/rejected": -4.687077522277832, |
|
"loss": 0.9073, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -6.392543792724609, |
|
"rewards/margins": 2.9816107749938965, |
|
"rewards/rejected": -9.374155044555664, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.8928571428571429, |
|
"grad_norm": 80.87130272740922, |
|
"learning_rate": 1.0388100983491676e-08, |
|
"logits/chosen": -2.0597221851348877, |
|
"logits/rejected": -2.0896944999694824, |
|
"logps/chosen": -3.026052236557007, |
|
"logps/rejected": -4.573755741119385, |
|
"loss": 0.9555, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -6.052104473114014, |
|
"rewards/margins": 3.0954062938690186, |
|
"rewards/rejected": -9.14751148223877, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.9033613445378151, |
|
"grad_norm": 70.56768229498226, |
|
"learning_rate": 8.469527662514425e-09, |
|
"logits/chosen": -2.0741794109344482, |
|
"logits/rejected": -2.097032070159912, |
|
"logps/chosen": -3.0541605949401855, |
|
"logps/rejected": -4.719814777374268, |
|
"loss": 1.0143, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -6.108321189880371, |
|
"rewards/margins": 3.331307888031006, |
|
"rewards/rejected": -9.439629554748535, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9138655462184874, |
|
"grad_norm": 72.71200868786163, |
|
"learning_rate": 6.7415677396608474e-09, |
|
"logits/chosen": -2.0740599632263184, |
|
"logits/rejected": -2.0966227054595947, |
|
"logps/chosen": -3.1755881309509277, |
|
"logps/rejected": -5.003739356994629, |
|
"loss": 0.9747, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -6.3511762619018555, |
|
"rewards/margins": 3.6563029289245605, |
|
"rewards/rejected": -10.007478713989258, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.9243697478991597, |
|
"grad_norm": 68.28482752709235, |
|
"learning_rate": 5.206548433283803e-09, |
|
"logits/chosen": -2.015186071395874, |
|
"logits/rejected": -2.100969076156616, |
|
"logps/chosen": -3.135103464126587, |
|
"logps/rejected": -4.680062294006348, |
|
"loss": 0.9059, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -6.270206928253174, |
|
"rewards/margins": 3.0899174213409424, |
|
"rewards/rejected": -9.360124588012695, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9348739495798319, |
|
"grad_norm": 53.32723170520827, |
|
"learning_rate": 3.866537109098561e-09, |
|
"logits/chosen": -2.0853240489959717, |
|
"logits/rejected": -2.0845720767974854, |
|
"logps/chosen": -2.9771265983581543, |
|
"logps/rejected": -4.7920613288879395, |
|
"loss": 0.9242, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -5.954253196716309, |
|
"rewards/margins": 3.6298699378967285, |
|
"rewards/rejected": -9.584122657775879, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.9453781512605042, |
|
"grad_norm": 77.58999305035255, |
|
"learning_rate": 2.7233384958522676e-09, |
|
"logits/chosen": -2.0929324626922607, |
|
"logits/rejected": -2.088423490524292, |
|
"logps/chosen": -3.0112125873565674, |
|
"logps/rejected": -4.747193336486816, |
|
"loss": 0.859, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -6.022425174713135, |
|
"rewards/margins": 3.471961498260498, |
|
"rewards/rejected": -9.494386672973633, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9558823529411765, |
|
"grad_norm": 69.00371191627924, |
|
"learning_rate": 1.7784922547133318e-09, |
|
"logits/chosen": -2.03417706489563, |
|
"logits/rejected": -2.0785162448883057, |
|
"logps/chosen": -3.0350539684295654, |
|
"logps/rejected": -4.6372761726379395, |
|
"loss": 1.0211, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -6.070107936859131, |
|
"rewards/margins": 3.204444408416748, |
|
"rewards/rejected": -9.274552345275879, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.9663865546218487, |
|
"grad_norm": 80.70006340013546, |
|
"learning_rate": 1.033270905653949e-09, |
|
"logits/chosen": -2.077859878540039, |
|
"logits/rejected": -2.1275644302368164, |
|
"logps/chosen": -3.1961588859558105, |
|
"logps/rejected": -5.026784420013428, |
|
"loss": 0.9054, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -6.392317771911621, |
|
"rewards/margins": 3.6612517833709717, |
|
"rewards/rejected": -10.053568840026855, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.976890756302521, |
|
"grad_norm": 65.94555657144473, |
|
"learning_rate": 4.8867811361889e-10, |
|
"logits/chosen": -2.0415802001953125, |
|
"logits/rejected": -2.073897123336792, |
|
"logps/chosen": -3.136763572692871, |
|
"logps/rejected": -4.838761329650879, |
|
"loss": 0.9205, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -6.273527145385742, |
|
"rewards/margins": 3.40399432182312, |
|
"rewards/rejected": -9.677522659301758, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.9873949579831933, |
|
"grad_norm": 72.55584643358395, |
|
"learning_rate": 1.454473367883291e-10, |
|
"logits/chosen": -2.0744833946228027, |
|
"logits/rejected": -2.1010680198669434, |
|
"logps/chosen": -3.007612943649292, |
|
"logps/rejected": -4.534255027770996, |
|
"loss": 0.8893, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -6.015225887298584, |
|
"rewards/margins": 3.0532851219177246, |
|
"rewards/rejected": -9.068510055541992, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.9978991596638656, |
|
"grad_norm": 71.68265122537953, |
|
"learning_rate": 4.040838755653419e-12, |
|
"logits/chosen": -2.0488152503967285, |
|
"logits/rejected": -2.0957658290863037, |
|
"logps/chosen": -2.9260973930358887, |
|
"logps/rejected": -4.68855619430542, |
|
"loss": 0.9609, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -5.852194786071777, |
|
"rewards/margins": 3.5249176025390625, |
|
"rewards/rejected": -9.37711238861084, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 476, |
|
"total_flos": 0.0, |
|
"train_loss": 1.1419020675811447, |
|
"train_runtime": 10201.3152, |
|
"train_samples_per_second": 5.971, |
|
"train_steps_per_second": 0.047 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 476, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|