|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.994495412844037, |
|
"eval_steps": 500, |
|
"global_step": 408, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.014678899082568808, |
|
"grad_norm": 11.81737232208252, |
|
"learning_rate": 2.439024390243903e-07, |
|
"logits/chosen": -0.9879676103591919, |
|
"logits/rejected": -1.9993298053741455, |
|
"logps/chosen": -269.27239990234375, |
|
"logps/rejected": -186.47621154785156, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.029357798165137616, |
|
"grad_norm": 11.950206756591797, |
|
"learning_rate": 4.878048780487805e-07, |
|
"logits/chosen": -1.0342975854873657, |
|
"logits/rejected": -1.9880424737930298, |
|
"logps/chosen": -290.81072998046875, |
|
"logps/rejected": -204.50514221191406, |
|
"loss": 0.712, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.009406615048646927, |
|
"rewards/margins": 0.020395996049046516, |
|
"rewards/rejected": -0.029802614822983742, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.044036697247706424, |
|
"grad_norm": 11.719453811645508, |
|
"learning_rate": 7.317073170731707e-07, |
|
"logits/chosen": -1.1187832355499268, |
|
"logits/rejected": -2.125272750854492, |
|
"logps/chosen": -295.85894775390625, |
|
"logps/rejected": -203.1645050048828, |
|
"loss": 0.6642, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.07618961483240128, |
|
"rewards/margins": 0.09595101326704025, |
|
"rewards/rejected": -0.01976138912141323, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.05871559633027523, |
|
"grad_norm": 12.171574592590332, |
|
"learning_rate": 9.75609756097561e-07, |
|
"logits/chosen": -1.250899076461792, |
|
"logits/rejected": -2.1083037853240967, |
|
"logps/chosen": -252.51145935058594, |
|
"logps/rejected": -164.40138244628906, |
|
"loss": 0.7179, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.044141992926597595, |
|
"rewards/margins": 0.003628704696893692, |
|
"rewards/rejected": 0.040513284504413605, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.07339449541284404, |
|
"grad_norm": 13.125951766967773, |
|
"learning_rate": 1.2195121951219514e-06, |
|
"logits/chosen": -1.0734999179840088, |
|
"logits/rejected": -2.204047679901123, |
|
"logps/chosen": -306.6387939453125, |
|
"logps/rejected": -158.70912170410156, |
|
"loss": 0.7397, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -0.0127907395362854, |
|
"rewards/margins": -0.031406134366989136, |
|
"rewards/rejected": 0.01861538738012314, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.08807339449541285, |
|
"grad_norm": 15.099513053894043, |
|
"learning_rate": 1.4634146341463414e-06, |
|
"logits/chosen": -1.0108157396316528, |
|
"logits/rejected": -1.977769374847412, |
|
"logps/chosen": -344.31402587890625, |
|
"logps/rejected": -223.3643798828125, |
|
"loss": 0.7584, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.06078364700078964, |
|
"rewards/margins": -0.0867958813905716, |
|
"rewards/rejected": 0.026012245565652847, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.10275229357798166, |
|
"grad_norm": 13.785890579223633, |
|
"learning_rate": 1.707317073170732e-06, |
|
"logits/chosen": -0.9762290120124817, |
|
"logits/rejected": -1.9721505641937256, |
|
"logps/chosen": -259.58258056640625, |
|
"logps/rejected": -167.8755645751953, |
|
"loss": 0.7235, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": 0.023126909509301186, |
|
"rewards/margins": -0.02050386182963848, |
|
"rewards/rejected": 0.043630778789520264, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.11743119266055047, |
|
"grad_norm": 13.014513969421387, |
|
"learning_rate": 1.951219512195122e-06, |
|
"logits/chosen": -1.1472342014312744, |
|
"logits/rejected": -2.0296616554260254, |
|
"logps/chosen": -269.7952575683594, |
|
"logps/rejected": -186.65452575683594, |
|
"loss": 0.7405, |
|
"rewards/accuracies": 0.453125, |
|
"rewards/chosen": 0.004093457013368607, |
|
"rewards/margins": -0.044701721519231796, |
|
"rewards/rejected": 0.0487951785326004, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.13211009174311927, |
|
"grad_norm": 12.20093059539795, |
|
"learning_rate": 2.1951219512195125e-06, |
|
"logits/chosen": -1.0266412496566772, |
|
"logits/rejected": -2.0891737937927246, |
|
"logps/chosen": -313.8085021972656, |
|
"logps/rejected": -197.85943603515625, |
|
"loss": 0.6731, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": 0.09984610974788666, |
|
"rewards/margins": 0.0959281176328659, |
|
"rewards/rejected": 0.003917992115020752, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.14678899082568808, |
|
"grad_norm": 12.344905853271484, |
|
"learning_rate": 2.4390243902439027e-06, |
|
"logits/chosen": -1.0662198066711426, |
|
"logits/rejected": -2.0889832973480225, |
|
"logps/chosen": -308.8189697265625, |
|
"logps/rejected": -156.6934814453125, |
|
"loss": 0.6784, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": 0.08540838956832886, |
|
"rewards/margins": 0.07292439043521881, |
|
"rewards/rejected": 0.012484000064432621, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.1614678899082569, |
|
"grad_norm": 11.898660659790039, |
|
"learning_rate": 2.682926829268293e-06, |
|
"logits/chosen": -1.2143007516860962, |
|
"logits/rejected": -2.262324571609497, |
|
"logps/chosen": -298.7814636230469, |
|
"logps/rejected": -186.76119995117188, |
|
"loss": 0.6781, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": 0.08213196694850922, |
|
"rewards/margins": 0.08361663669347763, |
|
"rewards/rejected": -0.0014846734702587128, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.1761467889908257, |
|
"grad_norm": 13.68064022064209, |
|
"learning_rate": 2.926829268292683e-06, |
|
"logits/chosen": -1.0233314037322998, |
|
"logits/rejected": -2.1899986267089844, |
|
"logps/chosen": -370.8209228515625, |
|
"logps/rejected": -156.96270751953125, |
|
"loss": 0.7306, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -0.007536953315138817, |
|
"rewards/margins": -0.009052609093487263, |
|
"rewards/rejected": 0.0015156615991145372, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.1908256880733945, |
|
"grad_norm": 12.661199569702148, |
|
"learning_rate": 3.1707317073170736e-06, |
|
"logits/chosen": -1.2463735342025757, |
|
"logits/rejected": -2.1673622131347656, |
|
"logps/chosen": -326.9246520996094, |
|
"logps/rejected": -182.17701721191406, |
|
"loss": 0.7175, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -0.010405808687210083, |
|
"rewards/margins": 0.007835682481527328, |
|
"rewards/rejected": -0.01824149303138256, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.20550458715596331, |
|
"grad_norm": 11.61974811553955, |
|
"learning_rate": 3.414634146341464e-06, |
|
"logits/chosen": -1.1716669797897339, |
|
"logits/rejected": -2.2106716632843018, |
|
"logps/chosen": -284.443603515625, |
|
"logps/rejected": -165.102783203125, |
|
"loss": 0.7409, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": 0.007994448766112328, |
|
"rewards/margins": -0.035433441400527954, |
|
"rewards/rejected": 0.04342789575457573, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.22018348623853212, |
|
"grad_norm": 10.777989387512207, |
|
"learning_rate": 3.6585365853658537e-06, |
|
"logits/chosen": -1.0662914514541626, |
|
"logits/rejected": -2.1156551837921143, |
|
"logps/chosen": -289.4057922363281, |
|
"logps/rejected": -197.46649169921875, |
|
"loss": 0.6371, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": 0.1247626319527626, |
|
"rewards/margins": 0.15936096012592316, |
|
"rewards/rejected": -0.03459831699728966, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.23486238532110093, |
|
"grad_norm": 12.190910339355469, |
|
"learning_rate": 3.902439024390244e-06, |
|
"logits/chosen": -1.1755316257476807, |
|
"logits/rejected": -2.1449058055877686, |
|
"logps/chosen": -288.5774841308594, |
|
"logps/rejected": -163.59588623046875, |
|
"loss": 0.6733, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": 0.09892146289348602, |
|
"rewards/margins": 0.08179756253957748, |
|
"rewards/rejected": 0.01712390035390854, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.24954128440366974, |
|
"grad_norm": 13.154803276062012, |
|
"learning_rate": 4.146341463414634e-06, |
|
"logits/chosen": -1.1496777534484863, |
|
"logits/rejected": -2.2045750617980957, |
|
"logps/chosen": -324.6558837890625, |
|
"logps/rejected": -164.45327758789062, |
|
"loss": 0.6478, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": 0.08885271847248077, |
|
"rewards/margins": 0.1558375358581543, |
|
"rewards/rejected": -0.06698483228683472, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.26422018348623855, |
|
"grad_norm": 11.07314682006836, |
|
"learning_rate": 4.390243902439025e-06, |
|
"logits/chosen": -1.1677134037017822, |
|
"logits/rejected": -2.0850350856781006, |
|
"logps/chosen": -311.2884216308594, |
|
"logps/rejected": -204.43142700195312, |
|
"loss": 0.6193, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.20451843738555908, |
|
"rewards/margins": 0.22681473195552826, |
|
"rewards/rejected": -0.02229629084467888, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.27889908256880735, |
|
"grad_norm": 12.431696891784668, |
|
"learning_rate": 4.634146341463416e-06, |
|
"logits/chosen": -1.195428729057312, |
|
"logits/rejected": -2.197521686553955, |
|
"logps/chosen": -294.04962158203125, |
|
"logps/rejected": -200.2810516357422, |
|
"loss": 0.6598, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.15507417917251587, |
|
"rewards/margins": 0.11412172019481659, |
|
"rewards/rejected": 0.04095245152711868, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.29357798165137616, |
|
"grad_norm": 11.575589179992676, |
|
"learning_rate": 4.8780487804878055e-06, |
|
"logits/chosen": -1.0411652326583862, |
|
"logits/rejected": -2.03951096534729, |
|
"logps/chosen": -345.9762268066406, |
|
"logps/rejected": -181.34144592285156, |
|
"loss": 0.6186, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.14570173621177673, |
|
"rewards/margins": 0.20660607516765594, |
|
"rewards/rejected": -0.06090431660413742, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.30825688073394497, |
|
"grad_norm": 12.716486930847168, |
|
"learning_rate": 4.999908404322799e-06, |
|
"logits/chosen": -1.0371800661087036, |
|
"logits/rejected": -2.2317895889282227, |
|
"logps/chosen": -319.42755126953125, |
|
"logps/rejected": -172.60479736328125, |
|
"loss": 0.6248, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": 0.13961191475391388, |
|
"rewards/margins": 0.19473902881145477, |
|
"rewards/rejected": -0.055127132683992386, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.3229357798165138, |
|
"grad_norm": 10.400399208068848, |
|
"learning_rate": 4.999175679175577e-06, |
|
"logits/chosen": -1.1097325086593628, |
|
"logits/rejected": -2.1328647136688232, |
|
"logps/chosen": -251.92745971679688, |
|
"logps/rejected": -161.21292114257812, |
|
"loss": 0.5849, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": 0.25156256556510925, |
|
"rewards/margins": 0.2756442427635193, |
|
"rewards/rejected": -0.02408166043460369, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.3376146788990826, |
|
"grad_norm": 11.084893226623535, |
|
"learning_rate": 4.997710443643461e-06, |
|
"logits/chosen": -1.1712064743041992, |
|
"logits/rejected": -2.0722293853759766, |
|
"logps/chosen": -259.9323425292969, |
|
"logps/rejected": -206.37510681152344, |
|
"loss": 0.6109, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.22726279497146606, |
|
"rewards/margins": 0.24400296807289124, |
|
"rewards/rejected": -0.016740169376134872, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.3522935779816514, |
|
"grad_norm": 13.230236053466797, |
|
"learning_rate": 4.995513127188151e-06, |
|
"logits/chosen": -1.0816175937652588, |
|
"logits/rejected": -2.215028762817383, |
|
"logps/chosen": -365.7675476074219, |
|
"logps/rejected": -183.13980102539062, |
|
"loss": 0.5456, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.34264349937438965, |
|
"rewards/margins": 0.38213008642196655, |
|
"rewards/rejected": -0.03948655351996422, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.3669724770642202, |
|
"grad_norm": 11.37851333618164, |
|
"learning_rate": 4.992584373844853e-06, |
|
"logits/chosen": -1.2096611261367798, |
|
"logits/rejected": -2.082951784133911, |
|
"logps/chosen": -345.7232971191406, |
|
"logps/rejected": -184.25949096679688, |
|
"loss": 0.5091, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.413723886013031, |
|
"rewards/margins": 0.4999043643474579, |
|
"rewards/rejected": -0.08618048578500748, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.381651376146789, |
|
"grad_norm": 9.676469802856445, |
|
"learning_rate": 4.98892504203351e-06, |
|
"logits/chosen": -1.2248896360397339, |
|
"logits/rejected": -2.1341745853424072, |
|
"logps/chosen": -282.0457763671875, |
|
"logps/rejected": -158.89736938476562, |
|
"loss": 0.501, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/chosen": 0.42736518383026123, |
|
"rewards/margins": 0.5060732960700989, |
|
"rewards/rejected": -0.07870808988809586, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.3963302752293578, |
|
"grad_norm": 9.402766227722168, |
|
"learning_rate": 4.9845362043071925e-06, |
|
"logits/chosen": -1.0192848443984985, |
|
"logits/rejected": -2.0682382583618164, |
|
"logps/chosen": -290.6011962890625, |
|
"logps/rejected": -163.6627197265625, |
|
"loss": 0.4541, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.5030941963195801, |
|
"rewards/margins": 0.6462306380271912, |
|
"rewards/rejected": -0.14313644170761108, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.41100917431192663, |
|
"grad_norm": 10.944356918334961, |
|
"learning_rate": 4.97941914703774e-06, |
|
"logits/chosen": -1.1482800245285034, |
|
"logits/rejected": -2.151231050491333, |
|
"logps/chosen": -287.7913513183594, |
|
"logps/rejected": -201.2919464111328, |
|
"loss": 0.4487, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.6401927471160889, |
|
"rewards/margins": 0.7009615898132324, |
|
"rewards/rejected": -0.06076894700527191, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.42568807339449544, |
|
"grad_norm": 8.618446350097656, |
|
"learning_rate": 4.973575370038718e-06, |
|
"logits/chosen": -1.0707895755767822, |
|
"logits/rejected": -2.049323558807373, |
|
"logps/chosen": -305.2084045410156, |
|
"logps/rejected": -193.321533203125, |
|
"loss": 0.3851, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 0.8177109956741333, |
|
"rewards/margins": 0.9303702116012573, |
|
"rewards/rejected": -0.11265924572944641, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.44036697247706424, |
|
"grad_norm": 7.712850093841553, |
|
"learning_rate": 4.967006586125827e-06, |
|
"logits/chosen": -1.240044355392456, |
|
"logits/rejected": -2.0774481296539307, |
|
"logps/chosen": -301.3046569824219, |
|
"logps/rejected": -186.58460998535156, |
|
"loss": 0.35, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.9353222846984863, |
|
"rewards/margins": 1.0043295621871948, |
|
"rewards/rejected": -0.06900733709335327, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.45504587155963305, |
|
"grad_norm": 8.133475303649902, |
|
"learning_rate": 4.959714720614871e-06, |
|
"logits/chosen": -1.1756389141082764, |
|
"logits/rejected": -2.2198028564453125, |
|
"logps/chosen": -319.236083984375, |
|
"logps/rejected": -184.04647827148438, |
|
"loss": 0.3239, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": 0.9475828409194946, |
|
"rewards/margins": 1.177114725112915, |
|
"rewards/rejected": -0.22953176498413086, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.46972477064220186, |
|
"grad_norm": 6.613894462585449, |
|
"learning_rate": 4.951701910757446e-06, |
|
"logits/chosen": -1.1599823236465454, |
|
"logits/rejected": -2.064751148223877, |
|
"logps/chosen": -253.94537353515625, |
|
"logps/rejected": -188.87652587890625, |
|
"loss": 0.3088, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 1.0339241027832031, |
|
"rewards/margins": 1.2678444385528564, |
|
"rewards/rejected": -0.2339203655719757, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.48440366972477067, |
|
"grad_norm": 8.49493408203125, |
|
"learning_rate": 4.942970505114514e-06, |
|
"logits/chosen": -1.0440397262573242, |
|
"logits/rejected": -2.1136162281036377, |
|
"logps/chosen": -308.4583435058594, |
|
"logps/rejected": -176.34474182128906, |
|
"loss": 0.268, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.1736990213394165, |
|
"rewards/margins": 1.4280885457992554, |
|
"rewards/rejected": -0.25438952445983887, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.4990825688073395, |
|
"grad_norm": 6.022420883178711, |
|
"learning_rate": 4.933523062868033e-06, |
|
"logits/chosen": -1.0774444341659546, |
|
"logits/rejected": -2.1658172607421875, |
|
"logps/chosen": -269.4661560058594, |
|
"logps/rejected": -164.7786865234375, |
|
"loss": 0.2372, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 1.3157860040664673, |
|
"rewards/margins": 1.5686390399932861, |
|
"rewards/rejected": -0.25285303592681885, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.5137614678899083, |
|
"grad_norm": 4.839372634887695, |
|
"learning_rate": 4.923362353070859e-06, |
|
"logits/chosen": -0.8954001665115356, |
|
"logits/rejected": -2.1572980880737305, |
|
"logps/chosen": -287.38250732421875, |
|
"logps/rejected": -159.82025146484375, |
|
"loss": 0.2079, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.4153721332550049, |
|
"rewards/margins": 1.8501354455947876, |
|
"rewards/rejected": -0.43476346135139465, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.5284403669724771, |
|
"grad_norm": 5.355666160583496, |
|
"learning_rate": 4.912491353835138e-06, |
|
"logits/chosen": -1.1590656042099, |
|
"logits/rejected": -2.088367462158203, |
|
"logps/chosen": -260.02386474609375, |
|
"logps/rejected": -185.47396850585938, |
|
"loss": 0.2185, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.4196313619613647, |
|
"rewards/margins": 1.858705997467041, |
|
"rewards/rejected": -0.43907448649406433, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.5431192660550459, |
|
"grad_norm": 4.641209602355957, |
|
"learning_rate": 4.900913251459418e-06, |
|
"logits/chosen": -1.0761524438858032, |
|
"logits/rejected": -2.0451908111572266, |
|
"logps/chosen": -264.9051513671875, |
|
"logps/rejected": -173.16702270507812, |
|
"loss": 0.1769, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.5420759916305542, |
|
"rewards/margins": 2.071654796600342, |
|
"rewards/rejected": -0.5295785069465637, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.5577981651376147, |
|
"grad_norm": 4.564330101013184, |
|
"learning_rate": 4.8886314394947396e-06, |
|
"logits/chosen": -0.9936952590942383, |
|
"logits/rejected": -2.070539951324463, |
|
"logps/chosen": -278.8867492675781, |
|
"logps/rejected": -185.91055297851562, |
|
"loss": 0.1608, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 1.9348686933517456, |
|
"rewards/margins": 2.52958083152771, |
|
"rewards/rejected": -0.5947118997573853, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.5724770642201835, |
|
"grad_norm": 5.782593250274658, |
|
"learning_rate": 4.875649517749985e-06, |
|
"logits/chosen": -1.0427924394607544, |
|
"logits/rejected": -2.180347442626953, |
|
"logps/chosen": -282.06732177734375, |
|
"logps/rejected": -191.30137634277344, |
|
"loss": 0.1548, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 1.9499953985214233, |
|
"rewards/margins": 2.6421873569488525, |
|
"rewards/rejected": -0.6921918392181396, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.5871559633027523, |
|
"grad_norm": 4.356126308441162, |
|
"learning_rate": 4.861971291236772e-06, |
|
"logits/chosen": -1.134873390197754, |
|
"logits/rejected": -2.047222852706909, |
|
"logps/chosen": -328.65509033203125, |
|
"logps/rejected": -191.76483154296875, |
|
"loss": 0.1841, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 2.2117769718170166, |
|
"rewards/margins": 2.5882744789123535, |
|
"rewards/rejected": -0.37649768590927124, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.6018348623853211, |
|
"grad_norm": 3.70808482170105, |
|
"learning_rate": 4.847600769054201e-06, |
|
"logits/chosen": -1.1773045063018799, |
|
"logits/rejected": -2.071323871612549, |
|
"logps/chosen": -365.7237243652344, |
|
"logps/rejected": -221.5764923095703, |
|
"loss": 0.1093, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 2.4926247596740723, |
|
"rewards/margins": 3.0623347759246826, |
|
"rewards/rejected": -0.5697098970413208, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.6165137614678899, |
|
"grad_norm": 2.8207852840423584, |
|
"learning_rate": 4.832542163213787e-06, |
|
"logits/chosen": -1.0239057540893555, |
|
"logits/rejected": -2.1960628032684326, |
|
"logps/chosen": -261.3912658691406, |
|
"logps/rejected": -155.67286682128906, |
|
"loss": 0.1073, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 2.281486749649048, |
|
"rewards/margins": 3.123940944671631, |
|
"rewards/rejected": -0.8424541354179382, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.6311926605504588, |
|
"grad_norm": 2.6905996799468994, |
|
"learning_rate": 4.816799887404911e-06, |
|
"logits/chosen": -1.2185587882995605, |
|
"logits/rejected": -2.146491289138794, |
|
"logps/chosen": -300.77069091796875, |
|
"logps/rejected": -185.7276153564453, |
|
"loss": 0.1277, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 2.3313047885894775, |
|
"rewards/margins": 3.0290822982788086, |
|
"rewards/rejected": -0.6977773904800415, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.6458715596330276, |
|
"grad_norm": 1.891965389251709, |
|
"learning_rate": 4.800378555701168e-06, |
|
"logits/chosen": -1.056377649307251, |
|
"logits/rejected": -2.001763343811035, |
|
"logps/chosen": -354.14990234375, |
|
"logps/rejected": -186.62448120117188, |
|
"loss": 0.1089, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": 2.5092077255249023, |
|
"rewards/margins": 3.3948686122894287, |
|
"rewards/rejected": -0.8856609463691711, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.6605504587155964, |
|
"grad_norm": 3.7145261764526367, |
|
"learning_rate": 4.783282981207979e-06, |
|
"logits/chosen": -1.1021761894226074, |
|
"logits/rejected": -2.2725181579589844, |
|
"logps/chosen": -296.32763671875, |
|
"logps/rejected": -169.7439727783203, |
|
"loss": 0.0866, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 2.6951088905334473, |
|
"rewards/margins": 3.6553006172180176, |
|
"rewards/rejected": -0.9601919054985046, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.6752293577981652, |
|
"grad_norm": 2.50156307220459, |
|
"learning_rate": 4.765518174651864e-06, |
|
"logits/chosen": -1.1074126958847046, |
|
"logits/rejected": -2.051131248474121, |
|
"logps/chosen": -285.9756164550781, |
|
"logps/rejected": -190.58448791503906, |
|
"loss": 0.0852, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 2.7018895149230957, |
|
"rewards/margins": 3.8313865661621094, |
|
"rewards/rejected": -1.1294972896575928, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.689908256880734, |
|
"grad_norm": 2.272671699523926, |
|
"learning_rate": 4.747089342911793e-06, |
|
"logits/chosen": -0.9693321585655212, |
|
"logits/rejected": -2.168473720550537, |
|
"logps/chosen": -291.7270812988281, |
|
"logps/rejected": -175.2049560546875, |
|
"loss": 0.0446, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.954824209213257, |
|
"rewards/margins": 4.128055095672607, |
|
"rewards/rejected": -1.1732308864593506, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.7045871559633028, |
|
"grad_norm": 2.2310574054718018, |
|
"learning_rate": 4.728001887493048e-06, |
|
"logits/chosen": -0.9781808853149414, |
|
"logits/rejected": -2.155506134033203, |
|
"logps/chosen": -299.66314697265625, |
|
"logps/rejected": -194.56436157226562, |
|
"loss": 0.067, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 3.1443114280700684, |
|
"rewards/margins": 4.226352214813232, |
|
"rewards/rejected": -1.0820410251617432, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.7192660550458716, |
|
"grad_norm": 1.7269368171691895, |
|
"learning_rate": 4.708261402944036e-06, |
|
"logits/chosen": -1.0619006156921387, |
|
"logits/rejected": -2.1256189346313477, |
|
"logps/chosen": -315.5987548828125, |
|
"logps/rejected": -188.52439880371094, |
|
"loss": 0.06, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.3185007572174072, |
|
"rewards/margins": 4.740314960479736, |
|
"rewards/rejected": -1.4218144416809082, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.7339449541284404, |
|
"grad_norm": 3.1934289932250977, |
|
"learning_rate": 4.687873675216522e-06, |
|
"logits/chosen": -0.9534860253334045, |
|
"logits/rejected": -1.9718412160873413, |
|
"logps/chosen": -303.17181396484375, |
|
"logps/rejected": -199.40789794921875, |
|
"loss": 0.0892, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": 3.4963011741638184, |
|
"rewards/margins": 4.650891304016113, |
|
"rewards/rejected": -1.1545898914337158, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.7486238532110092, |
|
"grad_norm": 1.0567034482955933, |
|
"learning_rate": 4.666844679969765e-06, |
|
"logits/chosen": -1.287552833557129, |
|
"logits/rejected": -2.272284507751465, |
|
"logps/chosen": -299.2529296875, |
|
"logps/rejected": -208.53785705566406, |
|
"loss": 0.0373, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.132131814956665, |
|
"rewards/margins": 4.880558967590332, |
|
"rewards/rejected": -1.748427152633667, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.763302752293578, |
|
"grad_norm": 1.3455036878585815, |
|
"learning_rate": 4.6451805808190464e-06, |
|
"logits/chosen": -1.049391508102417, |
|
"logits/rejected": -2.1182594299316406, |
|
"logps/chosen": -284.2237548828125, |
|
"logps/rejected": -176.08627319335938, |
|
"loss": 0.0317, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.437601089477539, |
|
"rewards/margins": 5.258786678314209, |
|
"rewards/rejected": -1.821185827255249, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.7779816513761468, |
|
"grad_norm": 1.6037604808807373, |
|
"learning_rate": 4.622887727529104e-06, |
|
"logits/chosen": -1.0589053630828857, |
|
"logits/rejected": -2.095472812652588, |
|
"logps/chosen": -257.8381042480469, |
|
"logps/rejected": -207.0792236328125, |
|
"loss": 0.029, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.2390363216400146, |
|
"rewards/margins": 5.350650310516357, |
|
"rewards/rejected": -2.1116137504577637, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.7926605504587156, |
|
"grad_norm": 1.9005062580108643, |
|
"learning_rate": 4.599972654153018e-06, |
|
"logits/chosen": -0.9298142194747925, |
|
"logits/rejected": -2.0814666748046875, |
|
"logps/chosen": -301.68865966796875, |
|
"logps/rejected": -174.01010131835938, |
|
"loss": 0.0311, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.7089574337005615, |
|
"rewards/margins": 5.569860458374023, |
|
"rewards/rejected": -1.8609036207199097, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.8073394495412844, |
|
"grad_norm": 0.712770402431488, |
|
"learning_rate": 4.5764420771170735e-06, |
|
"logits/chosen": -0.9678480625152588, |
|
"logits/rejected": -2.0447123050689697, |
|
"logps/chosen": -278.64398193359375, |
|
"logps/rejected": -192.5853729248047, |
|
"loss": 0.0215, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.513016939163208, |
|
"rewards/margins": 5.6180419921875, |
|
"rewards/rejected": -2.105024814605713, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.8220183486238533, |
|
"grad_norm": 1.3919163942337036, |
|
"learning_rate": 4.552302893252166e-06, |
|
"logits/chosen": -1.2199370861053467, |
|
"logits/rejected": -2.197056293487549, |
|
"logps/chosen": -306.26080322265625, |
|
"logps/rejected": -205.06845092773438, |
|
"loss": 0.0296, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.367818832397461, |
|
"rewards/margins": 5.316436290740967, |
|
"rewards/rejected": -1.948617696762085, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.8366972477064221, |
|
"grad_norm": 3.037362575531006, |
|
"learning_rate": 4.52756217777234e-06, |
|
"logits/chosen": -1.2299991846084595, |
|
"logits/rejected": -2.1640126705169678, |
|
"logps/chosen": -311.70574951171875, |
|
"logps/rejected": -207.38746643066406, |
|
"loss": 0.0398, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.595170021057129, |
|
"rewards/margins": 5.463950157165527, |
|
"rewards/rejected": -1.8687800168991089, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.8513761467889909, |
|
"grad_norm": 0.8069730401039124, |
|
"learning_rate": 4.502227182201035e-06, |
|
"logits/chosen": -0.9528835415840149, |
|
"logits/rejected": -1.977004051208496, |
|
"logps/chosen": -264.5509033203125, |
|
"logps/rejected": -174.93551635742188, |
|
"loss": 0.0227, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.867180109024048, |
|
"rewards/margins": 6.007584571838379, |
|
"rewards/rejected": -2.140403985977173, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.8660550458715597, |
|
"grad_norm": 1.4102082252502441, |
|
"learning_rate": 4.476305332245662e-06, |
|
"logits/chosen": -1.0918750762939453, |
|
"logits/rejected": -2.3146743774414062, |
|
"logps/chosen": -314.5960998535156, |
|
"logps/rejected": -152.3535614013672, |
|
"loss": 0.028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.734642744064331, |
|
"rewards/margins": 6.135974407196045, |
|
"rewards/rejected": -2.4013314247131348, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.8807339449541285, |
|
"grad_norm": 2.8867928981781006, |
|
"learning_rate": 4.449804225621116e-06, |
|
"logits/chosen": -1.0288662910461426, |
|
"logits/rejected": -2.0701658725738525, |
|
"logps/chosen": -279.2713317871094, |
|
"logps/rejected": -180.374267578125, |
|
"loss": 0.0485, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 3.5982298851013184, |
|
"rewards/margins": 5.633719444274902, |
|
"rewards/rejected": -2.0354888439178467, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.8954128440366973, |
|
"grad_norm": 0.7778434753417969, |
|
"learning_rate": 4.422731629822887e-06, |
|
"logits/chosen": -0.9540915489196777, |
|
"logits/rejected": -1.9875534772872925, |
|
"logps/chosen": -314.85003662109375, |
|
"logps/rejected": -194.16896057128906, |
|
"loss": 0.0315, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.723828077316284, |
|
"rewards/margins": 6.088706016540527, |
|
"rewards/rejected": -2.364877462387085, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.9100917431192661, |
|
"grad_norm": 1.9667764902114868, |
|
"learning_rate": 4.395095479850396e-06, |
|
"logits/chosen": -0.9676120281219482, |
|
"logits/rejected": -1.9072697162628174, |
|
"logps/chosen": -287.99981689453125, |
|
"logps/rejected": -186.82659912109375, |
|
"loss": 0.0548, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.7472550868988037, |
|
"rewards/margins": 6.025314807891846, |
|
"rewards/rejected": -2.278059482574463, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.9247706422018349, |
|
"grad_norm": 0.4268924593925476, |
|
"learning_rate": 4.366903875881243e-06, |
|
"logits/chosen": -1.0968043804168701, |
|
"logits/rejected": -2.334925651550293, |
|
"logps/chosen": -275.3115234375, |
|
"logps/rejected": -164.202392578125, |
|
"loss": 0.0128, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.771523952484131, |
|
"rewards/margins": 6.7282609939575195, |
|
"rewards/rejected": -2.9567372798919678, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.9394495412844037, |
|
"grad_norm": 1.4270014762878418, |
|
"learning_rate": 4.3381650808970365e-06, |
|
"logits/chosen": -1.0460113286972046, |
|
"logits/rejected": -1.9695379734039307, |
|
"logps/chosen": -254.8202667236328, |
|
"logps/rejected": -185.63243103027344, |
|
"loss": 0.0226, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.706533432006836, |
|
"rewards/margins": 6.099806785583496, |
|
"rewards/rejected": -2.39327335357666, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.9541284403669725, |
|
"grad_norm": 0.6754117012023926, |
|
"learning_rate": 4.308887518261507e-06, |
|
"logits/chosen": -0.8909565210342407, |
|
"logits/rejected": -1.9432121515274048, |
|
"logps/chosen": -278.40216064453125, |
|
"logps/rejected": -195.16552734375, |
|
"loss": 0.0194, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.237884521484375, |
|
"rewards/margins": 6.639657974243164, |
|
"rewards/rejected": -2.4017739295959473, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.9688073394495413, |
|
"grad_norm": 0.7388483285903931, |
|
"learning_rate": 4.279079769251617e-06, |
|
"logits/chosen": -1.2244815826416016, |
|
"logits/rejected": -2.1885085105895996, |
|
"logps/chosen": -351.21783447265625, |
|
"logps/rejected": -210.98890686035156, |
|
"loss": 0.0184, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.041647911071777, |
|
"rewards/margins": 6.690797328948975, |
|
"rewards/rejected": -2.6491494178771973, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.9834862385321101, |
|
"grad_norm": 0.7370263934135437, |
|
"learning_rate": 4.248750570542373e-06, |
|
"logits/chosen": -1.0081679821014404, |
|
"logits/rejected": -2.0711734294891357, |
|
"logps/chosen": -272.2639465332031, |
|
"logps/rejected": -179.82412719726562, |
|
"loss": 0.0231, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.885261058807373, |
|
"rewards/margins": 6.468730449676514, |
|
"rewards/rejected": -2.5834696292877197, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.998165137614679, |
|
"grad_norm": 2.1839847564697266, |
|
"learning_rate": 4.21790881164611e-06, |
|
"logits/chosen": -0.9589763879776001, |
|
"logits/rejected": -2.103942394256592, |
|
"logps/chosen": -282.6980285644531, |
|
"logps/rejected": -193.8739776611328, |
|
"loss": 0.0379, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.211104869842529, |
|
"rewards/margins": 7.221211910247803, |
|
"rewards/rejected": -3.0101072788238525, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 1.0128440366972478, |
|
"grad_norm": 2.379425525665283, |
|
"learning_rate": 4.186563532306957e-06, |
|
"logits/chosen": -0.9432098865509033, |
|
"logits/rejected": -2.0608460903167725, |
|
"logps/chosen": -288.9028625488281, |
|
"logps/rejected": -168.07359313964844, |
|
"loss": 0.028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.061460494995117, |
|
"rewards/margins": 7.086147308349609, |
|
"rewards/rejected": -3.0246872901916504, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 1.0275229357798166, |
|
"grad_norm": 2.2438290119171143, |
|
"learning_rate": 4.154723919851291e-06, |
|
"logits/chosen": -1.1197127103805542, |
|
"logits/rejected": -2.0973258018493652, |
|
"logps/chosen": -290.60296630859375, |
|
"logps/rejected": -173.36465454101562, |
|
"loss": 0.0308, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.5277624130249023, |
|
"rewards/margins": 6.31058406829834, |
|
"rewards/rejected": -2.7828218936920166, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.0422018348623854, |
|
"grad_norm": 0.38025742769241333, |
|
"learning_rate": 4.122399306494918e-06, |
|
"logits/chosen": -1.1321005821228027, |
|
"logits/rejected": -2.2533721923828125, |
|
"logps/chosen": -336.11224365234375, |
|
"logps/rejected": -198.53457641601562, |
|
"loss": 0.0211, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.0418477058410645, |
|
"rewards/margins": 7.016913890838623, |
|
"rewards/rejected": -2.975067138671875, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 1.0568807339449542, |
|
"grad_norm": 1.0832823514938354, |
|
"learning_rate": 4.089599166607794e-06, |
|
"logits/chosen": -1.0980923175811768, |
|
"logits/rejected": -2.007105588912964, |
|
"logps/chosen": -292.0760803222656, |
|
"logps/rejected": -186.78787231445312, |
|
"loss": 0.0121, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.942317008972168, |
|
"rewards/margins": 7.760876178741455, |
|
"rewards/rejected": -3.818559169769287, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 1.071559633027523, |
|
"grad_norm": 0.542005717754364, |
|
"learning_rate": 4.05633311393708e-06, |
|
"logits/chosen": -0.9787145853042603, |
|
"logits/rejected": -2.0150396823883057, |
|
"logps/chosen": -257.6767883300781, |
|
"logps/rejected": -172.47512817382812, |
|
"loss": 0.0268, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.031050682067871, |
|
"rewards/margins": 7.134464263916016, |
|
"rewards/rejected": -3.1034140586853027, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 1.0862385321100918, |
|
"grad_norm": 1.513509750366211, |
|
"learning_rate": 4.022610898789349e-06, |
|
"logits/chosen": -1.008697509765625, |
|
"logits/rejected": -2.0967135429382324, |
|
"logps/chosen": -266.4443664550781, |
|
"logps/rejected": -186.60263061523438, |
|
"loss": 0.0169, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.120553970336914, |
|
"rewards/margins": 7.502930641174316, |
|
"rewards/rejected": -3.382376194000244, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 1.1009174311926606, |
|
"grad_norm": 1.2189836502075195, |
|
"learning_rate": 3.988442405172755e-06, |
|
"logits/chosen": -0.8885701894760132, |
|
"logits/rejected": -2.0014257431030273, |
|
"logps/chosen": -281.70147705078125, |
|
"logps/rejected": -201.9718780517578, |
|
"loss": 0.0121, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.45000696182251, |
|
"rewards/margins": 7.705287933349609, |
|
"rewards/rejected": -3.2552807331085205, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.1155963302752294, |
|
"grad_norm": 0.2563473880290985, |
|
"learning_rate": 3.953837647900031e-06, |
|
"logits/chosen": -0.9757863283157349, |
|
"logits/rejected": -2.0974419116973877, |
|
"logps/chosen": -273.5846862792969, |
|
"logps/rejected": -195.75936889648438, |
|
"loss": 0.0107, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.658951759338379, |
|
"rewards/margins": 8.117878913879395, |
|
"rewards/rejected": -3.4589266777038574, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 1.1302752293577982, |
|
"grad_norm": 2.6809535026550293, |
|
"learning_rate": 3.918806769653135e-06, |
|
"logits/chosen": -0.8756412863731384, |
|
"logits/rejected": -1.9975080490112305, |
|
"logps/chosen": -318.453857421875, |
|
"logps/rejected": -195.71372985839844, |
|
"loss": 0.0324, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.540386199951172, |
|
"rewards/margins": 7.758340358734131, |
|
"rewards/rejected": -3.217954158782959, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 1.144954128440367, |
|
"grad_norm": 0.34194982051849365, |
|
"learning_rate": 3.88336003801042e-06, |
|
"logits/chosen": -0.9494649171829224, |
|
"logits/rejected": -2.052715301513672, |
|
"logps/chosen": -255.02169799804688, |
|
"logps/rejected": -178.14224243164062, |
|
"loss": 0.0114, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.8328986167907715, |
|
"rewards/margins": 7.075186729431152, |
|
"rewards/rejected": -3.2422876358032227, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 1.1596330275229358, |
|
"grad_norm": 0.37112390995025635, |
|
"learning_rate": 3.847507842437205e-06, |
|
"logits/chosen": -0.8547274470329285, |
|
"logits/rejected": -2.1034629344940186, |
|
"logps/chosen": -296.8822021484375, |
|
"logps/rejected": -171.6925048828125, |
|
"loss": 0.007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.401730060577393, |
|
"rewards/margins": 8.271763801574707, |
|
"rewards/rejected": -3.8700337409973145, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 1.1743119266055047, |
|
"grad_norm": 0.6065702438354492, |
|
"learning_rate": 3.811260691240604e-06, |
|
"logits/chosen": -0.894873857498169, |
|
"logits/rejected": -2.086596965789795, |
|
"logps/chosen": -340.1643981933594, |
|
"logps/rejected": -188.5568389892578, |
|
"loss": 0.009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.769918441772461, |
|
"rewards/margins": 8.063861846923828, |
|
"rewards/rejected": -3.293943166732788, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.1889908256880735, |
|
"grad_norm": 0.38738325238227844, |
|
"learning_rate": 3.774629208489547e-06, |
|
"logits/chosen": -0.9661360383033752, |
|
"logits/rejected": -2.0905256271362305, |
|
"logps/chosen": -241.7164764404297, |
|
"logps/rejected": -172.8728790283203, |
|
"loss": 0.0084, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.948549270629883, |
|
"rewards/margins": 7.093012809753418, |
|
"rewards/rejected": -3.1444640159606934, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 1.2036697247706423, |
|
"grad_norm": 0.4064182639122009, |
|
"learning_rate": 3.7376241309008433e-06, |
|
"logits/chosen": -1.1252474784851074, |
|
"logits/rejected": -2.123969793319702, |
|
"logps/chosen": -326.73370361328125, |
|
"logps/rejected": -183.0895233154297, |
|
"loss": 0.0133, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.725476264953613, |
|
"rewards/margins": 7.916323661804199, |
|
"rewards/rejected": -3.190847635269165, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 1.218348623853211, |
|
"grad_norm": 0.3772048056125641, |
|
"learning_rate": 3.7002563046922502e-06, |
|
"logits/chosen": -1.0913598537445068, |
|
"logits/rejected": -2.229214668273926, |
|
"logps/chosen": -326.4932861328125, |
|
"logps/rejected": -173.02989196777344, |
|
"loss": 0.0044, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.724410533905029, |
|
"rewards/margins": 8.843596458435059, |
|
"rewards/rejected": -4.119184970855713, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 1.2330275229357799, |
|
"grad_norm": 0.48601555824279785, |
|
"learning_rate": 3.6625366824034337e-06, |
|
"logits/chosen": -0.8681567907333374, |
|
"logits/rejected": -2.067228317260742, |
|
"logps/chosen": -279.7916259765625, |
|
"logps/rejected": -206.9775390625, |
|
"loss": 0.0139, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.7722625732421875, |
|
"rewards/margins": 9.153127670288086, |
|
"rewards/rejected": -4.380865097045898, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 1.2477064220183487, |
|
"grad_norm": 1.1044621467590332, |
|
"learning_rate": 3.6244763196857714e-06, |
|
"logits/chosen": -0.9898172616958618, |
|
"logits/rejected": -2.130460262298584, |
|
"logps/chosen": -296.6734619140625, |
|
"logps/rejected": -181.456298828125, |
|
"loss": 0.013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.674115180969238, |
|
"rewards/margins": 8.792963981628418, |
|
"rewards/rejected": -4.1188483238220215, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.2623853211009175, |
|
"grad_norm": 1.3868632316589355, |
|
"learning_rate": 3.5860863720619333e-06, |
|
"logits/chosen": -1.0125945806503296, |
|
"logits/rejected": -2.080739736557007, |
|
"logps/chosen": -289.3682861328125, |
|
"logps/rejected": -184.58253479003906, |
|
"loss": 0.0137, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.030690670013428, |
|
"rewards/margins": 8.200502395629883, |
|
"rewards/rejected": -3.1698126792907715, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 1.2770642201834863, |
|
"grad_norm": 0.4792233407497406, |
|
"learning_rate": 3.547378091656186e-06, |
|
"logits/chosen": -0.9022351503372192, |
|
"logits/rejected": -2.0722413063049316, |
|
"logps/chosen": -293.7245178222656, |
|
"logps/rejected": -173.53054809570312, |
|
"loss": 0.0092, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.439302444458008, |
|
"rewards/margins": 8.599308013916016, |
|
"rewards/rejected": -4.160006046295166, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 1.2917431192660551, |
|
"grad_norm": 0.7374489903450012, |
|
"learning_rate": 3.5083628238963913e-06, |
|
"logits/chosen": -1.085463047027588, |
|
"logits/rejected": -1.969193935394287, |
|
"logps/chosen": -234.489013671875, |
|
"logps/rejected": -175.44613647460938, |
|
"loss": 0.0147, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.414360523223877, |
|
"rewards/margins": 7.9778642654418945, |
|
"rewards/rejected": -3.5635030269622803, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 1.306422018348624, |
|
"grad_norm": 1.7487801313400269, |
|
"learning_rate": 3.4690520041886473e-06, |
|
"logits/chosen": -0.9150568246841431, |
|
"logits/rejected": -2.0502333641052246, |
|
"logps/chosen": -275.4502258300781, |
|
"logps/rejected": -212.3257598876953, |
|
"loss": 0.0157, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.58690071105957, |
|
"rewards/margins": 8.234541893005371, |
|
"rewards/rejected": -3.647641181945801, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 1.3211009174311927, |
|
"grad_norm": 0.12792479991912842, |
|
"learning_rate": 3.4294571545655653e-06, |
|
"logits/chosen": -0.91706383228302, |
|
"logits/rejected": -2.196730613708496, |
|
"logps/chosen": -293.5966796875, |
|
"logps/rejected": -180.54701232910156, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.876993656158447, |
|
"rewards/margins": 9.287820816040039, |
|
"rewards/rejected": -4.41082763671875, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.3357798165137615, |
|
"grad_norm": 1.574942708015442, |
|
"learning_rate": 3.38958988030915e-06, |
|
"logits/chosen": -1.1890692710876465, |
|
"logits/rejected": -2.066960334777832, |
|
"logps/chosen": -274.7825622558594, |
|
"logps/rejected": -224.47837829589844, |
|
"loss": 0.0592, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 4.447505950927734, |
|
"rewards/margins": 8.213159561157227, |
|
"rewards/rejected": -3.7656538486480713, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 1.3504587155963304, |
|
"grad_norm": 1.0345042943954468, |
|
"learning_rate": 3.3494618665492833e-06, |
|
"logits/chosen": -1.1099860668182373, |
|
"logits/rejected": -2.0204684734344482, |
|
"logps/chosen": -255.40478515625, |
|
"logps/rejected": -192.52752685546875, |
|
"loss": 0.0152, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.036734104156494, |
|
"rewards/margins": 7.486913681030273, |
|
"rewards/rejected": -3.4501795768737793, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 1.3651376146788992, |
|
"grad_norm": 0.1829257309436798, |
|
"learning_rate": 3.3090848748388042e-06, |
|
"logits/chosen": -1.0115846395492554, |
|
"logits/rejected": -2.1213629245758057, |
|
"logps/chosen": -353.5410461425781, |
|
"logps/rejected": -192.9250030517578, |
|
"loss": 0.0061, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.421483993530273, |
|
"rewards/margins": 8.999296188354492, |
|
"rewards/rejected": -4.577812194824219, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 1.379816513761468, |
|
"grad_norm": 0.3030329942703247, |
|
"learning_rate": 3.2684707397061887e-06, |
|
"logits/chosen": -1.0969910621643066, |
|
"logits/rejected": -2.0923759937286377, |
|
"logps/chosen": -293.1423645019531, |
|
"logps/rejected": -173.88784790039062, |
|
"loss": 0.0124, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.567940711975098, |
|
"rewards/margins": 8.394798278808594, |
|
"rewards/rejected": -3.8268580436706543, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 1.3944954128440368, |
|
"grad_norm": 0.8538657426834106, |
|
"learning_rate": 3.2276313651868364e-06, |
|
"logits/chosen": -0.9523632526397705, |
|
"logits/rejected": -2.0854203701019287, |
|
"logps/chosen": -297.4543762207031, |
|
"logps/rejected": -162.13568115234375, |
|
"loss": 0.0139, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.548934459686279, |
|
"rewards/margins": 8.458111763000488, |
|
"rewards/rejected": -3.909177541732788, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.4091743119266056, |
|
"grad_norm": 0.4353146553039551, |
|
"learning_rate": 3.1865787213339926e-06, |
|
"logits/chosen": -0.9564714431762695, |
|
"logits/rejected": -2.0908193588256836, |
|
"logps/chosen": -281.3487243652344, |
|
"logps/rejected": -186.54757690429688, |
|
"loss": 0.0115, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.7654829025268555, |
|
"rewards/margins": 9.230217933654785, |
|
"rewards/rejected": -4.464734077453613, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 1.4238532110091744, |
|
"grad_norm": 0.40312162041664124, |
|
"learning_rate": 3.1453248407103156e-06, |
|
"logits/chosen": -0.9966449737548828, |
|
"logits/rejected": -2.1248295307159424, |
|
"logps/chosen": -287.3999328613281, |
|
"logps/rejected": -169.44496154785156, |
|
"loss": 0.008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.19376802444458, |
|
"rewards/margins": 8.428169250488281, |
|
"rewards/rejected": -4.234401702880859, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 1.4385321100917432, |
|
"grad_norm": 1.217081904411316, |
|
"learning_rate": 3.1038818148611178e-06, |
|
"logits/chosen": -1.022183895111084, |
|
"logits/rejected": -2.0069739818573, |
|
"logps/chosen": -312.16973876953125, |
|
"logps/rejected": -181.82955932617188, |
|
"loss": 0.0132, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.685446262359619, |
|
"rewards/margins": 8.785126686096191, |
|
"rewards/rejected": -4.099679946899414, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 1.453211009174312, |
|
"grad_norm": 0.6292124390602112, |
|
"learning_rate": 3.062261790770331e-06, |
|
"logits/chosen": -0.8997288942337036, |
|
"logits/rejected": -1.9895069599151611, |
|
"logps/chosen": -259.7031555175781, |
|
"logps/rejected": -182.4678497314453, |
|
"loss": 0.0253, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.453324794769287, |
|
"rewards/margins": 7.995000839233398, |
|
"rewards/rejected": -3.541675567626953, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 1.4678899082568808, |
|
"grad_norm": 0.46019911766052246, |
|
"learning_rate": 3.0204769673002123e-06, |
|
"logits/chosen": -0.981975793838501, |
|
"logits/rejected": -2.123629331588745, |
|
"logps/chosen": -333.59722900390625, |
|
"logps/rejected": -198.2655487060547, |
|
"loss": 0.0062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.988855838775635, |
|
"rewards/margins": 8.937817573547363, |
|
"rewards/rejected": -3.948960781097412, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.4825688073394496, |
|
"grad_norm": 1.5788525342941284, |
|
"learning_rate": 2.978539591615848e-06, |
|
"logits/chosen": -1.0232621431350708, |
|
"logits/rejected": -1.9014160633087158, |
|
"logps/chosen": -299.21649169921875, |
|
"logps/rejected": -196.33389282226562, |
|
"loss": 0.0167, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 4.068594932556152, |
|
"rewards/margins": 8.521183013916016, |
|
"rewards/rejected": -4.452587604522705, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 1.4972477064220184, |
|
"grad_norm": 0.18567878007888794, |
|
"learning_rate": 2.936461955595501e-06, |
|
"logits/chosen": -1.0283303260803223, |
|
"logits/rejected": -2.1100425720214844, |
|
"logps/chosen": -298.8528137207031, |
|
"logps/rejected": -191.35086059570312, |
|
"loss": 0.0068, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.9132208824157715, |
|
"rewards/margins": 8.617488861083984, |
|
"rewards/rejected": -3.704267978668213, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 1.5119266055045872, |
|
"grad_norm": 0.13169872760772705, |
|
"learning_rate": 2.8942563922278487e-06, |
|
"logits/chosen": -1.0413228273391724, |
|
"logits/rejected": -2.1321609020233154, |
|
"logps/chosen": -286.2435607910156, |
|
"logps/rejected": -196.66256713867188, |
|
"loss": 0.011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.733799457550049, |
|
"rewards/margins": 9.53592586517334, |
|
"rewards/rejected": -4.802126407623291, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 1.526605504587156, |
|
"grad_norm": 2.0917320251464844, |
|
"learning_rate": 2.8519352719971783e-06, |
|
"logits/chosen": -1.097141981124878, |
|
"logits/rejected": -2.0799503326416016, |
|
"logps/chosen": -316.93597412109375, |
|
"logps/rejected": -201.67100524902344, |
|
"loss": 0.0309, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 4.9373931884765625, |
|
"rewards/margins": 8.860273361206055, |
|
"rewards/rejected": -3.9228808879852295, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 1.5412844036697249, |
|
"grad_norm": 0.6296855807304382, |
|
"learning_rate": 2.8095109992575824e-06, |
|
"logits/chosen": -0.9797852039337158, |
|
"logits/rejected": -2.088029146194458, |
|
"logps/chosen": -328.76251220703125, |
|
"logps/rejected": -201.71078491210938, |
|
"loss": 0.0049, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.399056911468506, |
|
"rewards/margins": 9.34478759765625, |
|
"rewards/rejected": -3.945730209350586, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.5559633027522937, |
|
"grad_norm": 0.3409838080406189, |
|
"learning_rate": 2.7669960085972407e-06, |
|
"logits/chosen": -0.9346829652786255, |
|
"logits/rejected": -2.2055399417877197, |
|
"logps/chosen": -351.57489013671875, |
|
"logps/rejected": -219.8714141845703, |
|
"loss": 0.0047, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.243146896362305, |
|
"rewards/margins": 9.388729095458984, |
|
"rewards/rejected": -4.145582675933838, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 1.5706422018348625, |
|
"grad_norm": 0.791716456413269, |
|
"learning_rate": 2.7244027611938247e-06, |
|
"logits/chosen": -0.8380637764930725, |
|
"logits/rejected": -1.925654649734497, |
|
"logps/chosen": -251.362548828125, |
|
"logps/rejected": -220.16436767578125, |
|
"loss": 0.0215, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.47086763381958, |
|
"rewards/margins": 8.536866188049316, |
|
"rewards/rejected": -4.065998554229736, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 1.5853211009174313, |
|
"grad_norm": 0.2506906986236572, |
|
"learning_rate": 2.6817437411621194e-06, |
|
"logits/chosen": -0.9830411076545715, |
|
"logits/rejected": -2.0578300952911377, |
|
"logps/chosen": -342.7967529296875, |
|
"logps/rejected": -237.17025756835938, |
|
"loss": 0.007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.036384582519531, |
|
"rewards/margins": 9.013383865356445, |
|
"rewards/rejected": -3.976999521255493, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.22265683114528656, |
|
"learning_rate": 2.639031451894923e-06, |
|
"logits/chosen": -1.028990387916565, |
|
"logits/rejected": -1.9095451831817627, |
|
"logps/chosen": -330.1585998535156, |
|
"logps/rejected": -222.2620086669922, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.05686616897583, |
|
"rewards/margins": 9.352256774902344, |
|
"rewards/rejected": -4.2953901290893555, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 1.614678899082569, |
|
"grad_norm": 0.857473611831665, |
|
"learning_rate": 2.5962784123982843e-06, |
|
"logits/chosen": -1.049895167350769, |
|
"logits/rejected": -2.1700665950775146, |
|
"logps/chosen": -305.7288513183594, |
|
"logps/rejected": -198.574462890625, |
|
"loss": 0.0122, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.7103071212768555, |
|
"rewards/margins": 9.439103126525879, |
|
"rewards/rejected": -4.728795528411865, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.6293577981651377, |
|
"grad_norm": 0.2698463797569275, |
|
"learning_rate": 2.5534971536221804e-06, |
|
"logits/chosen": -0.861595630645752, |
|
"logits/rejected": -1.9534931182861328, |
|
"logps/chosen": -268.7453918457031, |
|
"logps/rejected": -191.55238342285156, |
|
"loss": 0.024, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 4.435842514038086, |
|
"rewards/margins": 9.0311918258667, |
|
"rewards/rejected": -4.595349311828613, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 1.6440366972477065, |
|
"grad_norm": 1.8740975856781006, |
|
"learning_rate": 2.5107002147876814e-06, |
|
"logits/chosen": -1.010701298713684, |
|
"logits/rejected": -1.9186618328094482, |
|
"logps/chosen": -263.1980895996094, |
|
"logps/rejected": -206.22360229492188, |
|
"loss": 0.0135, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.652899742126465, |
|
"rewards/margins": 9.141688346862793, |
|
"rewards/rejected": -4.488787651062012, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 1.6587155963302753, |
|
"grad_norm": 2.0281364917755127, |
|
"learning_rate": 2.467900139711693e-06, |
|
"logits/chosen": -1.0440551042556763, |
|
"logits/rejected": -1.971301555633545, |
|
"logps/chosen": -272.8301696777344, |
|
"logps/rejected": -197.07269287109375, |
|
"loss": 0.0238, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.243446350097656, |
|
"rewards/margins": 8.737079620361328, |
|
"rewards/rejected": -4.493633270263672, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 1.6733944954128441, |
|
"grad_norm": 0.23005536198616028, |
|
"learning_rate": 2.4251094731303586e-06, |
|
"logits/chosen": -0.9269182085990906, |
|
"logits/rejected": -2.089838981628418, |
|
"logps/chosen": -291.0529479980469, |
|
"logps/rejected": -179.94895935058594, |
|
"loss": 0.0057, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.278434753417969, |
|
"rewards/margins": 9.40982723236084, |
|
"rewards/rejected": -4.131391525268555, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 1.688073394495413, |
|
"grad_norm": 0.0681939348578453, |
|
"learning_rate": 2.3823407570221812e-06, |
|
"logits/chosen": -0.8353657126426697, |
|
"logits/rejected": -2.02689266204834, |
|
"logps/chosen": -300.3406982421875, |
|
"logps/rejected": -175.13296508789062, |
|
"loss": 0.006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.921877384185791, |
|
"rewards/margins": 9.119339942932129, |
|
"rewards/rejected": -4.197463035583496, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.7027522935779817, |
|
"grad_norm": 0.24333705008029938, |
|
"learning_rate": 2.3396065269319655e-06, |
|
"logits/chosen": -1.0092397928237915, |
|
"logits/rejected": -2.1053268909454346, |
|
"logps/chosen": -300.02294921875, |
|
"logps/rejected": -172.78187561035156, |
|
"loss": 0.0091, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.987635612487793, |
|
"rewards/margins": 9.38530445098877, |
|
"rewards/rejected": -4.397668361663818, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 1.7174311926605506, |
|
"grad_norm": 0.6797487139701843, |
|
"learning_rate": 2.2969193082966353e-06, |
|
"logits/chosen": -0.8851895332336426, |
|
"logits/rejected": -2.036161422729492, |
|
"logps/chosen": -285.2466735839844, |
|
"logps/rejected": -189.85882568359375, |
|
"loss": 0.0061, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.067909240722656, |
|
"rewards/margins": 9.919548988342285, |
|
"rewards/rejected": -4.851640701293945, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 1.7321100917431194, |
|
"grad_norm": 0.6367282271385193, |
|
"learning_rate": 2.2542916127740194e-06, |
|
"logits/chosen": -0.8543779253959656, |
|
"logits/rejected": -1.752845048904419, |
|
"logps/chosen": -312.6046142578125, |
|
"logps/rejected": -234.28988647460938, |
|
"loss": 0.0121, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.974181175231934, |
|
"rewards/margins": 9.56247329711914, |
|
"rewards/rejected": -4.588292121887207, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 1.7467889908256882, |
|
"grad_norm": 0.2897071838378906, |
|
"learning_rate": 2.211735934575674e-06, |
|
"logits/chosen": -0.9410618543624878, |
|
"logits/rejected": -2.174349308013916, |
|
"logps/chosen": -281.28863525390625, |
|
"logps/rejected": -165.9616241455078, |
|
"loss": 0.0067, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.256411552429199, |
|
"rewards/margins": 9.012039184570312, |
|
"rewards/rejected": -4.755627632141113, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 1.761467889908257, |
|
"grad_norm": 0.41199827194213867, |
|
"learning_rate": 2.1692647468048235e-06, |
|
"logits/chosen": -1.0583674907684326, |
|
"logits/rejected": -2.0003695487976074, |
|
"logps/chosen": -307.04766845703125, |
|
"logps/rejected": -206.0718231201172, |
|
"loss": 0.0062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.970597743988037, |
|
"rewards/margins": 10.884113311767578, |
|
"rewards/rejected": -5.913515567779541, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.7761467889908258, |
|
"grad_norm": 0.5175734162330627, |
|
"learning_rate": 2.126890497800477e-06, |
|
"logits/chosen": -1.0432560443878174, |
|
"logits/rejected": -1.912244439125061, |
|
"logps/chosen": -297.5209655761719, |
|
"logps/rejected": -201.7635498046875, |
|
"loss": 0.0184, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.594603538513184, |
|
"rewards/margins": 8.852926254272461, |
|
"rewards/rejected": -4.258323669433594, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.7908256880733946, |
|
"grad_norm": 0.6547983288764954, |
|
"learning_rate": 2.084625607488816e-06, |
|
"logits/chosen": -0.9311404228210449, |
|
"logits/rejected": -2.1106457710266113, |
|
"logps/chosen": -275.57183837890625, |
|
"logps/rejected": -188.14370727539062, |
|
"loss": 0.0105, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.0517778396606445, |
|
"rewards/margins": 9.953323364257812, |
|
"rewards/rejected": -4.901544094085693, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 1.8055045871559634, |
|
"grad_norm": 0.33203306794166565, |
|
"learning_rate": 2.0424824637428995e-06, |
|
"logits/chosen": -0.9116280674934387, |
|
"logits/rejected": -2.247035026550293, |
|
"logps/chosen": -267.2120361328125, |
|
"logps/rejected": -171.6895751953125, |
|
"loss": 0.0084, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.797155857086182, |
|
"rewards/margins": 9.531312942504883, |
|
"rewards/rejected": -4.734157562255859, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 1.8201834862385322, |
|
"grad_norm": 0.5430265069007874, |
|
"learning_rate": 2.0004734187517744e-06, |
|
"logits/chosen": -1.082189917564392, |
|
"logits/rejected": -1.9552661180496216, |
|
"logps/chosen": -318.3630676269531, |
|
"logps/rejected": -176.5325469970703, |
|
"loss": 0.0103, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.9373297691345215, |
|
"rewards/margins": 9.501139640808105, |
|
"rewards/rejected": -4.563809871673584, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 1.834862385321101, |
|
"grad_norm": 0.19558808207511902, |
|
"learning_rate": 1.9586107854000327e-06, |
|
"logits/chosen": -1.1152639389038086, |
|
"logits/rejected": -2.129647731781006, |
|
"logps/chosen": -296.6053466796875, |
|
"logps/rejected": -169.00213623046875, |
|
"loss": 0.0058, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.840038299560547, |
|
"rewards/margins": 9.542232513427734, |
|
"rewards/rejected": -4.702193737030029, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.8495412844036698, |
|
"grad_norm": 0.45886340737342834, |
|
"learning_rate": 1.916906833658899e-06, |
|
"logits/chosen": -0.8982828855514526, |
|
"logits/rejected": -2.0570406913757324, |
|
"logps/chosen": -324.3260803222656, |
|
"logps/rejected": -216.9913330078125, |
|
"loss": 0.0041, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.856814861297607, |
|
"rewards/margins": 9.941521644592285, |
|
"rewards/rejected": -5.084706783294678, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 1.8642201834862386, |
|
"grad_norm": 0.9119444489479065, |
|
"learning_rate": 1.8753737869898921e-06, |
|
"logits/chosen": -0.972162663936615, |
|
"logits/rejected": -2.016150951385498, |
|
"logps/chosen": -248.53463745117188, |
|
"logps/rejected": -184.32382202148438, |
|
"loss": 0.007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.359373092651367, |
|
"rewards/margins": 10.132734298706055, |
|
"rewards/rejected": -5.7733612060546875, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 1.8788990825688074, |
|
"grad_norm": 0.12387188524007797, |
|
"learning_rate": 1.8340238187621185e-06, |
|
"logits/chosen": -0.8442805409431458, |
|
"logits/rejected": -1.9759818315505981, |
|
"logps/chosen": -262.56671142578125, |
|
"logps/rejected": -175.5653839111328, |
|
"loss": 0.0327, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 4.67296028137207, |
|
"rewards/margins": 9.085709571838379, |
|
"rewards/rejected": -4.412749290466309, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 1.8935779816513763, |
|
"grad_norm": 0.5057358145713806, |
|
"learning_rate": 1.7928690486842438e-06, |
|
"logits/chosen": -1.015974760055542, |
|
"logits/rejected": -2.1081368923187256, |
|
"logps/chosen": -253.27394104003906, |
|
"logps/rejected": -160.09469604492188, |
|
"loss": 0.0064, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.807024002075195, |
|
"rewards/margins": 9.237527847290039, |
|
"rewards/rejected": -4.4305033683776855, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 1.908256880733945, |
|
"grad_norm": 0.6048524379730225, |
|
"learning_rate": 1.7519215392522026e-06, |
|
"logits/chosen": -0.9711456298828125, |
|
"logits/rejected": -2.1203389167785645, |
|
"logps/chosen": -282.3438720703125, |
|
"logps/rejected": -166.2510528564453, |
|
"loss": 0.0064, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.09495735168457, |
|
"rewards/margins": 9.728025436401367, |
|
"rewards/rejected": -4.633067607879639, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.9229357798165139, |
|
"grad_norm": 0.5542910695075989, |
|
"learning_rate": 1.7111932922136715e-06, |
|
"logits/chosen": -0.9748891592025757, |
|
"logits/rejected": -1.8318710327148438, |
|
"logps/chosen": -253.21209716796875, |
|
"logps/rejected": -202.5255889892578, |
|
"loss": 0.019, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 4.323376178741455, |
|
"rewards/margins": 9.282448768615723, |
|
"rewards/rejected": -4.959072589874268, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 1.9376146788990827, |
|
"grad_norm": 0.24626314640045166, |
|
"learning_rate": 1.6706962450503408e-06, |
|
"logits/chosen": -0.8283478617668152, |
|
"logits/rejected": -2.0624401569366455, |
|
"logps/chosen": -282.2995300292969, |
|
"logps/rejected": -189.75595092773438, |
|
"loss": 0.0059, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.900084495544434, |
|
"rewards/margins": 10.33100414276123, |
|
"rewards/rejected": -5.4309186935424805, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.9522935779816515, |
|
"grad_norm": 1.167913794517517, |
|
"learning_rate": 1.630442267479034e-06, |
|
"logits/chosen": -0.789318323135376, |
|
"logits/rejected": -1.9187240600585938, |
|
"logps/chosen": -266.4274597167969, |
|
"logps/rejected": -198.47540283203125, |
|
"loss": 0.0113, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.9975690841674805, |
|
"rewards/margins": 9.762743949890137, |
|
"rewards/rejected": -4.765174865722656, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 1.9669724770642203, |
|
"grad_norm": 0.05298791825771332, |
|
"learning_rate": 1.5904431579726837e-06, |
|
"logits/chosen": -0.9226531982421875, |
|
"logits/rejected": -2.0884995460510254, |
|
"logps/chosen": -295.6236267089844, |
|
"logps/rejected": -165.60801696777344, |
|
"loss": 0.0066, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.444765567779541, |
|
"rewards/margins": 9.557327270507812, |
|
"rewards/rejected": -5.1125617027282715, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.981651376146789, |
|
"grad_norm": 0.11875250190496445, |
|
"learning_rate": 1.5507106403021897e-06, |
|
"logits/chosen": -0.8945147395133972, |
|
"logits/rejected": -2.1213436126708984, |
|
"logps/chosen": -329.32354736328125, |
|
"logps/rejected": -205.64938354492188, |
|
"loss": 0.0028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.863536834716797, |
|
"rewards/margins": 10.598997116088867, |
|
"rewards/rejected": -4.735459804534912, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.996330275229358, |
|
"grad_norm": 0.16463226079940796, |
|
"learning_rate": 1.511256360100171e-06, |
|
"logits/chosen": -0.8653547167778015, |
|
"logits/rejected": -2.120985746383667, |
|
"logps/chosen": -294.7039489746094, |
|
"logps/rejected": -191.1700897216797, |
|
"loss": 0.0052, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.6077494621276855, |
|
"rewards/margins": 9.97114372253418, |
|
"rewards/rejected": -5.363394737243652, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 2.0110091743119267, |
|
"grad_norm": 0.5620644092559814, |
|
"learning_rate": 1.4720918814476234e-06, |
|
"logits/chosen": -1.0870428085327148, |
|
"logits/rejected": -2.203629493713379, |
|
"logps/chosen": -255.451171875, |
|
"logps/rejected": -179.9131317138672, |
|
"loss": 0.0107, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.301146984100342, |
|
"rewards/margins": 10.202719688415527, |
|
"rewards/rejected": -5.9015727043151855, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 2.0256880733944955, |
|
"grad_norm": 0.22174260020256042, |
|
"learning_rate": 1.4332286834844792e-06, |
|
"logits/chosen": -1.1182466745376587, |
|
"logits/rejected": -2.1164536476135254, |
|
"logps/chosen": -286.516357421875, |
|
"logps/rejected": -188.3638458251953, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.640047550201416, |
|
"rewards/margins": 9.481383323669434, |
|
"rewards/rejected": -4.841336250305176, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 2.0403669724770643, |
|
"grad_norm": 0.33157217502593994, |
|
"learning_rate": 1.3946781570450563e-06, |
|
"logits/chosen": -0.9743894338607788, |
|
"logits/rejected": -2.0844216346740723, |
|
"logps/chosen": -303.1180419921875, |
|
"logps/rejected": -197.849853515625, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.488700866699219, |
|
"rewards/margins": 10.087553024291992, |
|
"rewards/rejected": -4.598852157592773, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 2.055045871559633, |
|
"grad_norm": 0.2071988433599472, |
|
"learning_rate": 1.3564516013194023e-06, |
|
"logits/chosen": -0.7817774415016174, |
|
"logits/rejected": -1.967786431312561, |
|
"logps/chosen": -266.5663757324219, |
|
"logps/rejected": -185.63877868652344, |
|
"loss": 0.004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.6336894035339355, |
|
"rewards/margins": 10.230566024780273, |
|
"rewards/rejected": -5.596876621246338, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.069724770642202, |
|
"grad_norm": 0.35437583923339844, |
|
"learning_rate": 1.3185602205414894e-06, |
|
"logits/chosen": -0.9503396153450012, |
|
"logits/rejected": -2.0260818004608154, |
|
"logps/chosen": -269.90093994140625, |
|
"logps/rejected": -172.7965850830078, |
|
"loss": 0.0059, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.775392532348633, |
|
"rewards/margins": 9.074084281921387, |
|
"rewards/rejected": -4.298691272735596, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 2.0844036697247708, |
|
"grad_norm": 0.09949786216020584, |
|
"learning_rate": 1.2810151207052465e-06, |
|
"logits/chosen": -1.025212049484253, |
|
"logits/rejected": -2.090640068054199, |
|
"logps/chosen": -335.35882568359375, |
|
"logps/rejected": -221.98355102539062, |
|
"loss": 0.0042, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.815802097320557, |
|
"rewards/margins": 9.805765151977539, |
|
"rewards/rejected": -4.989964008331299, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 2.0990825688073396, |
|
"grad_norm": 0.1902090311050415, |
|
"learning_rate": 1.2438273063093811e-06, |
|
"logits/chosen": -0.8500208854675293, |
|
"logits/rejected": -1.9380009174346924, |
|
"logps/chosen": -277.2483215332031, |
|
"logps/rejected": -168.717529296875, |
|
"loss": 0.0127, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.573535442352295, |
|
"rewards/margins": 9.026323318481445, |
|
"rewards/rejected": -4.452788352966309, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 2.1137614678899084, |
|
"grad_norm": 0.3208858072757721, |
|
"learning_rate": 1.2070076771319536e-06, |
|
"logits/chosen": -1.082637906074524, |
|
"logits/rejected": -1.9498220682144165, |
|
"logps/chosen": -353.7499084472656, |
|
"logps/rejected": -200.4058074951172, |
|
"loss": 0.0101, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.898225784301758, |
|
"rewards/margins": 8.967299461364746, |
|
"rewards/rejected": -4.0690741539001465, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 2.128440366972477, |
|
"grad_norm": 1.1437596082687378, |
|
"learning_rate": 1.1705670250356417e-06, |
|
"logits/chosen": -0.8648325800895691, |
|
"logits/rejected": -2.037424087524414, |
|
"logps/chosen": -311.7782287597656, |
|
"logps/rejected": -195.8933563232422, |
|
"loss": 0.0121, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.304541110992432, |
|
"rewards/margins": 10.098506927490234, |
|
"rewards/rejected": -4.7939653396606445, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.143119266055046, |
|
"grad_norm": 0.16270968317985535, |
|
"learning_rate": 1.1345160308046413e-06, |
|
"logits/chosen": -0.9791809916496277, |
|
"logits/rejected": -2.24078369140625, |
|
"logps/chosen": -382.8855895996094, |
|
"logps/rejected": -205.8779754638672, |
|
"loss": 0.0044, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.505633354187012, |
|
"rewards/margins": 11.220186233520508, |
|
"rewards/rejected": -5.714553356170654, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 2.157798165137615, |
|
"grad_norm": 2.2618370056152344, |
|
"learning_rate": 1.0988652610141154e-06, |
|
"logits/chosen": -0.9164503216743469, |
|
"logits/rejected": -1.9510498046875, |
|
"logps/chosen": -276.1203918457031, |
|
"logps/rejected": -212.5254669189453, |
|
"loss": 0.0214, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 4.720728874206543, |
|
"rewards/margins": 9.504093170166016, |
|
"rewards/rejected": -4.783364295959473, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 2.1724770642201836, |
|
"grad_norm": 0.08572974801063538, |
|
"learning_rate": 1.063625164933124e-06, |
|
"logits/chosen": -0.8781817555427551, |
|
"logits/rejected": -2.0793867111206055, |
|
"logps/chosen": -330.6020202636719, |
|
"logps/rejected": -208.72425842285156, |
|
"loss": 0.0081, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.462750434875488, |
|
"rewards/margins": 11.203109741210938, |
|
"rewards/rejected": -5.740358352661133, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 2.1871559633027524, |
|
"grad_norm": 0.5944895148277283, |
|
"learning_rate": 1.0288060714619359e-06, |
|
"logits/chosen": -1.1157301664352417, |
|
"logits/rejected": -2.214977502822876, |
|
"logps/chosen": -316.9060363769531, |
|
"logps/rejected": -167.6466522216797, |
|
"loss": 0.0098, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.41005277633667, |
|
"rewards/margins": 10.054335594177246, |
|
"rewards/rejected": -4.644283294677734, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 2.2018348623853212, |
|
"grad_norm": 0.5239315629005432, |
|
"learning_rate": 9.944181861046188e-07, |
|
"logits/chosen": -0.8929880857467651, |
|
"logits/rejected": -1.9771008491516113, |
|
"logps/chosen": -334.0789489746094, |
|
"logps/rejected": -201.40476989746094, |
|
"loss": 0.0112, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.018026351928711, |
|
"rewards/margins": 10.597818374633789, |
|
"rewards/rejected": -5.579792499542236, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.21651376146789, |
|
"grad_norm": 0.17582310736179352, |
|
"learning_rate": 9.604715879777986e-07, |
|
"logits/chosen": -0.9466437101364136, |
|
"logits/rejected": -2.1750948429107666, |
|
"logps/chosen": -279.5908203125, |
|
"logps/rejected": -154.12644958496094, |
|
"loss": 0.0055, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.104902744293213, |
|
"rewards/margins": 10.134696006774902, |
|
"rewards/rejected": -5.029792785644531, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 2.231192660550459, |
|
"grad_norm": 0.44052350521087646, |
|
"learning_rate": 9.269762268564616e-07, |
|
"logits/chosen": -1.0591435432434082, |
|
"logits/rejected": -2.134446382522583, |
|
"logps/chosen": -255.2496337890625, |
|
"logps/rejected": -161.16136169433594, |
|
"loss": 0.012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.842598915100098, |
|
"rewards/margins": 9.835264205932617, |
|
"rewards/rejected": -4.992665767669678, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 2.2458715596330276, |
|
"grad_norm": 0.702462375164032, |
|
"learning_rate": 8.939419202576694e-07, |
|
"logits/chosen": -0.768172025680542, |
|
"logits/rejected": -1.7977386713027954, |
|
"logps/chosen": -258.4624938964844, |
|
"logps/rejected": -183.80621337890625, |
|
"loss": 0.0133, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.580810070037842, |
|
"rewards/margins": 7.86204719543457, |
|
"rewards/rejected": -3.2812376022338867, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 2.2605504587155965, |
|
"grad_norm": 0.4431416690349579, |
|
"learning_rate": 8.61378350563033e-07, |
|
"logits/chosen": -0.9345456957817078, |
|
"logits/rejected": -1.9868954420089722, |
|
"logps/chosen": -250.33721923828125, |
|
"logps/rejected": -193.64549255371094, |
|
"loss": 0.0059, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.593289375305176, |
|
"rewards/margins": 9.057455062866211, |
|
"rewards/rejected": -4.464165210723877, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 2.2752293577981653, |
|
"grad_norm": 0.30388739705085754, |
|
"learning_rate": 8.292950621808022e-07, |
|
"logits/chosen": -0.9780189990997314, |
|
"logits/rejected": -2.0176703929901123, |
|
"logps/chosen": -285.4472961425781, |
|
"logps/rejected": -191.96495056152344, |
|
"loss": 0.005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.00777006149292, |
|
"rewards/margins": 10.013311386108398, |
|
"rewards/rejected": -5.005540370941162, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.289908256880734, |
|
"grad_norm": 0.3784541189670563, |
|
"learning_rate": 7.977014587483925e-07, |
|
"logits/chosen": -1.0011767148971558, |
|
"logits/rejected": -2.0550498962402344, |
|
"logps/chosen": -273.92138671875, |
|
"logps/rejected": -228.16741943359375, |
|
"loss": 0.011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.854176998138428, |
|
"rewards/margins": 9.765246391296387, |
|
"rewards/rejected": -4.911068916320801, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 2.304587155963303, |
|
"grad_norm": 0.2992611527442932, |
|
"learning_rate": 7.666068003761684e-07, |
|
"logits/chosen": -0.9273378849029541, |
|
"logits/rejected": -2.042013645172119, |
|
"logps/chosen": -296.50616455078125, |
|
"logps/rejected": -169.5068817138672, |
|
"loss": 0.0027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.002749443054199, |
|
"rewards/margins": 10.678738594055176, |
|
"rewards/rejected": -5.675989151000977, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 2.3192660550458717, |
|
"grad_norm": 0.23903429508209229, |
|
"learning_rate": 7.360202009332993e-07, |
|
"logits/chosen": -1.0399566888809204, |
|
"logits/rejected": -2.143623113632202, |
|
"logps/chosen": -296.7044677734375, |
|
"logps/rejected": -185.7502899169922, |
|
"loss": 0.0048, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.675933361053467, |
|
"rewards/margins": 10.062166213989258, |
|
"rewards/rejected": -5.386232376098633, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 2.3339449541284405, |
|
"grad_norm": 0.23702357709407806, |
|
"learning_rate": 7.059506253764773e-07, |
|
"logits/chosen": -0.9900916814804077, |
|
"logits/rejected": -2.093594789505005, |
|
"logps/chosen": -314.4263000488281, |
|
"logps/rejected": -194.73269653320312, |
|
"loss": 0.0066, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.672959327697754, |
|
"rewards/margins": 10.092663764953613, |
|
"rewards/rejected": -5.419704437255859, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 2.3486238532110093, |
|
"grad_norm": 0.40923863649368286, |
|
"learning_rate": 6.764068871222825e-07, |
|
"logits/chosen": -0.7488622069358826, |
|
"logits/rejected": -1.9413087368011475, |
|
"logps/chosen": -287.322021484375, |
|
"logps/rejected": -188.12283325195312, |
|
"loss": 0.0055, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.020465850830078, |
|
"rewards/margins": 9.374917984008789, |
|
"rewards/rejected": -4.354452133178711, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.363302752293578, |
|
"grad_norm": 0.19543257355690002, |
|
"learning_rate": 6.473976454639608e-07, |
|
"logits/chosen": -0.9299582839012146, |
|
"logits/rejected": -2.107851266860962, |
|
"logps/chosen": -293.9842529296875, |
|
"logps/rejected": -168.69400024414062, |
|
"loss": 0.0029, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.329434871673584, |
|
"rewards/margins": 10.282726287841797, |
|
"rewards/rejected": -4.9532904624938965, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 2.377981651376147, |
|
"grad_norm": 1.0940320491790771, |
|
"learning_rate": 6.189314030333796e-07, |
|
"logits/chosen": -0.8577584624290466, |
|
"logits/rejected": -1.934208631515503, |
|
"logps/chosen": -280.2929382324219, |
|
"logps/rejected": -220.2890167236328, |
|
"loss": 0.0154, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.774515628814697, |
|
"rewards/margins": 10.356229782104492, |
|
"rewards/rejected": -5.581714153289795, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 2.3926605504587157, |
|
"grad_norm": 0.12193372845649719, |
|
"learning_rate": 5.910165033089e-07, |
|
"logits/chosen": -0.8733283281326294, |
|
"logits/rejected": -2.079462766647339, |
|
"logps/chosen": -316.6996765136719, |
|
"logps/rejected": -201.25564575195312, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.122347831726074, |
|
"rewards/margins": 9.807957649230957, |
|
"rewards/rejected": -4.685609817504883, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 2.4073394495412845, |
|
"grad_norm": 0.43534737825393677, |
|
"learning_rate": 5.636611281698956e-07, |
|
"logits/chosen": -0.8986641764640808, |
|
"logits/rejected": -1.9822278022766113, |
|
"logps/chosen": -262.6693420410156, |
|
"logps/rejected": -185.32846069335938, |
|
"loss": 0.0067, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.460582733154297, |
|
"rewards/margins": 9.22741413116455, |
|
"rewards/rejected": -4.766830921173096, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 2.4220183486238533, |
|
"grad_norm": 0.17293158173561096, |
|
"learning_rate": 5.368732954986389e-07, |
|
"logits/chosen": -1.0250214338302612, |
|
"logits/rejected": -2.0870189666748047, |
|
"logps/chosen": -279.42999267578125, |
|
"logps/rejected": -196.77059936523438, |
|
"loss": 0.0069, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.643675327301025, |
|
"rewards/margins": 9.602448463439941, |
|
"rewards/rejected": -4.958773136138916, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.436697247706422, |
|
"grad_norm": 0.20987118780612946, |
|
"learning_rate": 5.106608568302504e-07, |
|
"logits/chosen": -1.066097617149353, |
|
"logits/rejected": -2.057497978210449, |
|
"logps/chosen": -257.912109375, |
|
"logps/rejected": -195.58677673339844, |
|
"loss": 0.0204, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 4.727499961853027, |
|
"rewards/margins": 9.989591598510742, |
|
"rewards/rejected": -5.262092590332031, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 2.451376146788991, |
|
"grad_norm": 1.3423670530319214, |
|
"learning_rate": 4.850314950514124e-07, |
|
"logits/chosen": -0.8067299127578735, |
|
"logits/rejected": -1.9319019317626953, |
|
"logps/chosen": -281.423583984375, |
|
"logps/rejected": -192.34666442871094, |
|
"loss": 0.0104, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.9796462059021, |
|
"rewards/margins": 9.925731658935547, |
|
"rewards/rejected": -4.9460859298706055, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 2.4660550458715598, |
|
"grad_norm": 0.2133161723613739, |
|
"learning_rate": 4.599927221485034e-07, |
|
"logits/chosen": -0.9198440909385681, |
|
"logits/rejected": -2.121577024459839, |
|
"logps/chosen": -277.7024230957031, |
|
"logps/rejected": -171.4612579345703, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.626633167266846, |
|
"rewards/margins": 9.858685493469238, |
|
"rewards/rejected": -5.232051849365234, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 2.4807339449541286, |
|
"grad_norm": 0.16850100457668304, |
|
"learning_rate": 4.3555187700583175e-07, |
|
"logits/chosen": -0.8522999882698059, |
|
"logits/rejected": -2.053220748901367, |
|
"logps/chosen": -265.3820495605469, |
|
"logps/rejected": -188.9971160888672, |
|
"loss": 0.0036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.703529357910156, |
|
"rewards/margins": 10.37534236907959, |
|
"rewards/rejected": -5.671813011169434, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 2.4954128440366974, |
|
"grad_norm": 0.15111279487609863, |
|
"learning_rate": 4.1171612325460244e-07, |
|
"logits/chosen": -0.9065884351730347, |
|
"logits/rejected": -1.9212383031845093, |
|
"logps/chosen": -279.0539245605469, |
|
"logps/rejected": -185.0900115966797, |
|
"loss": 0.0045, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.483066082000732, |
|
"rewards/margins": 9.553812026977539, |
|
"rewards/rejected": -5.070746421813965, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.510091743119266, |
|
"grad_norm": 0.06084302440285683, |
|
"learning_rate": 3.8849244717325206e-07, |
|
"logits/chosen": -0.9317240715026855, |
|
"logits/rejected": -1.988271713256836, |
|
"logps/chosen": -268.8980407714844, |
|
"logps/rejected": -202.74929809570312, |
|
"loss": 0.0096, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.079599380493164, |
|
"rewards/margins": 10.533794403076172, |
|
"rewards/rejected": -5.454195022583008, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 2.524770642201835, |
|
"grad_norm": 0.8741805553436279, |
|
"learning_rate": 3.658876556397628e-07, |
|
"logits/chosen": -1.1219009160995483, |
|
"logits/rejected": -2.1234138011932373, |
|
"logps/chosen": -255.28311157226562, |
|
"logps/rejected": -171.67091369628906, |
|
"loss": 0.0099, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.061553478240967, |
|
"rewards/margins": 10.136541366577148, |
|
"rewards/rejected": -5.074987411499023, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 2.539449541284404, |
|
"grad_norm": 0.4372842013835907, |
|
"learning_rate": 3.4390837413656256e-07, |
|
"logits/chosen": -0.9813422560691833, |
|
"logits/rejected": -2.116903781890869, |
|
"logps/chosen": -278.31292724609375, |
|
"logps/rejected": -204.9643096923828, |
|
"loss": 0.0083, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.085036277770996, |
|
"rewards/margins": 10.61630630493164, |
|
"rewards/rejected": -5.531269073486328, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 2.5541284403669726, |
|
"grad_norm": 1.2550814151763916, |
|
"learning_rate": 3.225610448085903e-07, |
|
"logits/chosen": -0.9581831693649292, |
|
"logits/rejected": -2.0414552688598633, |
|
"logps/chosen": -270.668701171875, |
|
"logps/rejected": -183.82034301757812, |
|
"loss": 0.0113, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.899204730987549, |
|
"rewards/margins": 10.034035682678223, |
|
"rewards/rejected": -5.134830474853516, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 2.5688073394495414, |
|
"grad_norm": 0.03559936583042145, |
|
"learning_rate": 3.018519245750989e-07, |
|
"logits/chosen": -0.9744287729263306, |
|
"logits/rejected": -1.9595189094543457, |
|
"logps/chosen": -321.4473876953125, |
|
"logps/rejected": -223.63467407226562, |
|
"loss": 0.0091, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.675147533416748, |
|
"rewards/margins": 10.108884811401367, |
|
"rewards/rejected": -5.433738708496094, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.5834862385321102, |
|
"grad_norm": 0.2746826708316803, |
|
"learning_rate": 2.817870832957459e-07, |
|
"logits/chosen": -0.8869858980178833, |
|
"logits/rejected": -2.016246795654297, |
|
"logps/chosen": -259.2815856933594, |
|
"logps/rejected": -180.26258850097656, |
|
"loss": 0.0056, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.067638874053955, |
|
"rewards/margins": 10.355432510375977, |
|
"rewards/rejected": -5.287793159484863, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 2.598165137614679, |
|
"grad_norm": 0.17304402589797974, |
|
"learning_rate": 2.6237240199151386e-07, |
|
"logits/chosen": -1.0045228004455566, |
|
"logits/rejected": -2.091106414794922, |
|
"logps/chosen": -264.62774658203125, |
|
"logps/rejected": -172.3504638671875, |
|
"loss": 0.0051, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.949882507324219, |
|
"rewards/margins": 9.181565284729004, |
|
"rewards/rejected": -4.231683254241943, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 2.612844036697248, |
|
"grad_norm": 0.9380022883415222, |
|
"learning_rate": 2.436135711209786e-07, |
|
"logits/chosen": -1.1858479976654053, |
|
"logits/rejected": -2.1570074558258057, |
|
"logps/chosen": -279.8266296386719, |
|
"logps/rejected": -165.30809020996094, |
|
"loss": 0.009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.504940032958984, |
|
"rewards/margins": 9.417243957519531, |
|
"rewards/rejected": -4.912304401397705, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 2.6275229357798167, |
|
"grad_norm": 0.6058441400527954, |
|
"learning_rate": 2.2551608891243026e-07, |
|
"logits/chosen": -1.1764850616455078, |
|
"logits/rejected": -2.1525368690490723, |
|
"logps/chosen": -352.7016296386719, |
|
"logps/rejected": -213.2824249267578, |
|
"loss": 0.0043, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.686002731323242, |
|
"rewards/margins": 9.250012397766113, |
|
"rewards/rejected": -4.564009666442871, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 2.6422018348623855, |
|
"grad_norm": 0.14885057508945465, |
|
"learning_rate": 2.0808525975233807e-07, |
|
"logits/chosen": -0.8036705255508423, |
|
"logits/rejected": -2.0143167972564697, |
|
"logps/chosen": -282.6025085449219, |
|
"logps/rejected": -200.5447540283203, |
|
"loss": 0.015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.5807576179504395, |
|
"rewards/margins": 9.530784606933594, |
|
"rewards/rejected": -4.950027942657471, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.6568807339449543, |
|
"grad_norm": 0.40349748730659485, |
|
"learning_rate": 1.9132619263063144e-07, |
|
"logits/chosen": -0.8986431360244751, |
|
"logits/rejected": -2.059335231781006, |
|
"logps/chosen": -346.6067810058594, |
|
"logps/rejected": -212.26348876953125, |
|
"loss": 0.0096, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.356535911560059, |
|
"rewards/margins": 10.730274200439453, |
|
"rewards/rejected": -5.373737812042236, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 2.671559633027523, |
|
"grad_norm": 0.02223406359553337, |
|
"learning_rate": 1.7524379964325155e-07, |
|
"logits/chosen": -0.9592161774635315, |
|
"logits/rejected": -2.094557523727417, |
|
"logps/chosen": -327.5130310058594, |
|
"logps/rejected": -203.91741943359375, |
|
"loss": 0.0038, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.003190517425537, |
|
"rewards/margins": 10.261466979980469, |
|
"rewards/rejected": -5.258275985717773, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 2.686238532110092, |
|
"grad_norm": 0.21345356106758118, |
|
"learning_rate": 1.5984279455240975e-07, |
|
"logits/chosen": -0.9917050004005432, |
|
"logits/rejected": -2.0196518898010254, |
|
"logps/chosen": -282.2841796875, |
|
"logps/rejected": -191.79910278320312, |
|
"loss": 0.0036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.118269443511963, |
|
"rewards/margins": 10.18971061706543, |
|
"rewards/rejected": -5.071441173553467, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 2.7009174311926607, |
|
"grad_norm": 0.0874081626534462, |
|
"learning_rate": 1.451276914049818e-07, |
|
"logits/chosen": -0.9789815545082092, |
|
"logits/rejected": -2.004281997680664, |
|
"logps/chosen": -256.3831787109375, |
|
"logps/rejected": -178.12074279785156, |
|
"loss": 0.0037, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.682791233062744, |
|
"rewards/margins": 10.118135452270508, |
|
"rewards/rejected": -5.435344219207764, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 2.7155963302752295, |
|
"grad_norm": 0.15522974729537964, |
|
"learning_rate": 1.3110280320943692e-07, |
|
"logits/chosen": -0.89200758934021, |
|
"logits/rejected": -2.112806797027588, |
|
"logps/chosen": -271.0398254394531, |
|
"logps/rejected": -171.77919006347656, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.064676284790039, |
|
"rewards/margins": 10.34743881225586, |
|
"rewards/rejected": -5.28276252746582, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.7302752293577983, |
|
"grad_norm": 0.5010592341423035, |
|
"learning_rate": 1.1777224067169218e-07, |
|
"logits/chosen": -0.8353609442710876, |
|
"logits/rejected": -1.9892935752868652, |
|
"logps/chosen": -279.2965087890625, |
|
"logps/rejected": -191.65855407714844, |
|
"loss": 0.0049, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.053516864776611, |
|
"rewards/margins": 10.337114334106445, |
|
"rewards/rejected": -5.283597469329834, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 2.744954128440367, |
|
"grad_norm": 0.48516571521759033, |
|
"learning_rate": 1.0513991099025872e-07, |
|
"logits/chosen": -1.016608476638794, |
|
"logits/rejected": -2.1301956176757812, |
|
"logps/chosen": -323.5552673339844, |
|
"logps/rejected": -193.77996826171875, |
|
"loss": 0.0062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.830180644989014, |
|
"rewards/margins": 9.39232349395752, |
|
"rewards/rejected": -4.562142372131348, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 2.759633027522936, |
|
"grad_norm": 0.18496806919574738, |
|
"learning_rate": 9.320951671104194e-08, |
|
"logits/chosen": -0.9126584529876709, |
|
"logits/rejected": -2.1175155639648438, |
|
"logps/chosen": -314.1302795410156, |
|
"logps/rejected": -191.1002960205078, |
|
"loss": 0.0048, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.795359134674072, |
|
"rewards/margins": 10.335535049438477, |
|
"rewards/rejected": -4.540175914764404, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 2.7743119266055047, |
|
"grad_norm": 0.158527210354805, |
|
"learning_rate": 8.198455464212108e-08, |
|
"logits/chosen": -0.9621077179908752, |
|
"logits/rejected": -2.066542148590088, |
|
"logps/chosen": -293.69940185546875, |
|
"logps/rejected": -176.17442321777344, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.256305694580078, |
|
"rewards/margins": 10.846275329589844, |
|
"rewards/rejected": -5.589971542358398, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 2.7889908256880735, |
|
"grad_norm": 0.11951223015785217, |
|
"learning_rate": 7.146831482883115e-08, |
|
"logits/chosen": -0.7449550628662109, |
|
"logits/rejected": -2.0898332595825195, |
|
"logps/chosen": -297.49365234375, |
|
"logps/rejected": -172.2970428466797, |
|
"loss": 0.0027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.16118860244751, |
|
"rewards/margins": 11.0027437210083, |
|
"rewards/rejected": -5.841555595397949, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.8036697247706424, |
|
"grad_norm": 0.32903870940208435, |
|
"learning_rate": 6.16638795894492e-08, |
|
"logits/chosen": -0.9001256823539734, |
|
"logits/rejected": -1.9853109121322632, |
|
"logps/chosen": -261.6986389160156, |
|
"logps/rejected": -200.32876586914062, |
|
"loss": 0.0049, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.192695617675781, |
|
"rewards/margins": 10.185223579406738, |
|
"rewards/rejected": -4.992527961730957, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 2.818348623853211, |
|
"grad_norm": 1.1356521844863892, |
|
"learning_rate": 5.257412261176375e-08, |
|
"logits/chosen": -1.0478947162628174, |
|
"logits/rejected": -2.031193971633911, |
|
"logps/chosen": -272.9176025390625, |
|
"logps/rejected": -191.03363037109375, |
|
"loss": 0.008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.408777236938477, |
|
"rewards/margins": 10.281020164489746, |
|
"rewards/rejected": -4.8722429275512695, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 2.83302752293578, |
|
"grad_norm": 0.36936327815055847, |
|
"learning_rate": 4.4201708110795384e-08, |
|
"logits/chosen": -0.9411278963088989, |
|
"logits/rejected": -1.9795866012573242, |
|
"logps/chosen": -292.65386962890625, |
|
"logps/rejected": -204.53778076171875, |
|
"loss": 0.0039, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.8684539794921875, |
|
"rewards/margins": 9.500937461853027, |
|
"rewards/rejected": -4.632482528686523, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 2.847706422018349, |
|
"grad_norm": 0.16482090950012207, |
|
"learning_rate": 3.654909004791152e-08, |
|
"logits/chosen": -0.938539981842041, |
|
"logits/rejected": -2.1424248218536377, |
|
"logps/chosen": -293.4642333984375, |
|
"logps/rejected": -184.5415802001953, |
|
"loss": 0.0121, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.669407844543457, |
|
"rewards/margins": 10.090935707092285, |
|
"rewards/rejected": -5.421527862548828, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 2.8623853211009176, |
|
"grad_norm": 0.13458868861198425, |
|
"learning_rate": 2.9618511411570462e-08, |
|
"logits/chosen": -1.0025708675384521, |
|
"logits/rejected": -2.083418607711792, |
|
"logps/chosen": -284.4067687988281, |
|
"logps/rejected": -172.37875366210938, |
|
"loss": 0.0069, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.38177490234375, |
|
"rewards/margins": 9.599076271057129, |
|
"rewards/rejected": -5.2173004150390625, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.8770642201834864, |
|
"grad_norm": 1.0483838319778442, |
|
"learning_rate": 2.3412003559898088e-08, |
|
"logits/chosen": -0.8990004658699036, |
|
"logits/rejected": -1.8701345920562744, |
|
"logps/chosen": -271.26129150390625, |
|
"logps/rejected": -207.7753143310547, |
|
"loss": 0.0125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.918183326721191, |
|
"rewards/margins": 9.391782760620117, |
|
"rewards/rejected": -4.473598957061768, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 2.891743119266055, |
|
"grad_norm": 0.25216034054756165, |
|
"learning_rate": 1.793138562529634e-08, |
|
"logits/chosen": -0.971919059753418, |
|
"logits/rejected": -2.1569983959198, |
|
"logps/chosen": -346.71875, |
|
"logps/rejected": -184.22348022460938, |
|
"loss": 0.0027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.533580780029297, |
|
"rewards/margins": 10.083108901977539, |
|
"rewards/rejected": -4.549527168273926, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 2.906422018348624, |
|
"grad_norm": 0.8910009860992432, |
|
"learning_rate": 1.317826398125277e-08, |
|
"logits/chosen": -1.062324047088623, |
|
"logits/rejected": -2.1035232543945312, |
|
"logps/chosen": -293.03125, |
|
"logps/rejected": -204.35723876953125, |
|
"loss": 0.0135, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.314833641052246, |
|
"rewards/margins": 10.953380584716797, |
|
"rewards/rejected": -5.638547420501709, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 2.921100917431193, |
|
"grad_norm": 0.3026532828807831, |
|
"learning_rate": 9.15403177151275e-09, |
|
"logits/chosen": -0.9711483716964722, |
|
"logits/rejected": -1.8983428478240967, |
|
"logps/chosen": -276.14398193359375, |
|
"logps/rejected": -217.97817993164062, |
|
"loss": 0.0064, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.347715377807617, |
|
"rewards/margins": 10.437253952026367, |
|
"rewards/rejected": -5.08953857421875, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 2.9357798165137616, |
|
"grad_norm": 0.21673916280269623, |
|
"learning_rate": 5.85986850174608e-09, |
|
"logits/chosen": -0.8715996146202087, |
|
"logits/rejected": -2.193289279937744, |
|
"logps/chosen": -312.49847412109375, |
|
"logps/rejected": -185.29078674316406, |
|
"loss": 0.0027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.40484619140625, |
|
"rewards/margins": 10.567187309265137, |
|
"rewards/rejected": -5.162341117858887, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.9504587155963304, |
|
"grad_norm": 0.18232221901416779, |
|
"learning_rate": 3.296739693834927e-09, |
|
"logits/chosen": -1.094886302947998, |
|
"logits/rejected": -1.9882696866989136, |
|
"logps/chosen": -305.0465087890625, |
|
"logps/rejected": -184.07928466796875, |
|
"loss": 0.0032, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.589092254638672, |
|
"rewards/margins": 9.142921447753906, |
|
"rewards/rejected": -4.553828239440918, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 2.9651376146788992, |
|
"grad_norm": 0.28508853912353516, |
|
"learning_rate": 1.4653966028774225e-09, |
|
"logits/chosen": -0.9431482553482056, |
|
"logits/rejected": -1.953324556350708, |
|
"logps/chosen": -313.6567077636719, |
|
"logps/rejected": -213.5366973876953, |
|
"loss": 0.0034, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.910001277923584, |
|
"rewards/margins": 10.507518768310547, |
|
"rewards/rejected": -5.597517490386963, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 2.979816513761468, |
|
"grad_norm": 0.14594300091266632, |
|
"learning_rate": 3.6637599699351766e-10, |
|
"logits/chosen": -0.940761387348175, |
|
"logits/rejected": -2.1918911933898926, |
|
"logps/chosen": -289.67626953125, |
|
"logps/rejected": -180.724365234375, |
|
"loss": 0.0028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.103169918060303, |
|
"rewards/margins": 9.98257827758789, |
|
"rewards/rejected": -4.879408359527588, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 2.994495412844037, |
|
"grad_norm": 0.16170361638069153, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -1.0162718296051025, |
|
"logits/rejected": -1.9849637746810913, |
|
"logps/chosen": -319.7056579589844, |
|
"logps/rejected": -213.12435913085938, |
|
"loss": 0.0316, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 5.299257755279541, |
|
"rewards/margins": 10.258605003356934, |
|
"rewards/rejected": -4.959346771240234, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 2.994495412844037, |
|
"step": 408, |
|
"total_flos": 7.837376281021809e+17, |
|
"train_loss": 0.11720214437923905, |
|
"train_runtime": 8069.9016, |
|
"train_samples_per_second": 1.62, |
|
"train_steps_per_second": 0.051 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 408, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.837376281021809e+17, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|