{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.994495412844037,
  "eval_steps": 500,
  "global_step": 408,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.014678899082568808,
      "grad_norm": 1.9680895805358887,
      "learning_rate": 2.439024390243903e-07,
      "logits/chosen": -0.9879676103591919,
      "logits/rejected": -1.9993298053741455,
      "logps/chosen": -269.27239990234375,
      "logps/rejected": -186.47621154785156,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 2
    },
    {
      "epoch": 0.029357798165137616,
      "grad_norm": 1.94403076171875,
      "learning_rate": 4.878048780487805e-07,
      "logits/chosen": -1.0356446504592896,
      "logits/rejected": -1.989874005317688,
      "logps/chosen": -290.80950927734375,
      "logps/rejected": -204.501708984375,
      "loss": 0.6925,
      "rewards/accuracies": 0.5,
      "rewards/chosen": -0.0014448652509599924,
      "rewards/margins": 0.003180259373039007,
      "rewards/rejected": -0.004625123459845781,
      "step": 4
    },
    {
      "epoch": 0.044036697247706424,
      "grad_norm": 2.0493357181549072,
      "learning_rate": 7.317073170731707e-07,
      "logits/chosen": -1.1185294389724731,
      "logits/rejected": -2.12608003616333,
      "logps/chosen": -295.96728515625,
      "logps/rejected": -203.1552734375,
      "loss": 0.6918,
      "rewards/accuracies": 0.515625,
      "rewards/chosen": 0.001862919656559825,
      "rewards/margins": 0.004233717452734709,
      "rewards/rejected": -0.002370798261836171,
      "step": 6
    },
    {
      "epoch": 0.05871559633027523,
      "grad_norm": 1.94967520236969,
      "learning_rate": 9.75609756097561e-07,
      "logits/chosen": -1.2532521486282349,
      "logits/rejected": -2.1084485054016113,
      "logps/chosen": -252.42922973632812,
      "logps/rejected": -164.4582977294922,
      "loss": 0.6864,
      "rewards/accuracies": 0.578125,
      "rewards/chosen": 0.01558337826281786,
      "rewards/margins": 0.014522464945912361,
      "rewards/rejected": 0.0010609120363369584,
      "step": 8
    },
    {
      "epoch": 0.07339449541284404,
      "grad_norm": 2.222012996673584,
      "learning_rate": 1.2195121951219514e-06,
      "logits/chosen": -1.073809266090393,
      "logits/rejected": -2.202866554260254,
      "logps/chosen": -306.59698486328125,
      "logps/rejected": -158.68309020996094,
      "loss": 0.6956,
      "rewards/accuracies": 0.46875,
      "rewards/chosen": 0.002047515008598566,
      "rewards/margins": -0.003660219721496105,
      "rewards/rejected": 0.0057077351957559586,
      "step": 10
    },
    {
      "epoch": 0.08807339449541285,
      "grad_norm": 2.4431416988372803,
      "learning_rate": 1.4634146341463414e-06,
      "logits/chosen": -1.01254141330719,
      "logits/rejected": -1.9788235425949097,
      "logps/chosen": -344.3016662597656,
      "logps/rejected": -223.37034606933594,
      "loss": 0.7002,
      "rewards/accuracies": 0.453125,
      "rewards/chosen": -0.0088964169844985,
      "rewards/margins": -0.01263641007244587,
      "rewards/rejected": 0.0037399940192699432,
      "step": 12
    },
    {
      "epoch": 0.10275229357798166,
      "grad_norm": 2.189417839050293,
      "learning_rate": 1.707317073170732e-06,
      "logits/chosen": -0.9766557812690735,
      "logits/rejected": -1.9725066423416138,
      "logps/chosen": -259.5781555175781,
      "logps/rejected": -167.9022216796875,
      "loss": 0.6941,
      "rewards/accuracies": 0.4375,
      "rewards/chosen": 0.004296607803553343,
      "rewards/margins": -0.00030927988700568676,
      "rewards/rejected": 0.0046058883890509605,
      "step": 14
    },
    {
      "epoch": 0.11743119266055047,
      "grad_norm": 2.137669801712036,
      "learning_rate": 1.951219512195122e-06,
      "logits/chosen": -1.1466783285140991,
      "logits/rejected": -2.0279669761657715,
      "logps/chosen": -269.79840087890625,
      "logps/rejected": -186.73306274414062,
      "loss": 0.6937,
      "rewards/accuracies": 0.46875,
      "rewards/chosen": 0.0003678178181871772,
      "rewards/margins": 8.735968731343746e-05,
      "rewards/rejected": 0.0002804575487971306,
      "step": 16
    },
    {
      "epoch": 0.13211009174311927,
      "grad_norm": 2.2226767539978027,
      "learning_rate": 2.1951219512195125e-06,
      "logits/chosen": -1.025728702545166,
      "logits/rejected": -2.0889053344726562,
      "logps/chosen": -313.826904296875,
      "logps/rejected": -197.85458374023438,
      "loss": 0.687,
      "rewards/accuracies": 0.609375,
      "rewards/chosen": 0.01480245590209961,
      "rewards/margins": 0.013661026023328304,
      "rewards/rejected": 0.0011414289474487305,
      "step": 18
    },
    {
      "epoch": 0.14678899082568808,
      "grad_norm": 2.161883592605591,
      "learning_rate": 2.4390243902439027e-06,
      "logits/chosen": -1.0672154426574707,
      "logits/rejected": -2.087402820587158,
      "logps/chosen": -308.974853515625,
      "logps/rejected": -156.69020080566406,
      "loss": 0.6955,
      "rewards/accuracies": 0.421875,
      "rewards/chosen": -0.0013552162563428283,
      "rewards/margins": -0.0037626533303409815,
      "rewards/rejected": 0.0024074374232441187,
      "step": 20
    },
    {
      "epoch": 0.1614678899082569,
      "grad_norm": 2.115229368209839,
      "learning_rate": 2.682926829268293e-06,
      "logits/chosen": -1.214624047279358,
      "logits/rejected": -2.263981342315674,
      "logps/chosen": -298.82470703125,
      "logps/rejected": -186.7963104248047,
      "loss": 0.6876,
      "rewards/accuracies": 0.578125,
      "rewards/chosen": 0.009364986792206764,
      "rewards/margins": 0.013124614953994751,
      "rewards/rejected": -0.0037596281617879868,
      "step": 22
    },
    {
      "epoch": 0.1761467889908257,
      "grad_norm": 2.2371408939361572,
      "learning_rate": 2.926829268292683e-06,
      "logits/chosen": -1.0216898918151855,
      "logits/rejected": -2.1882967948913574,
      "logps/chosen": -370.7431945800781,
      "logps/rejected": -156.88446044921875,
      "loss": 0.6948,
      "rewards/accuracies": 0.46875,
      "rewards/chosen": 0.006515222601592541,
      "rewards/margins": -0.0015635215677320957,
      "rewards/rejected": 0.008078744634985924,
      "step": 24
    },
    {
      "epoch": 0.1908256880733945,
      "grad_norm": 2.0763676166534424,
      "learning_rate": 3.1707317073170736e-06,
      "logits/chosen": -1.2455652952194214,
      "logits/rejected": -2.1691112518310547,
      "logps/chosen": -326.8089599609375,
      "logps/rejected": -182.18414306640625,
      "loss": 0.687,
      "rewards/accuracies": 0.578125,
      "rewards/chosen": 0.009837308898568153,
      "rewards/margins": 0.013591233640909195,
      "rewards/rejected": -0.003753924509510398,
      "step": 26
    },
    {
      "epoch": 0.20550458715596331,
      "grad_norm": 2.039747714996338,
      "learning_rate": 3.414634146341464e-06,
      "logits/chosen": -1.171751856803894,
      "logits/rejected": -2.2122840881347656,
      "logps/chosen": -284.33807373046875,
      "logps/rejected": -165.06973266601562,
      "loss": 0.6931,
      "rewards/accuracies": 0.46875,
      "rewards/chosen": 0.01188646536320448,
      "rewards/margins": 0.0013419748283922672,
      "rewards/rejected": 0.0105444910004735,
      "step": 28
    },
    {
      "epoch": 0.22018348623853212,
      "grad_norm": 1.8488988876342773,
      "learning_rate": 3.6585365853658537e-06,
      "logits/chosen": -1.0655186176300049,
      "logits/rejected": -2.1152091026306152,
      "logps/chosen": -289.524169921875,
      "logps/rejected": -197.5038299560547,
      "loss": 0.6846,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": 0.00895670149475336,
      "rewards/margins": 0.018459243699908257,
      "rewards/rejected": -0.009502542205154896,
      "step": 30
    },
    {
      "epoch": 0.23486238532110093,
      "grad_norm": 2.165867805480957,
      "learning_rate": 3.902439024390244e-06,
      "logits/chosen": -1.1744866371154785,
      "logits/rejected": -2.1444649696350098,
      "logps/chosen": -288.5516052246094,
      "logps/rejected": -163.60867309570312,
      "loss": 0.6851,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": 0.01907195895910263,
      "rewards/margins": 0.017497658729553223,
      "rewards/rejected": 0.0015743016265332699,
      "step": 32
    },
    {
      "epoch": 0.24954128440366974,
      "grad_norm": 2.3035781383514404,
      "learning_rate": 4.146341463414634e-06,
      "logits/chosen": -1.1526018381118774,
      "logits/rejected": -2.208312511444092,
      "logps/chosen": -324.5880126953125,
      "logps/rejected": -164.4250030517578,
      "loss": 0.6791,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": 0.02159590646624565,
      "rewards/margins": 0.029934655874967575,
      "rewards/rejected": -0.008338749408721924,
      "step": 34
    },
    {
      "epoch": 0.26422018348623855,
      "grad_norm": 2.0675113201141357,
      "learning_rate": 4.390243902439025e-06,
      "logits/chosen": -1.1687654256820679,
      "logits/rejected": -2.0869553089141846,
      "logps/chosen": -311.4859313964844,
      "logps/rejected": -204.45504760742188,
      "loss": 0.6838,
      "rewards/accuracies": 0.625,
      "rewards/chosen": 0.014333288185298443,
      "rewards/margins": 0.02041085995733738,
      "rewards/rejected": -0.006077570840716362,
      "step": 36
    },
    {
      "epoch": 0.27889908256880735,
      "grad_norm": 2.190349578857422,
      "learning_rate": 4.634146341463416e-06,
      "logits/chosen": -1.1937575340270996,
      "logits/rejected": -2.1977155208587646,
      "logps/chosen": -294.04486083984375,
      "logps/rejected": -200.34214782714844,
      "loss": 0.6813,
      "rewards/accuracies": 0.671875,
      "rewards/chosen": 0.026322053745388985,
      "rewards/margins": 0.025608191266655922,
      "rewards/rejected": 0.0007138650980778039,
      "step": 38
    },
    {
      "epoch": 0.29357798165137616,
      "grad_norm": 2.097546100616455,
      "learning_rate": 4.8780487804878055e-06,
      "logits/chosen": -1.0418745279312134,
      "logits/rejected": -2.0384740829467773,
      "logps/chosen": -345.9956359863281,
      "logps/rejected": -181.20216369628906,
      "loss": 0.6849,
      "rewards/accuracies": 0.640625,
      "rewards/chosen": 0.022339414805173874,
      "rewards/margins": 0.01856069266796112,
      "rewards/rejected": 0.0037787200417369604,
      "step": 40
    },
    {
      "epoch": 0.30825688073394497,
      "grad_norm": 2.183018684387207,
      "learning_rate": 4.999908404322799e-06,
      "logits/chosen": -1.0392463207244873,
      "logits/rejected": -2.2322421073913574,
      "logps/chosen": -319.3612976074219,
      "logps/rejected": -172.56298828125,
      "loss": 0.6766,
      "rewards/accuracies": 0.609375,
      "rewards/chosen": 0.029892440885305405,
      "rewards/margins": 0.03489885479211807,
      "rewards/rejected": -0.005006419029086828,
      "step": 42
    },
    {
      "epoch": 0.3229357798165138,
      "grad_norm": 1.9956392049789429,
      "learning_rate": 4.999175679175577e-06,
      "logits/chosen": -1.1080222129821777,
      "logits/rejected": -2.1327452659606934,
      "logps/chosen": -252.04730224609375,
      "logps/rejected": -161.28741455078125,
      "loss": 0.6733,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": 0.02994115650653839,
      "rewards/margins": 0.04140399023890495,
      "rewards/rejected": -0.011462842114269733,
      "step": 44
    },
    {
      "epoch": 0.3376146788990826,
      "grad_norm": 2.0336079597473145,
      "learning_rate": 4.997710443643461e-06,
      "logits/chosen": -1.1675165891647339,
      "logits/rejected": -2.0700559616088867,
      "logps/chosen": -259.8742980957031,
      "logps/rejected": -206.36590576171875,
      "loss": 0.6717,
      "rewards/accuracies": 0.671875,
      "rewards/chosen": 0.04368049278855324,
      "rewards/margins": 0.04554973170161247,
      "rewards/rejected": -0.0018692431040108204,
      "step": 46
    },
    {
      "epoch": 0.3522935779816514,
      "grad_norm": 2.5602798461914062,
      "learning_rate": 4.995513127188151e-06,
      "logits/chosen": -1.0851877927780151,
      "logits/rejected": -2.2184207439422607,
      "logps/chosen": -365.763671875,
      "logps/rejected": -183.1211700439453,
      "loss": 0.6633,
      "rewards/accuracies": 0.765625,
      "rewards/chosen": 0.057490088045597076,
      "rewards/margins": 0.06220678985118866,
      "rewards/rejected": -0.004716700874269009,
      "step": 48
    },
    {
      "epoch": 0.3669724770642202,
      "grad_norm": 2.348297119140625,
      "learning_rate": 4.992584373844853e-06,
      "logits/chosen": -1.2085366249084473,
      "logits/rejected": -2.0821032524108887,
      "logps/chosen": -345.70233154296875,
      "logps/rejected": -184.2445526123047,
      "loss": 0.653,
      "rewards/accuracies": 0.78125,
      "rewards/chosen": 0.07105237245559692,
      "rewards/margins": 0.0839213952422142,
      "rewards/rejected": -0.012869024649262428,
      "step": 50
    },
    {
      "epoch": 0.381651376146789,
      "grad_norm": 2.2809340953826904,
      "learning_rate": 4.98892504203351e-06,
      "logits/chosen": -1.2231262922286987,
      "logits/rejected": -2.131476402282715,
      "logps/chosen": -281.89996337890625,
      "logps/rejected": -158.83851623535156,
      "loss": 0.6489,
      "rewards/accuracies": 0.84375,
      "rewards/chosen": 0.08580557256937027,
      "rewards/margins": 0.09303868561983109,
      "rewards/rejected": -0.007233113050460815,
      "step": 52
    },
    {
      "epoch": 0.3963302752293578,
      "grad_norm": 2.181974411010742,
      "learning_rate": 4.9845362043071925e-06,
      "logits/chosen": -1.0225324630737305,
      "logits/rejected": -2.0713043212890625,
      "logps/chosen": -290.6384582519531,
      "logps/rejected": -163.60952758789062,
      "loss": 0.6458,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": 0.08012352883815765,
      "rewards/margins": 0.09865971654653549,
      "rewards/rejected": -0.018536187708377838,
      "step": 54
    },
    {
      "epoch": 0.41100917431192663,
      "grad_norm": 2.2260894775390625,
      "learning_rate": 4.97941914703774e-06,
      "logits/chosen": -1.147126317024231,
      "logits/rejected": -2.1539666652679443,
      "logps/chosen": -287.7672424316406,
      "logps/rejected": -201.26382446289062,
      "loss": 0.6381,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": 0.10910698771476746,
      "rewards/margins": 0.11642393469810486,
      "rewards/rejected": -0.007316945120692253,
      "step": 56
    },
    {
      "epoch": 0.42568807339449544,
      "grad_norm": 2.510664224624634,
      "learning_rate": 4.973575370038718e-06,
      "logits/chosen": -1.0700985193252563,
      "logits/rejected": -2.0467348098754883,
      "logps/chosen": -305.25732421875,
      "logps/rejected": -193.29762268066406,
      "loss": 0.6246,
      "rewards/accuracies": 0.921875,
      "rewards/chosen": 0.1313922256231308,
      "rewards/margins": 0.14777766168117523,
      "rewards/rejected": -0.01638544164597988,
      "step": 58
    },
    {
      "epoch": 0.44036697247706424,
      "grad_norm": 2.172935962677002,
      "learning_rate": 4.967006586125827e-06,
      "logits/chosen": -1.2389792203903198,
      "logits/rejected": -2.0778443813323975,
      "logps/chosen": -301.3792724609375,
      "logps/rejected": -186.553466796875,
      "loss": 0.6191,
      "rewards/accuracies": 0.953125,
      "rewards/chosen": 0.148426353931427,
      "rewards/margins": 0.1568143665790558,
      "rewards/rejected": -0.00838800985366106,
      "step": 60
    },
    {
      "epoch": 0.45504587155963305,
      "grad_norm": 2.4665584564208984,
      "learning_rate": 4.959714720614871e-06,
      "logits/chosen": -1.1760492324829102,
      "logits/rejected": -2.221869468688965,
      "logps/chosen": -319.1153869628906,
      "logps/rejected": -184.00784301757812,
      "loss": 0.5986,
      "rewards/accuracies": 0.953125,
      "rewards/chosen": 0.1699998527765274,
      "rewards/margins": 0.20439089834690094,
      "rewards/rejected": -0.034391067922115326,
      "step": 62
    },
    {
      "epoch": 0.46972477064220186,
      "grad_norm": 2.0785248279571533,
      "learning_rate": 4.951701910757446e-06,
      "logits/chosen": -1.1586418151855469,
      "logits/rejected": -2.063925266265869,
      "logps/chosen": -253.8636474609375,
      "logps/rejected": -188.82861328125,
      "loss": 0.5946,
      "rewards/accuracies": 0.953125,
      "rewards/chosen": 0.18049536645412445,
      "rewards/margins": 0.21469204127788544,
      "rewards/rejected": -0.03419665992259979,
      "step": 64
    },
    {
      "epoch": 0.48440366972477067,
      "grad_norm": 2.569335699081421,
      "learning_rate": 4.942970505114514e-06,
      "logits/chosen": -1.0432729721069336,
      "logits/rejected": -2.1155495643615723,
      "logps/chosen": -308.3620910644531,
      "logps/rejected": -176.31954956054688,
      "loss": 0.5807,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 0.20524388551712036,
      "rewards/margins": 0.24512474238872528,
      "rewards/rejected": -0.039880868047475815,
      "step": 66
    },
    {
      "epoch": 0.4990825688073395,
      "grad_norm": 2.016463279724121,
      "learning_rate": 4.933523062868033e-06,
      "logits/chosen": -1.0766105651855469,
      "logits/rejected": -2.1681623458862305,
      "logps/chosen": -269.38372802734375,
      "logps/rejected": -164.8252716064453,
      "loss": 0.5684,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 0.22753889858722687,
      "rewards/margins": 0.2743402421474457,
      "rewards/rejected": -0.0468013621866703,
      "step": 68
    },
    {
      "epoch": 0.5137614678899083,
      "grad_norm": 2.1414647102355957,
      "learning_rate": 4.923362353070859e-06,
      "logits/chosen": -0.8950706720352173,
      "logits/rejected": -2.1591413021087646,
      "logps/chosen": -287.07080078125,
      "logps/rejected": -159.81610107421875,
      "loss": 0.5422,
      "rewards/accuracies": 0.96875,
      "rewards/chosen": 0.267063707113266,
      "rewards/margins": 0.3391086757183075,
      "rewards/rejected": -0.0720449760556221,
      "step": 70
    },
    {
      "epoch": 0.5284403669724771,
      "grad_norm": 1.9493393898010254,
      "learning_rate": 4.912491353835138e-06,
      "logits/chosen": -1.1616274118423462,
      "logits/rejected": -2.0955862998962402,
      "logps/chosen": -259.8146667480469,
      "logps/rejected": -185.34283447265625,
      "loss": 0.5515,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 0.25752362608909607,
      "rewards/margins": 0.3175871968269348,
      "rewards/rejected": -0.060063570737838745,
      "step": 72
    },
    {
      "epoch": 0.5431192660550459,
      "grad_norm": 1.920662522315979,
      "learning_rate": 4.900913251459418e-06,
      "logits/chosen": -1.076828956604004,
      "logits/rejected": -2.0495800971984863,
      "logps/chosen": -264.6033935546875,
      "logps/rejected": -173.12643432617188,
      "loss": 0.5307,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 0.2871895134449005,
      "rewards/margins": 0.3713921010494232,
      "rewards/rejected": -0.08420257270336151,
      "step": 74
    },
    {
      "epoch": 0.5577981651376147,
      "grad_norm": 2.01267671585083,
      "learning_rate": 4.8886314394947396e-06,
      "logits/chosen": -0.9867750406265259,
      "logits/rejected": -2.073662281036377,
      "logps/chosen": -278.45904541015625,
      "logps/rejected": -185.91983032226562,
      "loss": 0.4971,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.3652493953704834,
      "rewards/margins": 0.46529656648635864,
      "rewards/rejected": -0.10004711896181107,
      "step": 76
    },
    {
      "epoch": 0.5724770642201835,
      "grad_norm": 2.1410698890686035,
      "learning_rate": 4.875649517749985e-06,
      "logits/chosen": -1.0425585508346558,
      "logits/rejected": -2.1876349449157715,
      "logps/chosen": -281.6705017089844,
      "logps/rejected": -191.27613830566406,
      "loss": 0.4922,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 0.3646804392337799,
      "rewards/margins": 0.4775218367576599,
      "rewards/rejected": -0.11284142732620239,
      "step": 78
    },
    {
      "epoch": 0.5871559633027523,
      "grad_norm": 1.8346776962280273,
      "learning_rate": 4.861971291236772e-06,
      "logits/chosen": -1.1346731185913086,
      "logits/rejected": -2.0539989471435547,
      "logps/chosen": -327.99591064453125,
      "logps/rejected": -191.64602661132812,
      "loss": 0.4939,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 0.4345476031303406,
      "rewards/margins": 0.48541808128356934,
      "rewards/rejected": -0.050870493054389954,
      "step": 80
    },
    {
      "epoch": 0.6018348623853211,
      "grad_norm": 2.0787250995635986,
      "learning_rate": 4.847600769054201e-06,
      "logits/chosen": -1.1712466478347778,
      "logits/rejected": -2.0765137672424316,
      "logps/chosen": -364.9358215332031,
      "logps/rejected": -221.4823455810547,
      "loss": 0.4574,
      "rewards/accuracies": 0.953125,
      "rewards/chosen": 0.49422764778137207,
      "rewards/margins": 0.5797656178474426,
      "rewards/rejected": -0.08553799241781235,
      "step": 82
    },
    {
      "epoch": 0.6165137614678899,
      "grad_norm": 1.8402867317199707,
      "learning_rate": 4.832542163213787e-06,
      "logits/chosen": -1.0152747631072998,
      "logits/rejected": -2.2040934562683105,
      "logps/chosen": -260.55572509765625,
      "logps/rejected": -155.7264862060547,
      "loss": 0.4454,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 0.463802695274353,
      "rewards/margins": 0.6095759868621826,
      "rewards/rejected": -0.14577320218086243,
      "step": 84
    },
    {
      "epoch": 0.6311926605504588,
      "grad_norm": 1.792264461517334,
      "learning_rate": 4.816799887404911e-06,
      "logits/chosen": -1.215497374534607,
      "logits/rejected": -2.1564512252807617,
      "logps/chosen": -299.83013916015625,
      "logps/rejected": -185.66973876953125,
      "loss": 0.4544,
      "rewards/accuracies": 0.953125,
      "rewards/chosen": 0.4826025366783142,
      "rewards/margins": 0.5931094884872437,
      "rewards/rejected": -0.11050693690776825,
      "step": 86
    },
    {
      "epoch": 0.6458715596330276,
      "grad_norm": 1.752100944519043,
      "learning_rate": 4.800378555701168e-06,
      "logits/chosen": -1.0534290075302124,
      "logits/rejected": -2.0158798694610596,
      "logps/chosen": -352.815185546875,
      "logps/rejected": -186.691162109375,
      "loss": 0.4169,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 0.5516682267189026,
      "rewards/margins": 0.705946683883667,
      "rewards/rejected": -0.15427842736244202,
      "step": 88
    },
    {
      "epoch": 0.6605504587155964,
      "grad_norm": 1.9027481079101562,
      "learning_rate": 4.783282981207979e-06,
      "logits/chosen": -1.096940040588379,
      "logits/rejected": -2.289912462234497,
      "logps/chosen": -295.15185546875,
      "logps/rejected": -169.89637756347656,
      "loss": 0.4081,
      "rewards/accuracies": 0.96875,
      "rewards/chosen": 0.5667621493339539,
      "rewards/margins": 0.7420346736907959,
      "rewards/rejected": -0.17527249455451965,
      "step": 90
    },
    {
      "epoch": 0.6752293577981652,
      "grad_norm": 1.6681158542633057,
      "learning_rate": 4.765518174651864e-06,
      "logits/chosen": -1.1006022691726685,
      "logits/rejected": -2.068861484527588,
      "logps/chosen": -284.75360107421875,
      "logps/rejected": -190.65567016601562,
      "loss": 0.3972,
      "rewards/accuracies": 0.96875,
      "rewards/chosen": 0.5725142359733582,
      "rewards/margins": 0.767882227897644,
      "rewards/rejected": -0.1953679472208023,
      "step": 92
    },
    {
      "epoch": 0.689908256880734,
      "grad_norm": 1.751624584197998,
      "learning_rate": 4.747089342911793e-06,
      "logits/chosen": -0.9587563276290894,
      "logits/rejected": -2.189159393310547,
      "logps/chosen": -289.97772216796875,
      "logps/rejected": -175.44686889648438,
      "loss": 0.3614,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.6674075722694397,
      "rewards/margins": 0.8871376514434814,
      "rewards/rejected": -0.21973000466823578,
      "step": 94
    },
    {
      "epoch": 0.7045871559633028,
      "grad_norm": 1.5681990385055542,
      "learning_rate": 4.728001887493048e-06,
      "logits/chosen": -0.9651382565498352,
      "logits/rejected": -2.1765432357788086,
      "logps/chosen": -297.8235168457031,
      "logps/rejected": -194.53585815429688,
      "loss": 0.3691,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 0.7080159783363342,
      "rewards/margins": 0.8855035901069641,
      "rewards/rejected": -0.17748761177062988,
      "step": 96
    },
    {
      "epoch": 0.7192660550458716,
      "grad_norm": 1.5139466524124146,
      "learning_rate": 4.708261402944036e-06,
      "logits/chosen": -1.0541656017303467,
      "logits/rejected": -2.1506094932556152,
      "logps/chosen": -313.3412780761719,
      "logps/rejected": -188.76458740234375,
      "loss": 0.3317,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 0.7788289785385132,
      "rewards/margins": 1.0398166179656982,
      "rewards/rejected": -0.2609875798225403,
      "step": 98
    },
    {
      "epoch": 0.7339449541284404,
      "grad_norm": 1.5227373838424683,
      "learning_rate": 4.687873675216522e-06,
      "logits/chosen": -0.9371960163116455,
      "logits/rejected": -1.9936003684997559,
      "logps/chosen": -300.81158447265625,
      "logps/rejected": -199.56732177734375,
      "loss": 0.3424,
      "rewards/accuracies": 0.96875,
      "rewards/chosen": 0.8187399506568909,
      "rewards/margins": 1.027111530303955,
      "rewards/rejected": -0.2083716094493866,
      "step": 100
    },
    {
      "epoch": 0.7486238532110092,
      "grad_norm": 1.450157642364502,
      "learning_rate": 4.666844679969765e-06,
      "logits/chosen": -1.2729206085205078,
      "logits/rejected": -2.29646372795105,
      "logps/chosen": -297.16717529296875,
      "logps/rejected": -209.1322021484375,
      "loss": 0.3164,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.7305983304977417,
      "rewards/margins": 1.0814378261566162,
      "rewards/rejected": -0.35083937644958496,
      "step": 102
    },
    {
      "epoch": 0.763302752293578,
      "grad_norm": 1.4192078113555908,
      "learning_rate": 4.6451805808190464e-06,
      "logits/chosen": -1.0219098329544067,
      "logits/rejected": -2.149144411087036,
      "logps/chosen": -281.9839782714844,
      "logps/rejected": -176.75775146484375,
      "loss": 0.2955,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.7969125509262085,
      "rewards/margins": 1.1675889492034912,
      "rewards/rejected": -0.3706764280796051,
      "step": 104
    },
    {
      "epoch": 0.7779816513761468,
      "grad_norm": 1.3653980493545532,
      "learning_rate": 4.622887727529104e-06,
      "logits/chosen": -1.0368492603302002,
      "logits/rejected": -2.1240074634552,
      "logps/chosen": -255.23117065429688,
      "logps/rejected": -207.93202209472656,
      "loss": 0.2807,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.800532341003418,
      "rewards/margins": 1.2377490997314453,
      "rewards/rejected": -0.4372166097164154,
      "step": 106
    },
    {
      "epoch": 0.7926605504587156,
      "grad_norm": 1.2262941598892212,
      "learning_rate": 4.599972654153018e-06,
      "logits/chosen": -0.8901413679122925,
      "logits/rejected": -2.1084232330322266,
      "logps/chosen": -298.4986572265625,
      "logps/rejected": -174.66802978515625,
      "loss": 0.2646,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.9371631145477295,
      "rewards/margins": 1.313105821609497,
      "rewards/rejected": -0.37594282627105713,
      "step": 108
    },
    {
      "epoch": 0.8073394495412844,
      "grad_norm": 1.2946897745132446,
      "learning_rate": 4.5764420771170735e-06,
      "logits/chosen": -0.9344348907470703,
      "logits/rejected": -2.080475091934204,
      "logps/chosen": -275.98651123046875,
      "logps/rejected": -193.67861938476562,
      "loss": 0.2753,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.8512506484985352,
      "rewards/margins": 1.311413288116455,
      "rewards/rejected": -0.4601626992225647,
      "step": 110
    },
    {
      "epoch": 0.8220183486238533,
      "grad_norm": 1.3371607065200806,
      "learning_rate": 4.552302893252166e-06,
      "logits/chosen": -1.2021628618240356,
      "logits/rejected": -2.230822801589966,
      "logps/chosen": -303.2996826171875,
      "logps/rejected": -206.12570190429688,
      "loss": 0.2712,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.8574119806289673,
      "rewards/margins": 1.2879098653793335,
      "rewards/rejected": -0.43049782514572144,
      "step": 112
    },
    {
      "epoch": 0.8366972477064221,
      "grad_norm": 1.410147786140442,
      "learning_rate": 4.52756217777234e-06,
      "logits/chosen": -1.2043006420135498,
      "logits/rejected": -2.198638677597046,
      "logps/chosen": -308.4309997558594,
      "logps/rejected": -208.2371063232422,
      "loss": 0.2744,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.9266740083694458,
      "rewards/margins": 1.3231022357940674,
      "rewards/rejected": -0.39642831683158875,
      "step": 114
    },
    {
      "epoch": 0.8513761467889909,
      "grad_norm": 1.2775070667266846,
      "learning_rate": 4.502227182201035e-06,
      "logits/chosen": -0.9093916416168213,
      "logits/rejected": -2.001713275909424,
      "logps/chosen": -261.07958984375,
      "logps/rejected": -176.1920928955078,
      "loss": 0.2392,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.9916579127311707,
      "rewards/margins": 1.4740495681762695,
      "rewards/rejected": -0.48239168524742126,
      "step": 116
    },
    {
      "epoch": 0.8660550458715597,
      "grad_norm": 1.07257080078125,
      "learning_rate": 4.476305332245662e-06,
      "logits/chosen": -1.0554779767990112,
      "logits/rejected": -2.3410415649414062,
      "logps/chosen": -311.1431579589844,
      "logps/rejected": -154.0364990234375,
      "loss": 0.2286,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 0.9677338600158691,
      "rewards/margins": 1.5362495183944702,
      "rewards/rejected": -0.5685155987739563,
      "step": 118
    },
    {
      "epoch": 0.8807339449541285,
      "grad_norm": 1.242690920829773,
      "learning_rate": 4.449804225621116e-06,
      "logits/chosen": -0.9921204447746277,
      "logits/rejected": -2.1049680709838867,
      "logps/chosen": -275.8395080566406,
      "logps/rejected": -181.99395751953125,
      "loss": 0.2576,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.942889928817749,
      "rewards/margins": 1.4441044330596924,
      "rewards/rejected": -0.5012143850326538,
      "step": 120
    },
    {
      "epoch": 0.8954128440366973,
      "grad_norm": 1.1715214252471924,
      "learning_rate": 4.422731629822887e-06,
      "logits/chosen": -0.9075473546981812,
      "logits/rejected": -2.0095391273498535,
      "logps/chosen": -311.2561950683594,
      "logps/rejected": -195.94952392578125,
      "loss": 0.2366,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.9800222516059875,
      "rewards/margins": 1.5522234439849854,
      "rewards/rejected": -0.5722010135650635,
      "step": 122
    },
    {
      "epoch": 0.9100917431192661,
      "grad_norm": 1.1240105628967285,
      "learning_rate": 4.395095479850396e-06,
      "logits/chosen": -0.9207634329795837,
      "logits/rejected": -1.925203561782837,
      "logps/chosen": -284.2423400878906,
      "logps/rejected": -188.70021057128906,
      "loss": 0.2474,
      "rewards/accuracies": 0.96875,
      "rewards/chosen": 1.000288963317871,
      "rewards/margins": 1.5673273801803589,
      "rewards/rejected": -0.5670384168624878,
      "step": 124
    },
    {
      "epoch": 0.9247706422018349,
      "grad_norm": 1.010899543762207,
      "learning_rate": 4.366903875881243e-06,
      "logits/chosen": -1.044604778289795,
      "logits/rejected": -2.3711750507354736,
      "logps/chosen": -271.86798095703125,
      "logps/rejected": -167.09661865234375,
      "loss": 0.1996,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.972938060760498,
      "rewards/margins": 1.7551511526107788,
      "rewards/rejected": -0.7822130918502808,
      "step": 126
    },
    {
      "epoch": 0.9394495412844037,
      "grad_norm": 1.1042369604110718,
      "learning_rate": 4.3381650808970365e-06,
      "logits/chosen": -0.9873514771461487,
      "logits/rejected": -1.9928994178771973,
      "logps/chosen": -251.29678344726562,
      "logps/rejected": -188.0851593017578,
      "loss": 0.219,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 0.9701035618782043,
      "rewards/margins": 1.6142561435699463,
      "rewards/rejected": -0.6441525220870972,
      "step": 128
    },
    {
      "epoch": 0.9541284403669725,
      "grad_norm": 1.102792739868164,
      "learning_rate": 4.308887518261507e-06,
      "logits/chosen": -0.8256503343582153,
      "logits/rejected": -1.9561454057693481,
      "logps/chosen": -274.3543701171875,
      "logps/rejected": -197.478271484375,
      "loss": 0.2053,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 1.111090898513794,
      "rewards/margins": 1.742661476135254,
      "rewards/rejected": -0.6315708160400391,
      "step": 130
    },
    {
      "epoch": 0.9688073394495413,
      "grad_norm": 1.0662490129470825,
      "learning_rate": 4.279079769251617e-06,
      "logits/chosen": -1.1610325574874878,
      "logits/rejected": -2.2200121879577637,
      "logps/chosen": -346.84844970703125,
      "logps/rejected": -213.77944946289062,
      "loss": 0.194,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 1.1105481386184692,
      "rewards/margins": 1.8311288356781006,
      "rewards/rejected": -0.720580518245697,
      "step": 132
    },
    {
      "epoch": 0.9834862385321101,
      "grad_norm": 0.9257935881614685,
      "learning_rate": 4.248750570542373e-06,
      "logits/chosen": -0.9382209777832031,
      "logits/rejected": -2.100569725036621,
      "logps/chosen": -268.1485290527344,
      "logps/rejected": -182.8208465576172,
      "loss": 0.1969,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 1.0590834617614746,
      "rewards/margins": 1.7893319129943848,
      "rewards/rejected": -0.7302483320236206,
      "step": 134
    },
    {
      "epoch": 0.998165137614679,
      "grad_norm": 0.9342382550239563,
      "learning_rate": 4.21790881164611e-06,
      "logits/chosen": -0.87558913230896,
      "logits/rejected": -2.136240005493164,
      "logps/chosen": -278.4149475097656,
      "logps/rejected": -198.23118591308594,
      "loss": 0.181,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 1.130152940750122,
      "rewards/margins": 2.0675582885742188,
      "rewards/rejected": -0.9374052286148071,
      "step": 136
    },
    {
      "epoch": 1.0128440366972478,
|
"grad_norm": 0.9632888436317444, |
|
"learning_rate": 4.186563532306957e-06, |
|
"logits/chosen": -0.858898937702179, |
|
"logits/rejected": -2.086756706237793, |
|
"logps/chosen": -285.22412109375, |
|
"logps/rejected": -172.19354248046875, |
|
"loss": 0.1752, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.044780969619751, |
|
"rewards/margins": 1.960892677307129, |
|
"rewards/rejected": -0.9161118865013123, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 1.0275229357798166, |
|
"grad_norm": 1.0304391384124756, |
|
"learning_rate": 4.154723919851291e-06, |
|
"logits/chosen": -1.0367554426193237, |
|
"logits/rejected": -2.1234500408172607, |
|
"logps/chosen": -286.8799133300781, |
|
"logps/rejected": -177.22369384765625, |
|
"loss": 0.2077, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9602662324905396, |
|
"rewards/margins": 1.8099737167358398, |
|
"rewards/rejected": -0.8497075438499451, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.0422018348623854, |
|
"grad_norm": 0.8776561617851257, |
|
"learning_rate": 4.122399306494918e-06, |
|
"logits/chosen": -1.0474900007247925, |
|
"logits/rejected": -2.2810702323913574, |
|
"logps/chosen": -331.9571533203125, |
|
"logps/rejected": -202.68959045410156, |
|
"loss": 0.171, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0891458988189697, |
|
"rewards/margins": 2.000492572784424, |
|
"rewards/rejected": -0.9113465547561646, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 1.0568807339449542, |
|
"grad_norm": 0.7959941029548645, |
|
"learning_rate": 4.089599166607794e-06, |
|
"logits/chosen": -1.0148186683654785, |
|
"logits/rejected": -2.022637128829956, |
|
"logps/chosen": -287.8388671875, |
|
"logps/rejected": -192.57476806640625, |
|
"loss": 0.1359, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.0807700157165527, |
|
"rewards/margins": 2.295886754989624, |
|
"rewards/rejected": -1.2151165008544922, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 1.071559633027523, |
|
"grad_norm": 0.8457261919975281, |
|
"learning_rate": 4.05633311393708e-06, |
|
"logits/chosen": -0.8852999210357666, |
|
"logits/rejected": -2.0398194789886475, |
|
"logps/chosen": -253.7115936279297, |
|
"logps/rejected": -176.87721252441406, |
|
"loss": 0.1734, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0683624744415283, |
|
"rewards/margins": 2.0258071422576904, |
|
"rewards/rejected": -0.9574447870254517, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 1.0862385321100918, |
|
"grad_norm": 0.8683717250823975, |
|
"learning_rate": 4.022610898789349e-06, |
|
"logits/chosen": -0.9221259355545044, |
|
"logits/rejected": -2.1102771759033203, |
|
"logps/chosen": -261.9385681152344, |
|
"logps/rejected": -191.73699951171875, |
|
"loss": 0.1444, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.1373392343521118, |
|
"rewards/margins": 2.214506149291992, |
|
"rewards/rejected": -1.07716703414917, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 1.1009174311926606, |
|
"grad_norm": 0.9450275301933289, |
|
"learning_rate": 3.988442405172755e-06, |
|
"logits/chosen": -0.7829115390777588, |
|
"logits/rejected": -2.009986400604248, |
|
"logps/chosen": -276.32489013671875, |
|
"logps/rejected": -207.25306701660156, |
|
"loss": 0.131, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2793267965316772, |
|
"rewards/margins": 2.3499927520751953, |
|
"rewards/rejected": -1.0706660747528076, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.1155963302752294, |
|
"grad_norm": 0.7044640183448792, |
|
"learning_rate": 3.953837647900031e-06, |
|
"logits/chosen": -0.8685592412948608, |
|
"logits/rejected": -2.114327907562256, |
|
"logps/chosen": -268.39666748046875, |
|
"logps/rejected": -201.62606811523438, |
|
"loss": 0.1375, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2952916622161865, |
|
"rewards/margins": 2.458449363708496, |
|
"rewards/rejected": -1.1631580591201782, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 1.1302752293577982, |
|
"grad_norm": 0.9467131495475769, |
|
"learning_rate": 3.918806769653135e-06, |
|
"logits/chosen": -0.7687999606132507, |
|
"logits/rejected": -2.0185675621032715, |
|
"logps/chosen": -313.5120544433594, |
|
"logps/rejected": -201.3844757080078, |
|
"loss": 0.1569, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2509111166000366, |
|
"rewards/margins": 2.3543107509613037, |
|
"rewards/rejected": -1.1033998727798462, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 1.144954128440367, |
|
"grad_norm": 0.9374696612358093, |
|
"learning_rate": 3.88336003801042e-06, |
|
"logits/chosen": -0.8394186496734619, |
|
"logits/rejected": -2.057039260864258, |
|
"logps/chosen": -250.6929168701172, |
|
"logps/rejected": -183.453857421875, |
|
"loss": 0.1573, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0716910362243652, |
|
"rewards/margins": 2.1432347297668457, |
|
"rewards/rejected": -1.0715439319610596, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 1.1596330275229358, |
|
"grad_norm": 0.7151435613632202, |
|
"learning_rate": 3.847507842437205e-06, |
|
"logits/chosen": -0.7416910529136658, |
|
"logits/rejected": -2.1064770221710205, |
|
"logps/chosen": -291.7624206542969, |
|
"logps/rejected": -178.61636352539062, |
|
"loss": 0.1197, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2456002235412598, |
|
"rewards/margins": 2.5829920768737793, |
|
"rewards/rejected": -1.3373918533325195, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 1.1743119266055047, |
|
"grad_norm": 0.9140924215316772, |
|
"learning_rate": 3.811260691240604e-06, |
|
"logits/chosen": -0.7726842164993286, |
|
"logits/rejected": -2.1016135215759277, |
|
"logps/chosen": -334.30889892578125, |
|
"logps/rejected": -194.9357452392578, |
|
"loss": 0.1133, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3805341720581055, |
|
"rewards/margins": 2.567413806915283, |
|
"rewards/rejected": -1.1868797540664673, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.1889908256880735, |
|
"grad_norm": 0.8678433895111084, |
|
"learning_rate": 3.774629208489547e-06, |
|
"logits/chosen": -0.8398001194000244, |
|
"logits/rejected": -2.0928659439086914, |
|
"logps/chosen": -237.24533081054688, |
|
"logps/rejected": -179.15646362304688, |
|
"loss": 0.1365, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.10520339012146, |
|
"rewards/margins": 2.2576396465301514, |
|
"rewards/rejected": -1.1524361371994019, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 1.2036697247706423, |
|
"grad_norm": 0.8694548606872559, |
|
"learning_rate": 3.7376241309008433e-06, |
|
"logits/chosen": -0.9950603246688843, |
|
"logits/rejected": -2.11771297454834, |
|
"logps/chosen": -321.5704040527344, |
|
"logps/rejected": -189.8595428466797, |
|
"loss": 0.132, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3039062023162842, |
|
"rewards/margins": 2.5127134323120117, |
|
"rewards/rejected": -1.208807349205017, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 1.218348623853211, |
|
"grad_norm": 0.6600338816642761, |
|
"learning_rate": 3.7002563046922502e-06, |
|
"logits/chosen": -0.9616975784301758, |
|
"logits/rejected": -2.2365851402282715, |
|
"logps/chosen": -321.541015625, |
|
"logps/rejected": -181.15940856933594, |
|
"loss": 0.101, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2826290130615234, |
|
"rewards/margins": 2.782111883163452, |
|
"rewards/rejected": -1.4994828701019287, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 1.2330275229357799, |
|
"grad_norm": 0.5339725017547607, |
|
"learning_rate": 3.6625366824034337e-06, |
|
"logits/chosen": -0.7237181663513184, |
|
"logits/rejected": -2.069546699523926, |
|
"logps/chosen": -274.1114196777344, |
|
"logps/rejected": -215.94541931152344, |
|
"loss": 0.0977, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.363397240638733, |
|
"rewards/margins": 2.990330934524536, |
|
"rewards/rejected": -1.6269338130950928, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 1.2477064220183487, |
|
"grad_norm": 0.8492485284805298, |
|
"learning_rate": 3.6244763196857714e-06, |
|
"logits/chosen": -0.8532748222351074, |
|
"logits/rejected": -2.129385471343994, |
|
"logps/chosen": -291.3846435546875, |
|
"logps/rejected": -189.8374481201172, |
|
"loss": 0.1186, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3079031705856323, |
|
"rewards/margins": 2.8324928283691406, |
|
"rewards/rejected": -1.5245895385742188, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.2623853211009175, |
|
"grad_norm": 0.7831862568855286, |
|
"learning_rate": 3.5860863720619333e-06, |
|
"logits/chosen": -0.8818280696868896, |
|
"logits/rejected": -2.078712224960327, |
|
"logps/chosen": -283.1339111328125, |
|
"logps/rejected": -191.44334411621094, |
|
"loss": 0.1124, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.4618873596191406, |
|
"rewards/margins": 2.6762702465057373, |
|
"rewards/rejected": -1.2143830060958862, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 1.2770642201834863, |
|
"grad_norm": 0.7691733837127686, |
|
"learning_rate": 3.547378091656186e-06, |
|
"logits/chosen": -0.7506787776947021, |
|
"logits/rejected": -2.08036208152771, |
|
"logps/chosen": -288.89703369140625, |
|
"logps/rejected": -182.05838012695312, |
|
"loss": 0.1012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2226263284683228, |
|
"rewards/margins": 2.7687463760375977, |
|
"rewards/rejected": -1.546120047569275, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 1.2917431192660551, |
|
"grad_norm": 1.1186165809631348, |
|
"learning_rate": 3.5083628238963913e-06, |
|
"logits/chosen": -0.9391432404518127, |
|
"logits/rejected": -1.9351601600646973, |
|
"logps/chosen": -230.03585815429688, |
|
"logps/rejected": -183.17974853515625, |
|
"loss": 0.1368, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.181043028831482, |
|
"rewards/margins": 2.5483238697052, |
|
"rewards/rejected": -1.3672808408737183, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 1.306422018348624, |
|
"grad_norm": 0.7178276777267456, |
|
"learning_rate": 3.4690520041886473e-06, |
|
"logits/chosen": -0.7520254254341125, |
|
"logits/rejected": -2.0339462757110596, |
|
"logps/chosen": -270.16693115234375, |
|
"logps/rejected": -220.48068237304688, |
|
"loss": 0.108, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.292816162109375, |
|
"rewards/margins": 2.7162489891052246, |
|
"rewards/rejected": -1.423432469367981, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 1.3211009174311927, |
|
"grad_norm": 0.4474305212497711, |
|
"learning_rate": 3.4294571545655653e-06, |
|
"logits/chosen": -0.7533161640167236, |
|
"logits/rejected": -2.217989444732666, |
|
"logps/chosen": -287.7424011230469, |
|
"logps/rejected": -190.35292053222656, |
|
"loss": 0.073, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3982596397399902, |
|
"rewards/margins": 3.1139888763427734, |
|
"rewards/rejected": -1.7157293558120728, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.3357798165137615, |
|
"grad_norm": 0.6046783924102783, |
|
"learning_rate": 3.38958988030915e-06, |
|
"logits/chosen": -1.0535500049591064, |
|
"logits/rejected": -2.043179988861084, |
|
"logps/chosen": -269.7599792480469, |
|
"logps/rejected": -233.5879669189453, |
|
"loss": 0.1254, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 1.2435075044631958, |
|
"rewards/margins": 2.7820754051208496, |
|
"rewards/rejected": -1.5385677814483643, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 1.3504587155963304, |
|
"grad_norm": 0.8419710397720337, |
|
"learning_rate": 3.3494618665492833e-06, |
|
"logits/chosen": -0.9595227241516113, |
|
"logits/rejected": -1.9984383583068848, |
|
"logps/chosen": -251.54937744140625, |
|
"logps/rejected": -200.65310668945312, |
|
"loss": 0.1435, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0583308935165405, |
|
"rewards/margins": 2.44592022895813, |
|
"rewards/rejected": -1.387589454650879, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 1.3651376146788992, |
|
"grad_norm": 0.54781574010849, |
|
"learning_rate": 3.3090848748388042e-06, |
|
"logits/chosen": -0.8385530114173889, |
|
"logits/rejected": -2.110614776611328, |
|
"logps/chosen": -348.93280029296875, |
|
"logps/rejected": -204.1185760498047, |
|
"loss": 0.085, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1977351903915405, |
|
"rewards/margins": 3.080059051513672, |
|
"rewards/rejected": -1.8823240995407104, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 1.379816513761468, |
|
"grad_norm": 0.6081674695014954, |
|
"learning_rate": 3.2684707397061887e-06, |
|
"logits/chosen": -0.9298804998397827, |
|
"logits/rejected": -2.0643250942230225, |
|
"logps/chosen": -287.8137512207031, |
|
"logps/rejected": -182.94764709472656, |
|
"loss": 0.0997, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2941851615905762, |
|
"rewards/margins": 2.83797287940979, |
|
"rewards/rejected": -1.5437875986099243, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 1.3944954128440368, |
|
"grad_norm": 0.6791523694992065, |
|
"learning_rate": 3.2276313651868364e-06, |
|
"logits/chosen": -0.7827736735343933, |
|
"logits/rejected": -2.0808167457580566, |
|
"logps/chosen": -292.70318603515625, |
|
"logps/rejected": -172.2096405029297, |
|
"loss": 0.1022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2332713603973389, |
|
"rewards/margins": 2.892195463180542, |
|
"rewards/rejected": -1.6589239835739136, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.4091743119266056, |
|
"grad_norm": 0.5124452114105225, |
|
"learning_rate": 3.1865787213339926e-06, |
|
"logits/chosen": -0.8003594875335693, |
|
"logits/rejected": -2.0693812370300293, |
|
"logps/chosen": -276.2262268066406, |
|
"logps/rejected": -197.57984924316406, |
|
"loss": 0.0904, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3064976930618286, |
|
"rewards/margins": 3.1538498401641846, |
|
"rewards/rejected": -1.8473519086837769, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 1.4238532110091744, |
|
"grad_norm": 0.6072111129760742, |
|
"learning_rate": 3.1453248407103156e-06, |
|
"logits/chosen": -0.8399981260299683, |
|
"logits/rejected": -2.1043572425842285, |
|
"logps/chosen": -283.270263671875, |
|
"logps/rejected": -180.08139038085938, |
|
"loss": 0.0991, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1119271516799927, |
|
"rewards/margins": 2.881303310394287, |
|
"rewards/rejected": -1.7693761587142944, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 1.4385321100917432, |
|
"grad_norm": 0.7273401618003845, |
|
"learning_rate": 3.1038818148611178e-06, |
|
"logits/chosen": -0.8482731580734253, |
|
"logits/rejected": -1.9760737419128418, |
|
"logps/chosen": -307.0271911621094, |
|
"logps/rejected": -192.59413146972656, |
|
"loss": 0.0955, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2951596975326538, |
|
"rewards/margins": 3.0548951625823975, |
|
"rewards/rejected": -1.7597354650497437, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 1.453211009174312, |
|
"grad_norm": 0.7552217841148376, |
|
"learning_rate": 3.062261790770331e-06, |
|
"logits/chosen": -0.732867956161499, |
|
"logits/rejected": -1.9417576789855957, |
|
"logps/chosen": -254.77919006347656, |
|
"logps/rejected": -191.59266662597656, |
|
"loss": 0.1245, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.2346166372299194, |
|
"rewards/margins": 2.7373769283294678, |
|
"rewards/rejected": -1.5027600526809692, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 1.4678899082568808, |
|
"grad_norm": 0.49851715564727783, |
|
"learning_rate": 3.0204769673002123e-06, |
|
"logits/chosen": -0.7962568402290344, |
|
"logits/rejected": -2.082612991333008, |
|
"logps/chosen": -328.71258544921875, |
|
"logps/rejected": -208.73855590820312, |
|
"loss": 0.0804, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.319938063621521, |
|
"rewards/margins": 3.0253965854644775, |
|
"rewards/rejected": -1.7054588794708252, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.4825688073394496, |
|
"grad_norm": 0.5871180891990662, |
|
"learning_rate": 2.978539591615848e-06, |
|
"logits/chosen": -0.8652533292770386, |
|
"logits/rejected": -1.8598251342773438, |
|
"logps/chosen": -295.21002197265625, |
|
"logps/rejected": -208.55722045898438, |
|
"loss": 0.0893, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.0787442922592163, |
|
"rewards/margins": 3.0431761741638184, |
|
"rewards/rejected": -1.964431881904602, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 1.4972477064220184, |
|
"grad_norm": 0.6797307729721069, |
|
"learning_rate": 2.936461955595501e-06, |
|
"logits/chosen": -0.8632673025131226, |
|
"logits/rejected": -2.0839812755584717, |
|
"logps/chosen": -293.5806884765625, |
|
"logps/rejected": -202.08558654785156, |
|
"loss": 0.0956, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3460842370986938, |
|
"rewards/margins": 3.0369350910186768, |
|
"rewards/rejected": -1.6908507347106934, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 1.5119266055045872, |
|
"grad_norm": 0.4843383729457855, |
|
"learning_rate": 2.8942563922278487e-06, |
|
"logits/chosen": -0.8488835096359253, |
|
"logits/rejected": -2.1049342155456543, |
|
"logps/chosen": -281.8902893066406, |
|
"logps/rejected": -209.70303344726562, |
|
"loss": 0.0832, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.224290132522583, |
|
"rewards/margins": 3.3286895751953125, |
|
"rewards/rejected": -2.1043996810913086, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 1.526605504587156, |
|
"grad_norm": 0.9614333510398865, |
|
"learning_rate": 2.8519352719971783e-06, |
|
"logits/chosen": -0.9178900122642517, |
|
"logits/rejected": -2.042919158935547, |
|
"logps/chosen": -312.1871643066406, |
|
"logps/rejected": -213.3470458984375, |
|
"loss": 0.1065, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.297778844833374, |
|
"rewards/margins": 3.1191961765289307, |
|
"rewards/rejected": -1.8214173316955566, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 1.5412844036697249, |
|
"grad_norm": 0.5379541516304016, |
|
"learning_rate": 2.8095109992575824e-06, |
|
"logits/chosen": -0.7868439555168152, |
|
"logits/rejected": -2.0428073406219482, |
|
"logps/chosen": -323.072265625, |
|
"logps/rejected": -213.02218627929688, |
|
"loss": 0.0704, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.4688664674758911, |
|
"rewards/margins": 3.2576310634613037, |
|
"rewards/rejected": -1.7887648344039917, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.5559633027522937, |
|
"grad_norm": 0.6083974838256836, |
|
"learning_rate": 2.7669960085972407e-06, |
|
"logits/chosen": -0.7467061877250671, |
|
"logits/rejected": -2.1958773136138916, |
|
"logps/chosen": -346.7701416015625, |
|
"logps/rejected": -232.30908203125, |
|
"loss": 0.0751, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3543357849121094, |
|
"rewards/margins": 3.2890334129333496, |
|
"rewards/rejected": -1.9346972703933716, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 1.5706422018348625, |
|
"grad_norm": 0.7258363366127014, |
|
"learning_rate": 2.7244027611938247e-06, |
|
"logits/chosen": -0.6547941565513611, |
|
"logits/rejected": -1.8969395160675049, |
|
"logps/chosen": -247.0142364501953, |
|
"logps/rejected": -232.2767791748047, |
|
"loss": 0.1097, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1799771785736084, |
|
"rewards/margins": 3.0688834190368652, |
|
"rewards/rejected": -1.8889062404632568, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 1.5853211009174313, |
|
"grad_norm": 0.8645861148834229, |
|
"learning_rate": 2.6817437411621194e-06, |
|
"logits/chosen": -0.783694863319397, |
|
"logits/rejected": -2.0017306804656982, |
|
"logps/chosen": -338.4605712890625, |
|
"logps/rejected": -248.77426147460938, |
|
"loss": 0.0917, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2730180025100708, |
|
"rewards/margins": 3.0962510108947754, |
|
"rewards/rejected": -1.8232333660125732, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.5792384147644043, |
|
"learning_rate": 2.639031451894923e-06, |
|
"logits/chosen": -0.8397828936576843, |
|
"logits/rejected": -1.8498985767364502, |
|
"logps/chosen": -324.8052062988281, |
|
"logps/rejected": -234.82046508789062, |
|
"loss": 0.0681, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.378151774406433, |
|
"rewards/margins": 3.3498945236206055, |
|
"rewards/rejected": -1.971742868423462, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 1.614678899082569, |
|
"grad_norm": 0.5888641476631165, |
|
"learning_rate": 2.5962784123982843e-06, |
|
"logits/chosen": -0.8491584062576294, |
|
"logits/rejected": -2.119868278503418, |
|
"logps/chosen": -301.45770263671875, |
|
"logps/rejected": -212.421142578125, |
|
"loss": 0.0815, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.212162971496582, |
|
"rewards/margins": 3.384962320327759, |
|
"rewards/rejected": -2.172799587249756, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.6293577981651377, |
|
"grad_norm": 0.5032806396484375, |
|
"learning_rate": 2.5534971536221804e-06, |
|
"logits/chosen": -0.6542560458183289, |
|
"logits/rejected": -1.8956093788146973, |
|
"logps/chosen": -265.0072937011719, |
|
"logps/rejected": -204.949462890625, |
|
"loss": 0.0781, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1131190061569214, |
|
"rewards/margins": 3.218719244003296, |
|
"rewards/rejected": -2.105600595474243, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 1.6440366972477065, |
|
"grad_norm": 0.4768834114074707, |
|
"learning_rate": 2.5107002147876814e-06, |
|
"logits/chosen": -0.8108365535736084, |
|
"logits/rejected": -1.8424029350280762, |
|
"logps/chosen": -258.41229248046875, |
|
"logps/rejected": -219.92779541015625, |
|
"loss": 0.0725, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2540631294250488, |
|
"rewards/margins": 3.3726139068603516, |
|
"rewards/rejected": -2.1185507774353027, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 1.6587155963302753, |
|
"grad_norm": 0.8125916719436646, |
|
"learning_rate": 2.467900139711693e-06, |
|
"logits/chosen": -0.8619644045829773, |
|
"logits/rejected": -1.9385554790496826, |
|
"logps/chosen": -269.68865966796875, |
|
"logps/rejected": -210.65516662597656, |
|
"loss": 0.1102, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.0213913917541504, |
|
"rewards/margins": 3.1285760402679443, |
|
"rewards/rejected": -2.107184648513794, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 1.6733944954128441, |
|
"grad_norm": 0.3927309215068817, |
|
"learning_rate": 2.4251094731303586e-06, |
|
"logits/chosen": -0.7270049452781677, |
|
"logits/rejected": -2.04742169380188, |
|
"logps/chosen": -285.7385559082031, |
|
"logps/rejected": -192.58218383789062, |
|
"loss": 0.0633, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.411180853843689, |
|
"rewards/margins": 3.363067388534546, |
|
"rewards/rejected": -1.9518864154815674, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 1.688073394495413, |
|
"grad_norm": 0.3921149671077728, |
|
"learning_rate": 2.3823407570221812e-06, |
|
"logits/chosen": -0.6392618417739868, |
|
"logits/rejected": -1.9726002216339111, |
|
"logps/chosen": -295.4417724609375, |
|
"logps/rejected": -188.51336669921875, |
|
"loss": 0.0639, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3102056980133057, |
|
"rewards/margins": 3.34782338142395, |
|
"rewards/rejected": -2.0376174449920654, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.7027522935779817, |
|
"grad_norm": 0.534637451171875, |
|
"learning_rate": 2.3396065269319655e-06, |
|
"logits/chosen": -0.8075351119041443, |
|
"logits/rejected": -2.0515084266662598, |
|
"logps/chosen": -295.063720703125, |
|
"logps/rejected": -186.2789306640625, |
|
"loss": 0.0687, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3271912336349487, |
|
"rewards/margins": 3.409841537475586, |
|
"rewards/rejected": -2.0826501846313477, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 1.7174311926605506, |
|
"grad_norm": 0.42420271039009094, |
|
"learning_rate": 2.2969193082966353e-06, |
|
"logits/chosen": -0.671249270439148, |
|
"logits/rejected": -1.996549367904663, |
|
"logps/chosen": -280.64324951171875, |
|
"logps/rejected": -205.07583618164062, |
|
"loss": 0.0559, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3049947023391724, |
|
"rewards/margins": 3.6353046894073486, |
|
"rewards/rejected": -2.330310106277466, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 1.7321100917431194, |
|
"grad_norm": 0.7015637755393982, |
|
"learning_rate": 2.2542916127740194e-06, |
|
"logits/chosen": -0.6518242955207825, |
|
"logits/rejected": -1.6868393421173096, |
|
"logps/chosen": -308.3757019042969, |
|
"logps/rejected": -248.41526794433594, |
|
"loss": 0.0824, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2519233226776123, |
|
"rewards/margins": 3.429176092147827, |
|
"rewards/rejected": -2.177252769470215, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 1.7467889908256882, |
|
"grad_norm": 0.5725772380828857, |
|
"learning_rate": 2.211735934575674e-06, |
|
"logits/chosen": -0.7443441152572632, |
|
"logits/rejected": -2.128143548965454, |
|
"logps/chosen": -278.10333251953125, |
|
"logps/rejected": -179.91464233398438, |
|
"loss": 0.0832, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0279340744018555, |
|
"rewards/margins": 3.215839385986328, |
|
"rewards/rejected": -2.1879053115844727, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 1.761467889908257, |
|
"grad_norm": 0.45834001898765564, |
|
"learning_rate": 2.1692647468048235e-06, |
|
"logits/chosen": -0.8754634857177734, |
|
"logits/rejected": -1.9457530975341797, |
|
"logps/chosen": -303.13446044921875, |
|
"logps/rejected": -224.45973205566406, |
|
"loss": 0.0589, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2197521924972534, |
|
"rewards/margins": 4.044127941131592, |
|
"rewards/rejected": -2.8243753910064697, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.7761467889908258, |
|
"grad_norm": 0.4834798574447632, |
|
"learning_rate": 2.126890497800477e-06, |
|
"logits/chosen": -0.8580002784729004, |
|
"logits/rejected": -1.8683984279632568, |
|
"logps/chosen": -293.51300048828125, |
|
"logps/rejected": -214.88717651367188, |
|
"loss": 0.0961, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.166562557220459, |
|
"rewards/margins": 3.1886491775512695, |
|
"rewards/rejected": -2.0220866203308105, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.7908256880733946, |
|
"grad_norm": 0.5751946568489075, |
|
"learning_rate": 2.084625607488816e-06, |
|
"logits/chosen": -0.7191615700721741, |
|
"logits/rejected": -2.0266952514648438, |
|
"logps/chosen": -270.87384033203125, |
|
"logps/rejected": -203.34132385253906, |
|
"loss": 0.0751, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3117611408233643, |
|
"rewards/margins": 3.6484484672546387, |
|
"rewards/rejected": -2.3366873264312744, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 1.8055045871559634, |
|
"grad_norm": 0.5446330904960632, |
|
"learning_rate": 2.0424824637428995e-06, |
|
"logits/chosen": -0.7033808827400208, |
|
"logits/rejected": -2.186832904815674, |
|
"logps/chosen": -263.154296875, |
|
"logps/rejected": -186.73326110839844, |
|
"loss": 0.068, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2053011655807495, |
|
"rewards/margins": 3.4986956119537354, |
|
"rewards/rejected": -2.2933943271636963, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 1.8201834862385322, |
|
"grad_norm": 0.6622535586357117, |
|
"learning_rate": 2.0004734187517744e-06, |
|
"logits/chosen": -0.8931563496589661, |
|
"logits/rejected": -1.8802121877670288, |
|
"logps/chosen": -314.3890686035156, |
|
"logps/rejected": -190.39952087402344, |
|
"loss": 0.083, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.220291256904602, |
|
"rewards/margins": 3.367621421813965, |
|
"rewards/rejected": -2.1473300457000732, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 1.834862385321101, |
|
"grad_norm": 0.4107370972633362, |
|
"learning_rate": 1.9586107854000327e-06, |
|
"logits/chosen": -0.9159139394760132, |
|
"logits/rejected": -2.079981803894043, |
|
"logps/chosen": -292.71826171875, |
|
"logps/rejected": -183.3487091064453, |
|
"loss": 0.071, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1953845024108887, |
|
"rewards/margins": 3.413740634918213, |
|
"rewards/rejected": -2.218356132507324, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.8495412844036698, |
|
"grad_norm": 0.5233532786369324, |
|
"learning_rate": 1.916906833658899e-06, |
|
"logits/chosen": -0.6843887567520142, |
|
"logits/rejected": -2.013826370239258, |
|
"logps/chosen": -320.2293701171875, |
|
"logps/rejected": -232.86337280273438, |
|
"loss": 0.06, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2191388607025146, |
|
"rewards/margins": 3.6537911891937256, |
|
"rewards/rejected": -2.43465256690979, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 1.8642201834862386, |
|
"grad_norm": 0.5287185311317444, |
|
"learning_rate": 1.8753737869898921e-06, |
|
"logits/chosen": -0.7584778070449829, |
|
"logits/rejected": -1.9458154439926147, |
|
"logps/chosen": -245.53262329101562, |
|
"logps/rejected": -202.05838012695312, |
|
"loss": 0.06, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0267632007598877, |
|
"rewards/margins": 3.7624475955963135, |
|
"rewards/rejected": -2.735684394836426, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 1.8788990825688074, |
|
"grad_norm": 0.442032128572464, |
|
"learning_rate": 1.8340238187621185e-06, |
|
"logits/chosen": -0.6425713300704956, |
|
"logits/rejected": -1.9409232139587402, |
|
"logps/chosen": -258.7193603515625, |
|
"logps/rejected": -189.8154754638672, |
|
"loss": 0.0842, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.1635607481002808, |
|
"rewards/margins": 3.324028491973877, |
|
"rewards/rejected": -2.1604676246643066, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 1.8935779816513763, |
|
"grad_norm": 0.5363203287124634, |
|
"learning_rate": 1.7928690486842438e-06, |
|
"logits/chosen": -0.8087460994720459, |
|
"logits/rejected": -2.033935070037842, |
|
"logps/chosen": -248.86526489257812, |
|
"logps/rejected": -174.26751708984375, |
|
"loss": 0.071, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2420388460159302, |
|
"rewards/margins": 3.397740364074707, |
|
"rewards/rejected": -2.155701160430908, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 1.908256880733945, |
|
"grad_norm": 0.6694974899291992, |
|
"learning_rate": 1.7519215392522026e-06, |
|
"logits/chosen": -0.7507426142692566, |
|
"logits/rejected": -2.0721378326416016, |
|
"logps/chosen": -278.2518310546875, |
|
"logps/rejected": -182.0352325439453, |
|
"loss": 0.0656, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2583619356155396, |
|
"rewards/margins": 3.6089565753936768, |
|
"rewards/rejected": -2.350594997406006, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.9229357798165139, |
|
"grad_norm": 0.43020421266555786, |
|
"learning_rate": 1.7111932922136715e-06, |
|
"logits/chosen": -0.7629832625389099, |
|
"logits/rejected": -1.7456109523773193, |
|
"logps/chosen": -249.65966796875, |
|
"logps/rejected": -219.1207275390625, |
|
"loss": 0.0702, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0758057832717896, |
|
"rewards/margins": 3.561830759048462, |
|
"rewards/rejected": -2.486024856567383, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 1.9376146788990827, |
|
"grad_norm": 0.4130955934524536, |
|
"learning_rate": 1.6706962450503408e-06, |
|
"logits/chosen": -0.5795252919197083, |
|
"logits/rejected": -2.0015809535980225, |
|
"logps/chosen": -278.20465087890625, |
|
"logps/rejected": -207.60369873046875, |
|
"loss": 0.0552, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2261712551116943, |
|
"rewards/margins": 3.91609787940979, |
|
"rewards/rejected": -2.6899263858795166, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.9522935779816515, |
|
"grad_norm": 0.5304592251777649, |
|
"learning_rate": 1.630442267479034e-06, |
|
"logits/chosen": -0.5538889169692993, |
|
"logits/rejected": -1.865062952041626, |
|
"logps/chosen": -261.88433837890625, |
|
"logps/rejected": -214.9879150390625, |
|
"loss": 0.0579, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2872378826141357, |
|
"rewards/margins": 3.732686758041382, |
|
"rewards/rejected": -2.445448637008667, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 1.9669724770642203, |
|
"grad_norm": 0.3272631764411926, |
|
"learning_rate": 1.5904431579726837e-06, |
|
"logits/chosen": -0.6988632082939148, |
|
"logits/rejected": -2.0075268745422363, |
|
"logps/chosen": -292.2803039550781, |
|
"logps/rejected": -182.51699829101562, |
|
"loss": 0.061, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0751233100891113, |
|
"rewards/margins": 3.618114948272705, |
|
"rewards/rejected": -2.5429916381835938, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.981651376146789, |
|
"grad_norm": 0.4741683304309845, |
|
"learning_rate": 1.5507106403021897e-06, |
|
"logits/chosen": -0.6511130928993225, |
|
"logits/rejected": -2.0427660942077637, |
|
"logps/chosen": -323.4988098144531, |
|
"logps/rejected": -221.62696838378906, |
|
"loss": 0.0544, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.5597325563430786, |
|
"rewards/margins": 3.946735382080078, |
|
"rewards/rejected": -2.38700270652771, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.996330275229358, |
|
"grad_norm": 0.49209919571876526, |
|
"learning_rate": 1.511256360100171e-06, |
|
"logits/chosen": -0.6503832936286926, |
|
"logits/rejected": -2.046551465988159, |
|
"logps/chosen": -291.0798645019531, |
|
"logps/rejected": -208.11813354492188, |
|
"loss": 0.0652, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1303690671920776, |
|
"rewards/margins": 3.719075918197632, |
|
"rewards/rejected": -2.5887067317962646, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 2.0110091743119267, |
|
"grad_norm": 0.5563045144081116, |
|
"learning_rate": 1.4720918814476234e-06, |
|
"logits/chosen": -0.8647924065589905, |
|
"logits/rejected": -2.10542368888855, |
|
"logps/chosen": -252.02809143066406, |
|
"logps/rejected": -198.6781463623047, |
|
"loss": 0.0738, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.059165120124817, |
|
"rewards/margins": 3.9192636013031006, |
|
"rewards/rejected": -2.860098361968994, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 2.0256880733944955, |
|
"grad_norm": 0.5068049430847168, |
|
"learning_rate": 1.4332286834844792e-06, |
|
"logits/chosen": -0.8756486773490906, |
|
"logits/rejected": -2.04034161567688, |
|
"logps/chosen": -283.6759338378906, |
|
"logps/rejected": -205.1702423095703, |
|
"loss": 0.0666, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0573828220367432, |
|
"rewards/margins": 3.5449130535125732, |
|
"rewards/rejected": -2.487530469894409, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 2.0403669724770643, |
|
"grad_norm": 0.6067166924476624, |
|
"learning_rate": 1.3946781570450563e-06, |
|
"logits/chosen": -0.7339982986450195, |
|
"logits/rejected": -2.013446092605591, |
|
"logps/chosen": -298.65966796875, |
|
"logps/rejected": -213.3814697265625, |
|
"loss": 0.0607, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3606170415878296, |
|
"rewards/margins": 3.6802518367767334, |
|
"rewards/rejected": -2.319634437561035, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 2.055045871559633, |
|
"grad_norm": 0.3535687029361725, |
|
"learning_rate": 1.3564516013194023e-06, |
|
"logits/chosen": -0.5440888404846191, |
|
"logits/rejected": -1.890367031097412, |
|
"logps/chosen": -262.8106689453125, |
|
"logps/rejected": -203.97817993164062, |
|
"loss": 0.0548, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1478538513183594, |
|
"rewards/margins": 3.9146053791046143, |
|
"rewards/rejected": -2.766751766204834, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.069724770642202, |
|
"grad_norm": 0.5773036479949951, |
|
"learning_rate": 1.3185602205414894e-06, |
|
"logits/chosen": -0.7184356451034546, |
|
"logits/rejected": -1.9216872453689575, |
|
"logps/chosen": -266.2982482910156, |
|
"logps/rejected": -188.89344787597656, |
|
"loss": 0.0742, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1561676263809204, |
|
"rewards/margins": 3.482301712036133, |
|
"rewards/rejected": -2.326133966445923, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 2.0844036697247708, |
|
"grad_norm": 0.3960573077201843, |
|
"learning_rate": 1.2810151207052465e-06, |
|
"logits/chosen": -0.7768682241439819, |
|
"logits/rejected": -1.998164415359497, |
|
"logps/chosen": -332.774658203125, |
|
"logps/rejected": -239.0347137451172, |
|
"loss": 0.0669, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0610469579696655, |
|
"rewards/margins": 3.5978243350982666, |
|
"rewards/rejected": -2.5367772579193115, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 2.0990825688073396, |
|
"grad_norm": 0.5275505781173706, |
|
"learning_rate": 1.2438273063093811e-06, |
|
"logits/chosen": -0.6288259029388428, |
|
"logits/rejected": -1.8450119495391846, |
|
"logps/chosen": -274.0487060546875, |
|
"logps/rejected": -184.83047485351562, |
|
"loss": 0.0845, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.0822153091430664, |
|
"rewards/margins": 3.4356398582458496, |
|
"rewards/rejected": -2.353424549102783, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 2.1137614678899084, |
|
"grad_norm": 0.7754862308502197, |
|
"learning_rate": 1.2070076771319536e-06, |
|
"logits/chosen": -0.8596429228782654, |
|
"logits/rejected": -1.8019381761550903, |
|
"logps/chosen": -350.53326416015625, |
|
"logps/rejected": -216.1761932373047, |
|
"loss": 0.0851, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1380345821380615, |
|
"rewards/margins": 3.3932504653930664, |
|
"rewards/rejected": -2.255215883255005, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 2.128440366972477, |
|
"grad_norm": 0.615742027759552, |
|
"learning_rate": 1.1705670250356417e-06, |
|
"logits/chosen": -0.6248029470443726, |
|
"logits/rejected": -1.9336309432983398, |
|
"logps/chosen": -307.3043518066406, |
|
"logps/rejected": -212.56773376464844, |
|
"loss": 0.064, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3314754962921143, |
|
"rewards/margins": 3.7979063987731934, |
|
"rewards/rejected": -2.466430902481079, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.143119266055046, |
|
"grad_norm": 0.3835650086402893, |
|
"learning_rate": 1.1345160308046413e-06, |
|
"logits/chosen": -0.7160873413085938, |
|
"logits/rejected": -2.185732364654541, |
|
"logps/chosen": -379.47021484375, |
|
"logps/rejected": -225.2349395751953, |
|
"loss": 0.0507, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2591447830200195, |
|
"rewards/margins": 4.147268295288086, |
|
"rewards/rejected": -2.8881237506866455, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 2.157798165137615, |
|
"grad_norm": 0.47745481133461, |
|
"learning_rate": 1.0988652610141154e-06, |
|
"logits/chosen": -0.6995221972465515, |
|
"logits/rejected": -1.8448468446731567, |
|
"logps/chosen": -272.8988037109375, |
|
"logps/rejected": -229.68360900878906, |
|
"loss": 0.0618, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1089469194412231, |
|
"rewards/margins": 3.6219894886016846, |
|
"rewards/rejected": -2.513042449951172, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 2.1724770642201836, |
|
"grad_norm": 0.34081774950027466, |
|
"learning_rate": 1.063625164933124e-06, |
|
"logits/chosen": -0.6344919204711914, |
|
"logits/rejected": -1.979994297027588, |
|
"logps/chosen": -326.6709289550781, |
|
"logps/rejected": -229.17945861816406, |
|
"loss": 0.0447, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3035708665847778, |
|
"rewards/margins": 4.305816650390625, |
|
"rewards/rejected": -3.0022459030151367, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 2.1871559633027524, |
|
"grad_norm": 0.5146552324295044, |
|
"learning_rate": 1.0288060714619359e-06, |
|
"logits/chosen": -0.8762121796607971, |
|
"logits/rejected": -2.13432240486145, |
|
"logps/chosen": -312.33612060546875, |
|
"logps/rejected": -185.08656311035156, |
|
"loss": 0.0693, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3586633205413818, |
|
"rewards/margins": 3.8767006397247314, |
|
"rewards/rejected": -2.5180373191833496, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 2.2018348623853212, |
|
"grad_norm": 0.4197719991207123, |
|
"learning_rate": 9.944181861046188e-07, |
|
"logits/chosen": -0.6559761762619019, |
|
"logits/rejected": -1.8893078565597534, |
|
"logps/chosen": -330.83319091796875, |
|
"logps/rejected": -221.11766052246094, |
|
"loss": 0.065, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1609145402908325, |
|
"rewards/margins": 4.062168598175049, |
|
"rewards/rejected": -2.9012537002563477, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.21651376146789, |
|
"grad_norm": 0.3998691439628601, |
|
"learning_rate": 9.604715879777986e-07, |
|
"logits/chosen": -0.6999265551567078, |
|
"logits/rejected": -2.0933423042297363, |
|
"logps/chosen": -275.30718994140625, |
|
"logps/rejected": -171.385986328125, |
|
"loss": 0.0584, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.279176950454712, |
|
"rewards/margins": 3.8434293270111084, |
|
"rewards/rejected": -2.5642526149749756, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 2.231192660550459, |
|
"grad_norm": 0.4111442565917969, |
|
"learning_rate": 9.269762268564616e-07, |
|
"logits/chosen": -0.8195481300354004, |
|
"logits/rejected": -2.072096824645996, |
|
"logps/chosen": -251.16116333007812, |
|
"logps/rejected": -178.15469360351562, |
|
"loss": 0.0701, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2159481048583984, |
|
"rewards/margins": 3.747391700744629, |
|
"rewards/rejected": -2.5314438343048096, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 2.2458715596330276, |
|
"grad_norm": 0.6149260401725769, |
|
"learning_rate": 8.939419202576694e-07, |
|
"logits/chosen": -0.546268105506897, |
|
"logits/rejected": -1.6728581190109253, |
|
"logps/chosen": -255.0876922607422, |
|
"logps/rejected": -198.06613159179688, |
|
"loss": 0.097, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1009477376937866, |
|
"rewards/margins": 3.073812961578369, |
|
"rewards/rejected": -1.972865104675293, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 2.2605504587155965, |
|
"grad_norm": 0.584430456161499, |
|
"learning_rate": 8.61378350563033e-07, |
|
"logits/chosen": -0.6843928098678589, |
|
"logits/rejected": -1.8941227197647095, |
|
"logps/chosen": -246.6961669921875, |
|
"logps/rejected": -210.56329345703125, |
|
"loss": 0.0651, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1296510696411133, |
|
"rewards/margins": 3.5654549598693848, |
|
"rewards/rejected": -2.4358038902282715, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 2.2752293577981653, |
|
"grad_norm": 0.4649229049682617, |
|
"learning_rate": 8.292950621808022e-07, |
|
"logits/chosen": -0.7501717209815979, |
|
"logits/rejected": -1.959920048713684, |
|
"logps/chosen": -280.8645324707031, |
|
"logps/rejected": -210.0735626220703, |
|
"loss": 0.0491, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2929048538208008, |
|
"rewards/margins": 3.938021421432495, |
|
"rewards/rejected": -2.6451170444488525, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.289908256880734, |
|
"grad_norm": 0.6232532858848572, |
|
"learning_rate": 7.977014587483925e-07, |
|
"logits/chosen": -0.7559989094734192, |
|
"logits/rejected": -1.9293510913848877, |
|
"logps/chosen": -270.5220947265625, |
|
"logps/rejected": -245.95974731445312, |
|
"loss": 0.0659, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.148956060409546, |
|
"rewards/margins": 3.7466979026794434, |
|
"rewards/rejected": -2.5977416038513184, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 2.304587155963303, |
|
"grad_norm": 0.4444005787372589, |
|
"learning_rate": 7.666068003761684e-07, |
|
"logits/chosen": -0.6887121796607971, |
|
"logits/rejected": -1.9678281545639038, |
|
"logps/chosen": -293.02899169921875, |
|
"logps/rejected": -188.30133056640625, |
|
"loss": 0.0489, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1815087795257568, |
|
"rewards/margins": 4.006950855255127, |
|
"rewards/rejected": -2.825442314147949, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 2.3192660550458717, |
|
"grad_norm": 0.4742709696292877, |
|
"learning_rate": 7.360202009332993e-07, |
|
"logits/chosen": -0.8080792427062988, |
|
"logits/rejected": -2.064013719558716, |
|
"logps/chosen": -293.2082824707031, |
|
"logps/rejected": -204.35791015625, |
|
"loss": 0.0577, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.128941297531128, |
|
"rewards/margins": 3.8874096870422363, |
|
"rewards/rejected": -2.7584681510925293, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 2.3339449541284405, |
|
"grad_norm": 0.3781169652938843, |
|
"learning_rate": 7.059506253764773e-07, |
|
"logits/chosen": -0.7421601414680481, |
|
"logits/rejected": -1.9860243797302246, |
|
"logps/chosen": -310.87603759765625, |
|
"logps/rejected": -214.26307678222656, |
|
"loss": 0.0551, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1338543891906738, |
|
"rewards/margins": 3.990175247192383, |
|
"rewards/rejected": -2.856320381164551, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 2.3486238532110093, |
|
"grad_norm": 0.585981547832489, |
|
"learning_rate": 6.764068871222825e-07, |
|
"logits/chosen": -0.49822843074798584, |
|
"logits/rejected": -1.8513426780700684, |
|
"logps/chosen": -284.0379943847656, |
|
"logps/rejected": -204.76626586914062, |
|
"loss": 0.0667, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1651420593261719, |
|
"rewards/margins": 3.555227518081665, |
|
"rewards/rejected": -2.3900856971740723, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.363302752293578, |
|
"grad_norm": 0.4085443317890167, |
|
"learning_rate": 6.473976454639608e-07, |
|
"logits/chosen": -0.6734147667884827, |
|
"logits/rejected": -2.0145885944366455, |
|
"logps/chosen": -289.7484130859375, |
|
"logps/rejected": -187.61102294921875, |
|
"loss": 0.052, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3118236064910889, |
|
"rewards/margins": 4.029073715209961, |
|
"rewards/rejected": -2.717249870300293, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 2.377981651376147, |
|
"grad_norm": 0.5898061394691467, |
|
"learning_rate": 6.189314030333796e-07, |
|
"logits/chosen": -0.5899083614349365, |
|
"logits/rejected": -1.810298204421997, |
|
"logps/chosen": -276.9262390136719, |
|
"logps/rejected": -240.26417541503906, |
|
"loss": 0.063, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.1324195861816406, |
|
"rewards/margins": 4.060219764709473, |
|
"rewards/rejected": -2.927799701690674, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 2.3926605504587157, |
|
"grad_norm": 0.41931232810020447, |
|
"learning_rate": 5.910165033089e-07, |
|
"logits/chosen": -0.6229602098464966, |
|
"logits/rejected": -1.9887808561325073, |
|
"logps/chosen": -312.65863037109375, |
|
"logps/rejected": -219.41209411621094, |
|
"loss": 0.046, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2578260898590088, |
|
"rewards/margins": 3.8544065952301025, |
|
"rewards/rejected": -2.5965805053710938, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 2.4073394495412845, |
|
"grad_norm": 0.4480370581150055, |
|
"learning_rate": 5.636611281698956e-07, |
|
"logits/chosen": -0.6608809232711792, |
|
"logits/rejected": -1.8771038055419922, |
|
"logps/chosen": -259.6578369140625, |
|
"logps/rejected": -202.66000366210938, |
|
"loss": 0.0629, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0445836782455444, |
|
"rewards/margins": 3.572209119796753, |
|
"rewards/rejected": -2.52762508392334, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 2.4220183486238533, |
|
"grad_norm": 0.5525819659233093, |
|
"learning_rate": 5.368732954986389e-07, |
|
"logits/chosen": -0.7845314741134644, |
|
"logits/rejected": -1.9783120155334473, |
|
"logps/chosen": -276.59765625, |
|
"logps/rejected": -214.77944946289062, |
|
"loss": 0.0661, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0571794509887695, |
|
"rewards/margins": 3.684526205062866, |
|
"rewards/rejected": -2.6273467540740967, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.436697247706422, |
|
"grad_norm": 0.49101755023002625, |
|
"learning_rate": 5.106608568302504e-07, |
|
"logits/chosen": -0.8155212998390198, |
|
"logits/rejected": -1.9448341131210327, |
|
"logps/chosen": -255.01223754882812, |
|
"logps/rejected": -215.4046173095703, |
|
"loss": 0.0687, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.0779012441635132, |
|
"rewards/margins": 3.9367024898529053, |
|
"rewards/rejected": -2.8588013648986816, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 2.451376146788991, |
|
"grad_norm": 0.5052284598350525, |
|
"learning_rate": 4.850314950514124e-07, |
|
"logits/chosen": -0.587533712387085, |
|
"logits/rejected": -1.8359942436218262, |
|
"logps/chosen": -277.79736328125, |
|
"logps/rejected": -211.66757202148438, |
|
"loss": 0.0586, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.1925641298294067, |
|
"rewards/margins": 3.949004650115967, |
|
"rewards/rejected": -2.7564406394958496, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 2.4660550458715598, |
|
"grad_norm": 0.7067945599555969, |
|
"learning_rate": 4.599927221485034e-07, |
|
"logits/chosen": -0.6442288160324097, |
|
"logits/rejected": -2.023991107940674, |
|
"logps/chosen": -274.1790771484375, |
|
"logps/rejected": -190.11256408691406, |
|
"loss": 0.06, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1234374046325684, |
|
"rewards/margins": 3.860576629638672, |
|
"rewards/rejected": -2.7371394634246826, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 2.4807339449541286, |
|
"grad_norm": 0.4873596429824829, |
|
"learning_rate": 4.3555187700583175e-07, |
|
"logits/chosen": -0.6312516331672668, |
|
"logits/rejected": -1.9823637008666992, |
|
"logps/chosen": -261.86920166015625, |
|
"logps/rejected": -208.9684295654297, |
|
"loss": 0.0456, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1352046728134155, |
|
"rewards/margins": 4.077635765075684, |
|
"rewards/rejected": -2.9424314498901367, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 2.4954128440366974, |
|
"grad_norm": 0.39889803528785706, |
|
"learning_rate": 4.1171612325460244e-07, |
|
"logits/chosen": -0.6575929522514343, |
|
"logits/rejected": -1.8394465446472168, |
|
"logps/chosen": -276.56951904296875, |
|
"logps/rejected": -203.4097137451172, |
|
"loss": 0.0642, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9956181049346924, |
|
"rewards/margins": 3.6727139949798584, |
|
"rewards/rejected": -2.677096128463745, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.510091743119266, |
|
"grad_norm": 0.31606411933898926, |
|
"learning_rate": 3.8849244717325206e-07, |
|
"logits/chosen": -0.6538981795310974, |
|
"logits/rejected": -1.8527557849884033, |
|
"logps/chosen": -265.1747741699219, |
|
"logps/rejected": -223.43577575683594, |
|
"loss": 0.056, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2189264297485352, |
|
"rewards/margins": 4.1966047286987305, |
|
"rewards/rejected": -2.977677822113037, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 2.524770642201835, |
|
"grad_norm": 0.5571908354759216, |
|
"learning_rate": 3.658876556397628e-07, |
|
"logits/chosen": -0.8455044031143188, |
|
"logits/rejected": -2.0153141021728516, |
|
"logps/chosen": -251.9333038330078, |
|
"logps/rejected": -189.60812377929688, |
|
"loss": 0.0637, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1785720586776733, |
|
"rewards/margins": 3.818125009536743, |
|
"rewards/rejected": -2.6395530700683594, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 2.539449541284404, |
|
"grad_norm": 0.6095588803291321, |
|
"learning_rate": 3.4390837413656256e-07, |
|
"logits/chosen": -0.7302352786064148, |
|
"logits/rejected": -2.0162761211395264, |
|
"logps/chosen": -274.54345703125, |
|
"logps/rejected": -224.21470642089844, |
|
"loss": 0.06, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2244491577148438, |
|
"rewards/margins": 4.071366310119629, |
|
"rewards/rejected": -2.846916913986206, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 2.5541284403669726, |
|
"grad_norm": 0.5406623482704163, |
|
"learning_rate": 3.225610448085903e-07, |
|
"logits/chosen": -0.6975880265235901, |
|
"logits/rejected": -1.935050129890442, |
|
"logps/chosen": -267.08587646484375, |
|
"logps/rejected": -203.06932067871094, |
|
"loss": 0.0589, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1748192310333252, |
|
"rewards/margins": 3.9555208683013916, |
|
"rewards/rejected": -2.7807018756866455, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 2.5688073394495414, |
|
"grad_norm": 0.43646830320358276, |
|
"learning_rate": 3.018519245750989e-07, |
|
"logits/chosen": -0.7113920450210571, |
|
"logits/rejected": -1.8569591045379639, |
|
"logps/chosen": -318.1829833984375, |
|
"logps/rejected": -243.28494262695312, |
|
"loss": 0.0597, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1056281328201294, |
|
"rewards/margins": 3.9762771129608154, |
|
"rewards/rejected": -2.8706490993499756, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.5834862385321102, |
|
"grad_norm": 0.5032224655151367, |
|
"learning_rate": 2.817870832957459e-07, |
|
"logits/chosen": -0.6119564175605774, |
|
"logits/rejected": -1.8957453966140747, |
|
"logps/chosen": -255.68161010742188, |
|
"logps/rejected": -199.38027954101562, |
|
"loss": 0.0624, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2046058177947998, |
|
"rewards/margins": 3.997673511505127, |
|
"rewards/rejected": -2.793067693710327, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 2.598165137614679, |
|
"grad_norm": 0.6729313731193542, |
|
"learning_rate": 2.6237240199151386e-07, |
|
"logits/chosen": -0.7221190929412842, |
|
"logits/rejected": -1.9667840003967285, |
|
"logps/chosen": -261.57452392578125, |
|
"logps/rejected": -189.23782348632812, |
|
"loss": 0.0746, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1303036212921143, |
|
"rewards/margins": 3.524320125579834, |
|
"rewards/rejected": -2.3940162658691406, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 2.612844036697248, |
|
"grad_norm": 0.6832696795463562, |
|
"learning_rate": 2.436135711209786e-07, |
|
"logits/chosen": -0.945254385471344, |
|
"logits/rejected": -2.049318552017212, |
|
"logps/chosen": -276.5006103515625, |
|
"logps/rejected": -182.98167419433594, |
|
"loss": 0.0657, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0834262371063232, |
|
"rewards/margins": 3.6695027351379395, |
|
"rewards/rejected": -2.586076259613037, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 2.6275229357798167, |
|
"grad_norm": 0.5833069086074829, |
|
"learning_rate": 2.2551608891243026e-07, |
|
"logits/chosen": -0.9311258792877197, |
|
"logits/rejected": -2.0406694412231445, |
|
"logps/chosen": -350.43255615234375, |
|
"logps/rejected": -230.80328369140625, |
|
"loss": 0.0619, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0079129934310913, |
|
"rewards/margins": 3.520665168762207, |
|
"rewards/rejected": -2.512752056121826, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 2.6422018348623855, |
|
"grad_norm": 0.3629484474658966, |
|
"learning_rate": 2.0808525975233807e-07, |
|
"logits/chosen": -0.5598534345626831, |
|
"logits/rejected": -1.9144881963729858, |
|
"logps/chosen": -280.1201477050781, |
|
"logps/rejected": -219.12208557128906, |
|
"loss": 0.0722, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.011694312095642, |
|
"rewards/margins": 3.6944315433502197, |
|
"rewards/rejected": -2.682737350463867, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.6568807339449543, |
|
"grad_norm": 0.6347119808197021, |
|
"learning_rate": 1.9132619263063144e-07, |
|
"logits/chosen": -0.6190422773361206, |
|
"logits/rejected": -1.9601435661315918, |
|
"logps/chosen": -341.5980224609375, |
|
"logps/rejected": -231.95660400390625, |
|
"loss": 0.0556, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3936290740966797, |
|
"rewards/margins": 4.258563041687012, |
|
"rewards/rejected": -2.864933967590332, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 2.671559633027523, |
|
"grad_norm": 0.309498131275177, |
|
"learning_rate": 1.7524379964325155e-07, |
|
"logits/chosen": -0.6985868215560913, |
|
"logits/rejected": -1.9882198572158813, |
|
"logps/chosen": -324.7488098144531, |
|
"logps/rejected": -223.37721252441406, |
|
"loss": 0.0491, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1102869510650635, |
|
"rewards/margins": 3.9326446056365967, |
|
"rewards/rejected": -2.822357654571533, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 2.686238532110092, |
|
"grad_norm": 0.46792224049568176, |
|
"learning_rate": 1.5984279455240975e-07, |
|
"logits/chosen": -0.7208799719810486, |
|
"logits/rejected": -1.8783855438232422, |
|
"logps/chosen": -278.7039794921875, |
|
"logps/rejected": -210.89584350585938, |
|
"loss": 0.0528, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2110655307769775, |
|
"rewards/margins": 3.9659790992736816, |
|
"rewards/rejected": -2.754913806915283, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 2.7009174311926607, |
|
"grad_norm": 0.395177960395813, |
|
"learning_rate": 1.451276914049818e-07, |
|
"logits/chosen": -0.723671019077301, |
|
"logits/rejected": -1.9200284481048584, |
|
"logps/chosen": -253.25576782226562, |
|
"logps/rejected": -197.5750732421875, |
|
"loss": 0.0502, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0932037830352783, |
|
"rewards/margins": 3.944528341293335, |
|
"rewards/rejected": -2.8513240814208984, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 2.7155963302752295, |
|
"grad_norm": 0.38088735938072205, |
|
"learning_rate": 1.3110280320943692e-07, |
|
"logits/chosen": -0.6326093077659607, |
|
"logits/rejected": -2.0090925693511963, |
|
"logps/chosen": -267.09918212890625, |
|
"logps/rejected": -190.76773071289062, |
|
"loss": 0.0425, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2381752729415894, |
|
"rewards/margins": 4.017492294311523, |
|
"rewards/rejected": -2.7793164253234863, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.7302752293577983, |
|
"grad_norm": 0.4006403386592865, |
|
"learning_rate": 1.1777224067169218e-07, |
|
"logits/chosen": -0.5613418221473694, |
|
"logits/rejected": -1.8734657764434814, |
|
"logps/chosen": -274.9844055175781, |
|
"logps/rejected": -211.76947021484375, |
|
"loss": 0.0509, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.273461937904358, |
|
"rewards/margins": 4.165151596069336, |
|
"rewards/rejected": -2.8916897773742676, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 2.744954128440367, |
|
"grad_norm": 0.4330195188522339, |
|
"learning_rate": 1.0513991099025872e-07, |
|
"logits/chosen": -0.7487627863883972, |
|
"logits/rejected": -2.035677433013916, |
|
"logps/chosen": -320.5176086425781, |
|
"logps/rejected": -211.03750610351562, |
|
"loss": 0.0598, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1087989807128906, |
|
"rewards/margins": 3.594910144805908, |
|
"rewards/rejected": -2.4861111640930176, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 2.759633027522936, |
|
"grad_norm": 0.5134207606315613, |
|
"learning_rate": 9.320951671104194e-08, |
|
"logits/chosen": -0.6394185423851013, |
|
"logits/rejected": -2.0223844051361084, |
|
"logps/chosen": -308.8260803222656, |
|
"logps/rejected": -208.8563232421875, |
|
"loss": 0.0493, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.4963104724884033, |
|
"rewards/margins": 4.028608322143555, |
|
"rewards/rejected": -2.5322980880737305, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 2.7743119266055047, |
|
"grad_norm": 0.4374036490917206, |
|
"learning_rate": 8.198455464212108e-08, |
|
"logits/chosen": -0.7048813104629517, |
|
"logits/rejected": -1.9578654766082764, |
|
"logps/chosen": -289.2408142089844, |
|
"logps/rejected": -196.0473175048828, |
|
"loss": 0.0438, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.321907877922058, |
|
"rewards/margins": 4.240860939025879, |
|
"rewards/rejected": -2.9189531803131104, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 2.7889908256880735, |
|
"grad_norm": 0.4215639531612396, |
|
"learning_rate": 7.146831482883115e-08, |
|
"logits/chosen": -0.46766549348831177, |
|
"logits/rejected": -1.9882452487945557, |
|
"logps/chosen": -293.3287658691406, |
|
"logps/rejected": -191.9895782470703, |
|
"loss": 0.0434, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2766855955123901, |
|
"rewards/margins": 4.219532489776611, |
|
"rewards/rejected": -2.9428470134735107, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.8036697247706424, |
|
"grad_norm": 0.6274383664131165, |
|
"learning_rate": 6.16638795894492e-08, |
|
"logits/chosen": -0.6346589922904968, |
|
"logits/rejected": -1.823696255683899, |
|
"logps/chosen": -258.1131286621094, |
|
"logps/rejected": -219.12423706054688, |
|
"loss": 0.0644, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2239983081817627, |
|
"rewards/margins": 3.9356327056884766, |
|
"rewards/rejected": -2.711634635925293, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 2.818348623853211, |
|
"grad_norm": 0.5807551145553589, |
|
"learning_rate": 5.257412261176375e-08, |
|
"logits/chosen": -0.8102257251739502, |
|
"logits/rejected": -1.9288864135742188, |
|
"logps/chosen": -269.1815185546875, |
|
"logps/rejected": -209.54632568359375, |
|
"loss": 0.0565, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.275071144104004, |
|
"rewards/margins": 3.9383811950683594, |
|
"rewards/rejected": -2.6633105278015137, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 2.83302752293578, |
|
"grad_norm": 0.6179290413856506, |
|
"learning_rate": 4.4201708110795384e-08, |
|
"logits/chosen": -0.6924893856048584, |
|
"logits/rejected": -1.8573181629180908, |
|
"logps/chosen": -289.4762878417969, |
|
"logps/rejected": -222.00888061523438, |
|
"loss": 0.0614, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1291638612747192, |
|
"rewards/margins": 3.648355722427368, |
|
"rewards/rejected": -2.5191917419433594, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 2.847706422018349, |
|
"grad_norm": 0.40115347504615784, |
|
"learning_rate": 3.654909004791152e-08, |
|
"logits/chosen": -0.684185802936554, |
|
"logits/rejected": -2.0209643840789795, |
|
"logps/chosen": -290.3592529296875, |
|
"logps/rejected": -203.97271728515625, |
|
"loss": 0.0605, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0887296199798584, |
|
"rewards/margins": 3.935429334640503, |
|
"rewards/rejected": -2.8466997146606445, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 2.8623853211009176, |
|
"grad_norm": 0.48927587270736694, |
|
"learning_rate": 2.9618511411570462e-08, |
|
"logits/chosen": -0.7535040378570557, |
|
"logits/rejected": -1.9976847171783447, |
|
"logps/chosen": -281.9385986328125, |
|
"logps/rejected": -191.1783905029297, |
|
"loss": 0.0687, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9771148562431335, |
|
"rewards/margins": 3.7266292572021484, |
|
"rewards/rejected": -2.749514102935791, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.8770642201834864, |
|
"grad_norm": 0.6194383502006531, |
|
"learning_rate": 2.3412003559898088e-08, |
|
"logits/chosen": -0.6612120270729065, |
|
"logits/rejected": -1.7749061584472656, |
|
"logps/chosen": -267.9380798339844, |
|
"logps/rejected": -225.19703674316406, |
|
"loss": 0.0777, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1520177125930786, |
|
"rewards/margins": 3.639789581298828, |
|
"rewards/rejected": -2.487771987915039, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 2.891743119266055, |
|
"grad_norm": 0.5731943845748901, |
|
"learning_rate": 1.793138562529634e-08, |
|
"logits/chosen": -0.7014014720916748, |
|
"logits/rejected": -2.0430359840393066, |
|
"logps/chosen": -342.8412780761719, |
|
"logps/rejected": -201.6863250732422, |
|
"loss": 0.054, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3100138902664185, |
|
"rewards/margins": 3.81455397605896, |
|
"rewards/rejected": -2.504539966583252, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 2.906422018348624, |
|
"grad_norm": 0.43205344676971436, |
|
"learning_rate": 1.317826398125277e-08, |
|
"logits/chosen": -0.7878881692886353, |
|
"logits/rejected": -1.9929152727127075, |
|
"logps/chosen": -289.1486511230469, |
|
"logps/rejected": -225.01998901367188, |
|
"loss": 0.0475, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2740627527236938, |
|
"rewards/margins": 4.280096530914307, |
|
"rewards/rejected": -3.0060338973999023, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 2.921100917431193, |
|
"grad_norm": 0.4168623089790344, |
|
"learning_rate": 9.15403177151275e-09, |
|
"logits/chosen": -0.7053463459014893, |
|
"logits/rejected": -1.812314748764038, |
|
"logps/chosen": -272.26068115234375, |
|
"logps/rejected": -237.58221435546875, |
|
"loss": 0.045, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2796133756637573, |
|
"rewards/margins": 4.088270664215088, |
|
"rewards/rejected": -2.80865740776062, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 2.9357798165137616, |
|
"grad_norm": 0.3825678527355194, |
|
"learning_rate": 5.85986850174608e-09, |
|
"logits/chosen": -0.6317567825317383, |
|
"logits/rejected": -2.1290388107299805, |
|
"logps/chosen": -308.5335693359375, |
|
"logps/rejected": -204.36622619628906, |
|
"loss": 0.0502, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.297298789024353, |
|
"rewards/margins": 4.06523323059082, |
|
"rewards/rejected": -2.7679343223571777, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.9504587155963304, |
|
"grad_norm": 0.42925432324409485, |
|
"learning_rate": 3.296739693834927e-09, |
|
"logits/chosen": -0.8801270723342896, |
|
"logits/rejected": -1.8690441846847534, |
|
"logps/chosen": -302.94482421875, |
|
"logps/rejected": -201.8232879638672, |
|
"loss": 0.0637, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9750170707702637, |
|
"rewards/margins": 3.5083870887756348, |
|
"rewards/rejected": -2.533370018005371, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 2.9651376146788992, |
|
"grad_norm": 0.5444877743721008, |
|
"learning_rate": 1.4653966028774225e-09, |
|
"logits/chosen": -0.6766524314880371, |
|
"logits/rejected": -1.8512043952941895, |
|
"logps/chosen": -310.28594970703125, |
|
"logps/rejected": -234.16964721679688, |
|
"loss": 0.0502, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1554120779037476, |
|
"rewards/margins": 4.151627540588379, |
|
"rewards/rejected": -2.996215581893921, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 2.979816513761468, |
|
"grad_norm": 0.38042151927948, |
|
"learning_rate": 3.6637599699351766e-10, |
|
"logits/chosen": -0.6798438429832458, |
|
"logits/rejected": -2.07844877243042, |
|
"logps/chosen": -286.02349853515625, |
|
"logps/rejected": -199.32891845703125, |
|
"loss": 0.0516, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2158042192459106, |
|
"rewards/margins": 3.8894968032836914, |
|
"rewards/rejected": -2.6736927032470703, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 2.994495412844037, |
|
"grad_norm": 0.43901610374450684, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -0.7945417761802673, |
|
"logits/rejected": -1.8590571880340576, |
|
"logps/chosen": -316.1878967285156, |
|
"logps/rejected": -232.12205505371094, |
|
"loss": 0.0577, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.2349828481674194, |
|
"rewards/margins": 3.9613094329833984, |
|
"rewards/rejected": -2.7263264656066895, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 2.994495412844037, |
|
"step": 408, |
|
"total_flos": 7.837376281021809e+17, |
|
"train_loss": 0.220101895632551, |
|
"train_runtime": 8071.0106, |
|
"train_samples_per_second": 1.619, |
|
"train_steps_per_second": 0.051 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 408, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.837376281021809e+17, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|