|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.994495412844037, |
|
"eval_steps": 500, |
|
"global_step": 408, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.014678899082568808, |
|
"grad_norm": 2.871569871902466, |
|
"learning_rate": 2.439024390243903e-07, |
|
"logits/chosen": -1.156640887260437, |
|
"logits/rejected": -2.0261764526367188, |
|
"logps/chosen": -291.95379638671875, |
|
"logps/rejected": -199.91015625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.029357798165137616, |
|
"grad_norm": 2.7688803672790527, |
|
"learning_rate": 4.878048780487805e-07, |
|
"logits/chosen": -1.1512565612792969, |
|
"logits/rejected": -1.9958158731460571, |
|
"logps/chosen": -313.67742919921875, |
|
"logps/rejected": -219.4925537109375, |
|
"loss": 0.6952, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0017839791253209114, |
|
"rewards/margins": -0.0021596220321953297, |
|
"rewards/rejected": 0.003943601623177528, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.044036697247706424, |
|
"grad_norm": 2.8300042152404785, |
|
"learning_rate": 7.317073170731707e-07, |
|
"logits/chosen": -1.217061996459961, |
|
"logits/rejected": -2.1603338718414307, |
|
"logps/chosen": -318.8204650878906, |
|
"logps/rejected": -219.18704223632812, |
|
"loss": 0.6906, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": 0.003621376119554043, |
|
"rewards/margins": 0.007228089962154627, |
|
"rewards/rejected": -0.003606713144108653, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.05871559633027523, |
|
"grad_norm": 2.636244058609009, |
|
"learning_rate": 9.75609756097561e-07, |
|
"logits/chosen": -1.359943151473999, |
|
"logits/rejected": -2.125555992126465, |
|
"logps/chosen": -271.85272216796875, |
|
"logps/rejected": -177.42059326171875, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.010891949757933617, |
|
"rewards/margins": 0.005428856238722801, |
|
"rewards/rejected": 0.005463093984872103, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.07339449541284404, |
|
"grad_norm": 3.117539882659912, |
|
"learning_rate": 1.2195121951219514e-06, |
|
"logits/chosen": -1.1746495962142944, |
|
"logits/rejected": -2.142481565475464, |
|
"logps/chosen": -329.56201171875, |
|
"logps/rejected": -171.868896484375, |
|
"loss": 0.6837, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": 0.019563177600502968, |
|
"rewards/margins": 0.020585114136338234, |
|
"rewards/rejected": -0.0010219333926215768, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.08807339449541285, |
|
"grad_norm": 3.573014497756958, |
|
"learning_rate": 1.4634146341463414e-06, |
|
"logits/chosen": -1.1120442152023315, |
|
"logits/rejected": -1.9781230688095093, |
|
"logps/chosen": -373.2279052734375, |
|
"logps/rejected": -240.803955078125, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.453125, |
|
"rewards/chosen": 0.010507804341614246, |
|
"rewards/margins": 0.00216490775346756, |
|
"rewards/rejected": 0.008342898450791836, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.10275229357798166, |
|
"grad_norm": 3.1432557106018066, |
|
"learning_rate": 1.707317073170732e-06, |
|
"logits/chosen": -1.1176837682724, |
|
"logits/rejected": -1.9580059051513672, |
|
"logps/chosen": -281.2641296386719, |
|
"logps/rejected": -181.50938415527344, |
|
"loss": 0.6889, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": 0.01434221863746643, |
|
"rewards/margins": 0.010814160108566284, |
|
"rewards/rejected": 0.0035280571319162846, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.11743119266055047, |
|
"grad_norm": 3.08245587348938, |
|
"learning_rate": 1.951219512195122e-06, |
|
"logits/chosen": -1.2329456806182861, |
|
"logits/rejected": -2.0007548332214355, |
|
"logps/chosen": -292.1178894042969, |
|
"logps/rejected": -199.83258056640625, |
|
"loss": 0.6897, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.004180246964097023, |
|
"rewards/margins": 0.008593017235398293, |
|
"rewards/rejected": -0.004412769805639982, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.13211009174311927, |
|
"grad_norm": 3.315281391143799, |
|
"learning_rate": 2.1951219512195125e-06, |
|
"logits/chosen": -1.1571717262268066, |
|
"logits/rejected": -2.041630268096924, |
|
"logps/chosen": -337.57818603515625, |
|
"logps/rejected": -212.22586059570312, |
|
"loss": 0.6881, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": 0.012045616284012794, |
|
"rewards/margins": 0.011737149208784103, |
|
"rewards/rejected": 0.0003084660565946251, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.14678899082568808, |
|
"grad_norm": 3.288015127182007, |
|
"learning_rate": 2.4390243902439027e-06, |
|
"logits/chosen": -1.170533299446106, |
|
"logits/rejected": -2.111523389816284, |
|
"logps/chosen": -332.5646057128906, |
|
"logps/rejected": -171.13861083984375, |
|
"loss": 0.6866, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.018359623849391937, |
|
"rewards/margins": 0.014990389347076416, |
|
"rewards/rejected": 0.003369236597791314, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.1614678899082569, |
|
"grad_norm": 3.0890462398529053, |
|
"learning_rate": 2.682926829268293e-06, |
|
"logits/chosen": -1.326155662536621, |
|
"logits/rejected": -2.235764265060425, |
|
"logps/chosen": -321.82012939453125, |
|
"logps/rejected": -199.34010314941406, |
|
"loss": 0.6867, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.009998606517910957, |
|
"rewards/margins": 0.014448178000748158, |
|
"rewards/rejected": -0.0044495705515146255, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.1761467889908257, |
|
"grad_norm": 3.174973249435425, |
|
"learning_rate": 2.926829268292683e-06, |
|
"logits/chosen": -1.1311931610107422, |
|
"logits/rejected": -2.1738736629486084, |
|
"logps/chosen": -394.0300598144531, |
|
"logps/rejected": -168.5726776123047, |
|
"loss": 0.6941, |
|
"rewards/accuracies": 0.390625, |
|
"rewards/chosen": -0.001855961512774229, |
|
"rewards/margins": -0.00018751714378595352, |
|
"rewards/rejected": -0.0016684436704963446, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.1908256880733945, |
|
"grad_norm": 2.7846882343292236, |
|
"learning_rate": 3.1707317073170736e-06, |
|
"logits/chosen": -1.315462589263916, |
|
"logits/rejected": -2.179847478866577, |
|
"logps/chosen": -349.72467041015625, |
|
"logps/rejected": -194.91355895996094, |
|
"loss": 0.6842, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.020769033581018448, |
|
"rewards/margins": 0.020399674773216248, |
|
"rewards/rejected": 0.00036935764364898205, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.20550458715596331, |
|
"grad_norm": 2.960986852645874, |
|
"learning_rate": 3.414634146341464e-06, |
|
"logits/chosen": -1.218693733215332, |
|
"logits/rejected": -2.219115734100342, |
|
"logps/chosen": -303.5213928222656, |
|
"logps/rejected": -176.81622314453125, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": 0.001343409065157175, |
|
"rewards/margins": 0.0020109512843191624, |
|
"rewards/rejected": -0.0006675421027466655, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.22018348623853212, |
|
"grad_norm": 2.6187989711761475, |
|
"learning_rate": 3.6585365853658537e-06, |
|
"logits/chosen": -1.2147996425628662, |
|
"logits/rejected": -2.09503173828125, |
|
"logps/chosen": -311.60198974609375, |
|
"logps/rejected": -211.1887664794922, |
|
"loss": 0.6827, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.027364609763026237, |
|
"rewards/margins": 0.022295203059911728, |
|
"rewards/rejected": 0.0050694081000983715, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.23486238532110093, |
|
"grad_norm": 3.18058180809021, |
|
"learning_rate": 3.902439024390244e-06, |
|
"logits/chosen": -1.269258975982666, |
|
"logits/rejected": -2.129913806915283, |
|
"logps/chosen": -310.4969787597656, |
|
"logps/rejected": -175.62393188476562, |
|
"loss": 0.6784, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.02811383828520775, |
|
"rewards/margins": 0.031415536999702454, |
|
"rewards/rejected": -0.0033016952220350504, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.24954128440366974, |
|
"grad_norm": 3.44490647315979, |
|
"learning_rate": 4.146341463414634e-06, |
|
"logits/chosen": -1.2504366636276245, |
|
"logits/rejected": -2.2198028564453125, |
|
"logps/chosen": -346.65069580078125, |
|
"logps/rejected": -176.64193725585938, |
|
"loss": 0.6835, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": 0.027392717078328133, |
|
"rewards/margins": 0.02168484590947628, |
|
"rewards/rejected": 0.0057078697718679905, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.26422018348623855, |
|
"grad_norm": 2.8181567192077637, |
|
"learning_rate": 4.390243902439025e-06, |
|
"logits/chosen": -1.2708137035369873, |
|
"logits/rejected": -2.0570731163024902, |
|
"logps/chosen": -332.41156005859375, |
|
"logps/rejected": -219.01556396484375, |
|
"loss": 0.6776, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.04396076127886772, |
|
"rewards/margins": 0.0344666913151741, |
|
"rewards/rejected": 0.009494070895016193, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.27889908256880735, |
|
"grad_norm": 3.29911208152771, |
|
"learning_rate": 4.634146341463416e-06, |
|
"logits/chosen": -1.2899575233459473, |
|
"logits/rejected": -2.1684398651123047, |
|
"logps/chosen": -316.49993896484375, |
|
"logps/rejected": -214.9636688232422, |
|
"loss": 0.6867, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.022868501022458076, |
|
"rewards/margins": 0.014677047729492188, |
|
"rewards/rejected": 0.008191454224288464, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.29357798165137616, |
|
"grad_norm": 2.80910325050354, |
|
"learning_rate": 4.8780487804878055e-06, |
|
"logits/chosen": -1.1400400400161743, |
|
"logits/rejected": -2.0709128379821777, |
|
"logps/chosen": -368.51824951171875, |
|
"logps/rejected": -194.36216735839844, |
|
"loss": 0.6702, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": 0.04931124299764633, |
|
"rewards/margins": 0.04868461191654205, |
|
"rewards/rejected": 0.0006266293348744512, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.30825688073394497, |
|
"grad_norm": 3.187028169631958, |
|
"learning_rate": 4.999908404322799e-06, |
|
"logits/chosen": -1.142716646194458, |
|
"logits/rejected": -2.20780348777771, |
|
"logps/chosen": -343.4991760253906, |
|
"logps/rejected": -184.4697265625, |
|
"loss": 0.6621, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": 0.06981995701789856, |
|
"rewards/margins": 0.06430794298648834, |
|
"rewards/rejected": 0.005512019619345665, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.3229357798165138, |
|
"grad_norm": 2.664074659347534, |
|
"learning_rate": 4.999175679175577e-06, |
|
"logits/chosen": -1.209214448928833, |
|
"logits/rejected": -2.1323928833007812, |
|
"logps/chosen": -270.0044860839844, |
|
"logps/rejected": -171.32073974609375, |
|
"loss": 0.656, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": 0.07609987258911133, |
|
"rewards/margins": 0.07787147164344788, |
|
"rewards/rejected": -0.0017715932335704565, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.3376146788990826, |
|
"grad_norm": 2.661236047744751, |
|
"learning_rate": 4.997710443643461e-06, |
|
"logits/chosen": -1.235365629196167, |
|
"logits/rejected": -2.0518736839294434, |
|
"logps/chosen": -279.3170166015625, |
|
"logps/rejected": -219.13522338867188, |
|
"loss": 0.6659, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": 0.0708962082862854, |
|
"rewards/margins": 0.05834145471453667, |
|
"rewards/rejected": 0.012554753571748734, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.3522935779816514, |
|
"grad_norm": 3.9819839000701904, |
|
"learning_rate": 4.995513127188151e-06, |
|
"logits/chosen": -1.1877082586288452, |
|
"logits/rejected": -2.2009482383728027, |
|
"logps/chosen": -392.36041259765625, |
|
"logps/rejected": -197.9148406982422, |
|
"loss": 0.661, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": 0.10185055434703827, |
|
"rewards/margins": 0.06925681233406067, |
|
"rewards/rejected": 0.0325937457382679, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.3669724770642202, |
|
"grad_norm": 3.3627212047576904, |
|
"learning_rate": 4.992584373844853e-06, |
|
"logits/chosen": -1.3079514503479004, |
|
"logits/rejected": -2.1042516231536865, |
|
"logps/chosen": -367.0893859863281, |
|
"logps/rejected": -195.80905151367188, |
|
"loss": 0.6609, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.10206526517868042, |
|
"rewards/margins": 0.06879469007253647, |
|
"rewards/rejected": 0.03327057510614395, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.381651376146789, |
|
"grad_norm": 3.4364843368530273, |
|
"learning_rate": 4.98892504203351e-06, |
|
"logits/chosen": -1.3703242540359497, |
|
"logits/rejected": -2.135772228240967, |
|
"logps/chosen": -305.8392639160156, |
|
"logps/rejected": -170.4441680908203, |
|
"loss": 0.6426, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/chosen": 0.13675755262374878, |
|
"rewards/margins": 0.10614188760519028, |
|
"rewards/rejected": 0.030615665018558502, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.3963302752293578, |
|
"grad_norm": 3.014284372329712, |
|
"learning_rate": 4.9845362043071925e-06, |
|
"logits/chosen": -1.1213593482971191, |
|
"logits/rejected": -2.040038585662842, |
|
"logps/chosen": -311.7105712890625, |
|
"logps/rejected": -176.02438354492188, |
|
"loss": 0.6448, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": 0.13455447554588318, |
|
"rewards/margins": 0.10230613499879837, |
|
"rewards/rejected": 0.03224834054708481, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.41100917431192663, |
|
"grad_norm": 3.0536396503448486, |
|
"learning_rate": 4.97941914703774e-06, |
|
"logits/chosen": -1.2472190856933594, |
|
"logits/rejected": -2.175790309906006, |
|
"logps/chosen": -310.2051086425781, |
|
"logps/rejected": -214.69712829589844, |
|
"loss": 0.6303, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": 0.18810473382472992, |
|
"rewards/margins": 0.13538572192192078, |
|
"rewards/rejected": 0.052719030529260635, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.42568807339449544, |
|
"grad_norm": 3.687453031539917, |
|
"learning_rate": 4.973575370038718e-06, |
|
"logits/chosen": -1.161484956741333, |
|
"logits/rejected": -2.056807518005371, |
|
"logps/chosen": -331.156005859375, |
|
"logps/rejected": -206.752685546875, |
|
"loss": 0.6109, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": 0.22813093662261963, |
|
"rewards/margins": 0.178737074136734, |
|
"rewards/rejected": 0.04939386993646622, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.44036697247706424, |
|
"grad_norm": 2.6800389289855957, |
|
"learning_rate": 4.967006586125827e-06, |
|
"logits/chosen": -1.3047680854797363, |
|
"logits/rejected": -2.1053338050842285, |
|
"logps/chosen": -320.47052001953125, |
|
"logps/rejected": -198.96849060058594, |
|
"loss": 0.5949, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 0.25883767008781433, |
|
"rewards/margins": 0.21324561536312103, |
|
"rewards/rejected": 0.0455920584499836, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.45504587155963305, |
|
"grad_norm": 3.319866180419922, |
|
"learning_rate": 4.959714720614871e-06, |
|
"logits/chosen": -1.2463948726654053, |
|
"logits/rejected": -2.2376761436462402, |
|
"logps/chosen": -343.1983642578125, |
|
"logps/rejected": -197.24610900878906, |
|
"loss": 0.5745, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 0.3166658878326416, |
|
"rewards/margins": 0.26522931456565857, |
|
"rewards/rejected": 0.05143657326698303, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.46972477064220186, |
|
"grad_norm": 2.6847336292266846, |
|
"learning_rate": 4.951701910757446e-06, |
|
"logits/chosen": -1.252946138381958, |
|
"logits/rejected": -2.0270633697509766, |
|
"logps/chosen": -273.5660400390625, |
|
"logps/rejected": -200.33984375, |
|
"loss": 0.5721, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": 0.3213706910610199, |
|
"rewards/margins": 0.2726665437221527, |
|
"rewards/rejected": 0.04870418459177017, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.48440366972477067, |
|
"grad_norm": 3.1882617473602295, |
|
"learning_rate": 4.942970505114514e-06, |
|
"logits/chosen": -1.1212793588638306, |
|
"logits/rejected": -2.0485286712646484, |
|
"logps/chosen": -329.2900390625, |
|
"logps/rejected": -188.50067138671875, |
|
"loss": 0.551, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/chosen": 0.37985220551490784, |
|
"rewards/margins": 0.32368168234825134, |
|
"rewards/rejected": 0.056170523166656494, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.4990825688073395, |
|
"grad_norm": 2.6009716987609863, |
|
"learning_rate": 4.933523062868033e-06, |
|
"logits/chosen": -1.1749910116195679, |
|
"logits/rejected": -2.1656789779663086, |
|
"logps/chosen": -290.49560546875, |
|
"logps/rejected": -177.18348693847656, |
|
"loss": 0.5495, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.399168461561203, |
|
"rewards/margins": 0.3291959762573242, |
|
"rewards/rejected": 0.0699724480509758, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.5137614678899083, |
|
"grad_norm": 2.7933292388916016, |
|
"learning_rate": 4.923362353070859e-06, |
|
"logits/chosen": -0.9930830597877502, |
|
"logits/rejected": -2.1664011478424072, |
|
"logps/chosen": -308.12164306640625, |
|
"logps/rejected": -170.24810791015625, |
|
"loss": 0.5133, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": 0.4771941602230072, |
|
"rewards/margins": 0.4229365289211273, |
|
"rewards/rejected": 0.05425760895013809, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.5284403669724771, |
|
"grad_norm": 2.4665513038635254, |
|
"learning_rate": 4.912491353835138e-06, |
|
"logits/chosen": -1.2331562042236328, |
|
"logits/rejected": -2.0544230937957764, |
|
"logps/chosen": -277.6913757324219, |
|
"logps/rejected": -196.8771209716797, |
|
"loss": 0.5365, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 0.4595210552215576, |
|
"rewards/margins": 0.3660896122455597, |
|
"rewards/rejected": 0.09343138337135315, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.5431192660550459, |
|
"grad_norm": 2.463873863220215, |
|
"learning_rate": 4.900913251459418e-06, |
|
"logits/chosen": -1.1638422012329102, |
|
"logits/rejected": -2.0549814701080322, |
|
"logps/chosen": -280.3222961425781, |
|
"logps/rejected": -182.9549560546875, |
|
"loss": 0.5144, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": 0.49483105540275574, |
|
"rewards/margins": 0.4287148714065552, |
|
"rewards/rejected": 0.06611625105142593, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.5577981651376147, |
|
"grad_norm": 2.5419061183929443, |
|
"learning_rate": 4.8886314394947396e-06, |
|
"logits/chosen": -1.0577822923660278, |
|
"logits/rejected": -2.03446364402771, |
|
"logps/chosen": -299.0617980957031, |
|
"logps/rejected": -196.64585876464844, |
|
"loss": 0.4634, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 0.6822911500930786, |
|
"rewards/margins": 0.5850739479064941, |
|
"rewards/rejected": 0.09721729159355164, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.5724770642201835, |
|
"grad_norm": 2.5450778007507324, |
|
"learning_rate": 4.875649517749985e-06, |
|
"logits/chosen": -1.0982365608215332, |
|
"logits/rejected": -2.1213526725769043, |
|
"logps/chosen": -301.862548828125, |
|
"logps/rejected": -203.84742736816406, |
|
"loss": 0.4663, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.6690702438354492, |
|
"rewards/margins": 0.5742719769477844, |
|
"rewards/rejected": 0.0947982519865036, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.5871559633027523, |
|
"grad_norm": 2.306406259536743, |
|
"learning_rate": 4.861971291236772e-06, |
|
"logits/chosen": -1.243112325668335, |
|
"logits/rejected": -2.0873706340789795, |
|
"logps/chosen": -346.6309509277344, |
|
"logps/rejected": -203.404052734375, |
|
"loss": 0.4685, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/chosen": 0.7503749132156372, |
|
"rewards/margins": 0.5946022868156433, |
|
"rewards/rejected": 0.15577253699302673, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.6018348623853211, |
|
"grad_norm": 2.5219640731811523, |
|
"learning_rate": 4.847600769054201e-06, |
|
"logits/chosen": -1.2759498357772827, |
|
"logits/rejected": -2.1124911308288574, |
|
"logps/chosen": -385.54498291015625, |
|
"logps/rejected": -234.3006591796875, |
|
"loss": 0.425, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.8981677293777466, |
|
"rewards/margins": 0.7219379544258118, |
|
"rewards/rejected": 0.1762298047542572, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.6165137614678899, |
|
"grad_norm": 2.297736406326294, |
|
"learning_rate": 4.832542163213787e-06, |
|
"logits/chosen": -1.1348319053649902, |
|
"logits/rejected": -2.198058605194092, |
|
"logps/chosen": -278.73016357421875, |
|
"logps/rejected": -165.02432250976562, |
|
"loss": 0.4082, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": 0.8287730813026428, |
|
"rewards/margins": 0.7731601595878601, |
|
"rewards/rejected": 0.05561291426420212, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.6311926605504588, |
|
"grad_norm": 2.225381851196289, |
|
"learning_rate": 4.816799887404911e-06, |
|
"logits/chosen": -1.299065351486206, |
|
"logits/rejected": -2.1710290908813477, |
|
"logps/chosen": -316.4049377441406, |
|
"logps/rejected": -197.56884765625, |
|
"loss": 0.4463, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 0.8337810039520264, |
|
"rewards/margins": 0.6480390429496765, |
|
"rewards/rejected": 0.18574194610118866, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.6458715596330276, |
|
"grad_norm": 2.1773393154144287, |
|
"learning_rate": 4.800378555701168e-06, |
|
"logits/chosen": -1.145480751991272, |
|
"logits/rejected": -2.0223851203918457, |
|
"logps/chosen": -370.3527526855469, |
|
"logps/rejected": -196.968505859375, |
|
"loss": 0.4075, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 0.898174524307251, |
|
"rewards/margins": 0.7861010432243347, |
|
"rewards/rejected": 0.11207354068756104, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.6605504587155964, |
|
"grad_norm": 2.2705249786376953, |
|
"learning_rate": 4.783282981207979e-06, |
|
"logits/chosen": -1.191556453704834, |
|
"logits/rejected": -2.307077407836914, |
|
"logps/chosen": -312.4258728027344, |
|
"logps/rejected": -179.88075256347656, |
|
"loss": 0.3893, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": 1.0026105642318726, |
|
"rewards/margins": 0.8912415504455566, |
|
"rewards/rejected": 0.11136899888515472, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.6752293577981652, |
|
"grad_norm": 1.855381965637207, |
|
"learning_rate": 4.765518174651864e-06, |
|
"logits/chosen": -1.1708786487579346, |
|
"logits/rejected": -2.0928103923797607, |
|
"logps/chosen": -301.8147277832031, |
|
"logps/rejected": -201.09478759765625, |
|
"loss": 0.3757, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": 0.9942986369132996, |
|
"rewards/margins": 0.8888772130012512, |
|
"rewards/rejected": 0.10542140901088715, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.689908256880734, |
|
"grad_norm": 2.0521061420440674, |
|
"learning_rate": 4.747089342911793e-06, |
|
"logits/chosen": -1.011386513710022, |
|
"logits/rejected": -2.1828246116638184, |
|
"logps/chosen": -308.777099609375, |
|
"logps/rejected": -185.42471313476562, |
|
"loss": 0.3329, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.1515001058578491, |
|
"rewards/margins": 1.04723060131073, |
|
"rewards/rejected": 0.10426945239305496, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.7045871559633028, |
|
"grad_norm": 1.8322721719741821, |
|
"learning_rate": 4.728001887493048e-06, |
|
"logits/chosen": -1.0440161228179932, |
|
"logits/rejected": -2.2036566734313965, |
|
"logps/chosen": -317.36346435546875, |
|
"logps/rejected": -204.70556640625, |
|
"loss": 0.3371, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": 1.257871150970459, |
|
"rewards/margins": 1.1031622886657715, |
|
"rewards/rejected": 0.15470871329307556, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.7192660550458716, |
|
"grad_norm": 1.716375708580017, |
|
"learning_rate": 4.708261402944036e-06, |
|
"logits/chosen": -1.1383062601089478, |
|
"logits/rejected": -2.189666271209717, |
|
"logps/chosen": -333.7127380371094, |
|
"logps/rejected": -198.931884765625, |
|
"loss": 0.2993, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.3938922882080078, |
|
"rewards/margins": 1.2491440773010254, |
|
"rewards/rejected": 0.14474821090698242, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.7339449541284404, |
|
"grad_norm": 1.7844756841659546, |
|
"learning_rate": 4.687873675216522e-06, |
|
"logits/chosen": -1.0265507698059082, |
|
"logits/rejected": -1.989030122756958, |
|
"logps/chosen": -318.661865234375, |
|
"logps/rejected": -211.2397918701172, |
|
"loss": 0.3127, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": 1.453789472579956, |
|
"rewards/margins": 1.268122673034668, |
|
"rewards/rejected": 0.1856667846441269, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.7486238532110092, |
|
"grad_norm": 1.7730361223220825, |
|
"learning_rate": 4.666844679969765e-06, |
|
"logits/chosen": -1.3037304878234863, |
|
"logits/rejected": -2.2598671913146973, |
|
"logps/chosen": -312.95440673828125, |
|
"logps/rejected": -219.03636169433594, |
|
"loss": 0.3016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2438600063323975, |
|
"rewards/margins": 1.2158725261688232, |
|
"rewards/rejected": 0.02798762172460556, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.763302752293578, |
|
"grad_norm": 1.6278932094573975, |
|
"learning_rate": 4.6451805808190464e-06, |
|
"logits/chosen": -1.1335176229476929, |
|
"logits/rejected": -2.17392635345459, |
|
"logps/chosen": -299.39410400390625, |
|
"logps/rejected": -186.06622314453125, |
|
"loss": 0.2634, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.4063000679016113, |
|
"rewards/margins": 1.4068892002105713, |
|
"rewards/rejected": -0.0005892012268304825, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.7779816513761468, |
|
"grad_norm": 1.5209800004959106, |
|
"learning_rate": 4.622887727529104e-06, |
|
"logits/chosen": -1.1014411449432373, |
|
"logits/rejected": -2.1214916706085205, |
|
"logps/chosen": -271.7640075683594, |
|
"logps/rejected": -216.89988708496094, |
|
"loss": 0.2627, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.386069416999817, |
|
"rewards/margins": 1.4243448972702026, |
|
"rewards/rejected": -0.03827540576457977, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.7926605504587156, |
|
"grad_norm": 1.5802730321884155, |
|
"learning_rate": 4.599972654153018e-06, |
|
"logits/chosen": -0.9640820026397705, |
|
"logits/rejected": -2.146678924560547, |
|
"logps/chosen": -315.3819885253906, |
|
"logps/rejected": -184.68304443359375, |
|
"loss": 0.2601, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": 1.5327023267745972, |
|
"rewards/margins": 1.4815881252288818, |
|
"rewards/rejected": 0.05111423879861832, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.8073394495412844, |
|
"grad_norm": 1.6033107042312622, |
|
"learning_rate": 4.5764420771170735e-06, |
|
"logits/chosen": -0.9946492910385132, |
|
"logits/rejected": -2.0975136756896973, |
|
"logps/chosen": -292.52398681640625, |
|
"logps/rejected": -202.83602905273438, |
|
"loss": 0.2738, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 1.4424684047698975, |
|
"rewards/margins": 1.4901291131973267, |
|
"rewards/rejected": -0.04766057804226875, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.8220183486238533, |
|
"grad_norm": 1.6146634817123413, |
|
"learning_rate": 4.552302893252166e-06, |
|
"logits/chosen": -1.2488244771957397, |
|
"logits/rejected": -2.2399239540100098, |
|
"logps/chosen": -319.301025390625, |
|
"logps/rejected": -215.10731506347656, |
|
"loss": 0.2432, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.492225170135498, |
|
"rewards/margins": 1.5009602308273315, |
|
"rewards/rejected": -0.008735168725252151, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.8366972477064221, |
|
"grad_norm": 1.879619836807251, |
|
"learning_rate": 4.52756217777234e-06, |
|
"logits/chosen": -1.2845666408538818, |
|
"logits/rejected": -2.2133727073669434, |
|
"logps/chosen": -325.5247497558594, |
|
"logps/rejected": -219.1314697265625, |
|
"loss": 0.2626, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.571778655052185, |
|
"rewards/margins": 1.5141938924789429, |
|
"rewards/rejected": 0.05758478865027428, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.8513761467889909, |
|
"grad_norm": 1.5598102807998657, |
|
"learning_rate": 4.502227182201035e-06, |
|
"logits/chosen": -0.9802009463310242, |
|
"logits/rejected": -2.0259878635406494, |
|
"logps/chosen": -275.55816650390625, |
|
"logps/rejected": -185.3338165283203, |
|
"loss": 0.2275, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.5974626541137695, |
|
"rewards/margins": 1.620395541191101, |
|
"rewards/rejected": -0.022933142259716988, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.8660550458715597, |
|
"grad_norm": 1.277979850769043, |
|
"learning_rate": 4.476305332245662e-06, |
|
"logits/chosen": -1.1266419887542725, |
|
"logits/rejected": -2.322726249694824, |
|
"logps/chosen": -327.810302734375, |
|
"logps/rejected": -161.39149475097656, |
|
"loss": 0.1984, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.5967400074005127, |
|
"rewards/margins": 1.8176448345184326, |
|
"rewards/rejected": -0.2209048718214035, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.8807339449541285, |
|
"grad_norm": 1.6042323112487793, |
|
"learning_rate": 4.449804225621116e-06, |
|
"logits/chosen": -1.0289760828018188, |
|
"logits/rejected": -2.102262496948242, |
|
"logps/chosen": -291.6026611328125, |
|
"logps/rejected": -190.6699676513672, |
|
"loss": 0.249, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.5607997179031372, |
|
"rewards/margins": 1.628572702407837, |
|
"rewards/rejected": -0.06777279078960419, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.8954128440366973, |
|
"grad_norm": 1.430982232093811, |
|
"learning_rate": 4.422731629822887e-06, |
|
"logits/chosen": -0.9640188217163086, |
|
"logits/rejected": -2.000277519226074, |
|
"logps/chosen": -327.7152404785156, |
|
"logps/rejected": -205.96337890625, |
|
"loss": 0.2425, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": 1.6806552410125732, |
|
"rewards/margins": 1.685612440109253, |
|
"rewards/rejected": -0.004957253113389015, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.9100917431192661, |
|
"grad_norm": 1.513214111328125, |
|
"learning_rate": 4.395095479850396e-06, |
|
"logits/chosen": -0.972959578037262, |
|
"logits/rejected": -1.9764440059661865, |
|
"logps/chosen": -299.74847412109375, |
|
"logps/rejected": -197.39337158203125, |
|
"loss": 0.2516, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": 1.5539629459381104, |
|
"rewards/margins": 1.6879091262817383, |
|
"rewards/rejected": -0.13394607603549957, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.9247706422018349, |
|
"grad_norm": 1.2460252046585083, |
|
"learning_rate": 4.366903875881243e-06, |
|
"logits/chosen": -1.1148145198822021, |
|
"logits/rejected": -2.3518619537353516, |
|
"logps/chosen": -287.5447692871094, |
|
"logps/rejected": -175.43360900878906, |
|
"loss": 0.19, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.5850166082382202, |
|
"rewards/margins": 1.998946189880371, |
|
"rewards/rejected": -0.4139295220375061, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.9394495412844037, |
|
"grad_norm": 1.4544743299484253, |
|
"learning_rate": 4.3381650808970365e-06, |
|
"logits/chosen": -1.0423675775527954, |
|
"logits/rejected": -1.992466926574707, |
|
"logps/chosen": -265.5049743652344, |
|
"logps/rejected": -196.2741241455078, |
|
"loss": 0.2207, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.5719702243804932, |
|
"rewards/margins": 1.7563403844833374, |
|
"rewards/rejected": -0.18437033891677856, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.9541284403669725, |
|
"grad_norm": 1.5302927494049072, |
|
"learning_rate": 4.308887518261507e-06, |
|
"logits/chosen": -0.8528121113777161, |
|
"logits/rejected": -1.961355447769165, |
|
"logps/chosen": -288.3016357421875, |
|
"logps/rejected": -206.49557495117188, |
|
"loss": 0.2075, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 1.7642028331756592, |
|
"rewards/margins": 1.9030241966247559, |
|
"rewards/rejected": -0.13882134854793549, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.9688073394495413, |
|
"grad_norm": 1.4101622104644775, |
|
"learning_rate": 4.279079769251617e-06, |
|
"logits/chosen": -1.2729012966156006, |
|
"logits/rejected": -2.241056203842163, |
|
"logps/chosen": -362.6707458496094, |
|
"logps/rejected": -222.91549682617188, |
|
"loss": 0.1861, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 1.8280537128448486, |
|
"rewards/margins": 2.065840005874634, |
|
"rewards/rejected": -0.23778626322746277, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.9834862385321101, |
|
"grad_norm": 1.1177998781204224, |
|
"learning_rate": 4.248750570542373e-06, |
|
"logits/chosen": -1.0287914276123047, |
|
"logits/rejected": -2.1009342670440674, |
|
"logps/chosen": -281.2322998046875, |
|
"logps/rejected": -189.8081512451172, |
|
"loss": 0.1931, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.5686054229736328, |
|
"rewards/margins": 1.9290703535079956, |
|
"rewards/rejected": -0.360464870929718, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.998165137614679, |
|
"grad_norm": 1.2145086526870728, |
|
"learning_rate": 4.21790881164611e-06, |
|
"logits/chosen": -0.9554519653320312, |
|
"logits/rejected": -2.0969762802124023, |
|
"logps/chosen": -292.5300598144531, |
|
"logps/rejected": -207.0960235595703, |
|
"loss": 0.1734, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.7437012195587158, |
|
"rewards/margins": 2.259512424468994, |
|
"rewards/rejected": -0.5158110857009888, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 1.0128440366972478, |
|
"grad_norm": 1.095413327217102, |
|
"learning_rate": 4.186563532306957e-06, |
|
"logits/chosen": -0.9077868461608887, |
|
"logits/rejected": -2.1029911041259766, |
|
"logps/chosen": -300.1116943359375, |
|
"logps/rejected": -180.19322204589844, |
|
"loss": 0.1588, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7422677278518677, |
|
"rewards/margins": 2.208810567855835, |
|
"rewards/rejected": -0.4665430784225464, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 1.0275229357798166, |
|
"grad_norm": 1.3220425844192505, |
|
"learning_rate": 4.154723919851291e-06, |
|
"logits/chosen": -1.077134132385254, |
|
"logits/rejected": -2.1211631298065186, |
|
"logps/chosen": -300.9671325683594, |
|
"logps/rejected": -185.4986114501953, |
|
"loss": 0.2096, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.5104981660842896, |
|
"rewards/margins": 1.9692846536636353, |
|
"rewards/rejected": -0.4587865471839905, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.0422018348623854, |
|
"grad_norm": 0.9982088208198547, |
|
"learning_rate": 4.122399306494918e-06, |
|
"logits/chosen": -1.1294522285461426, |
|
"logits/rejected": -2.265366792678833, |
|
"logps/chosen": -348.11553955078125, |
|
"logps/rejected": -211.96484375, |
|
"loss": 0.1527, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.8556833267211914, |
|
"rewards/margins": 2.2423272132873535, |
|
"rewards/rejected": -0.38664379715919495, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 1.0568807339449542, |
|
"grad_norm": 0.9512726068496704, |
|
"learning_rate": 4.089599166607794e-06, |
|
"logits/chosen": -1.0260741710662842, |
|
"logits/rejected": -2.078310489654541, |
|
"logps/chosen": -301.7906494140625, |
|
"logps/rejected": -200.17333984375, |
|
"loss": 0.132, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.709304690361023, |
|
"rewards/margins": 2.428473949432373, |
|
"rewards/rejected": -0.7191690802574158, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 1.071559633027523, |
|
"grad_norm": 0.9233289957046509, |
|
"learning_rate": 4.05633311393708e-06, |
|
"logits/chosen": -0.9745887517929077, |
|
"logits/rejected": -2.032710313796997, |
|
"logps/chosen": -267.1161804199219, |
|
"logps/rejected": -185.32769775390625, |
|
"loss": 0.1508, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.708602786064148, |
|
"rewards/margins": 2.2974541187286377, |
|
"rewards/rejected": -0.5888515710830688, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 1.0862385321100918, |
|
"grad_norm": 0.9916685223579407, |
|
"learning_rate": 4.022610898789349e-06, |
|
"logits/chosen": -0.9556669592857361, |
|
"logits/rejected": -2.117856979370117, |
|
"logps/chosen": -277.4543762207031, |
|
"logps/rejected": -200.896728515625, |
|
"loss": 0.1394, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.785478115081787, |
|
"rewards/margins": 2.374891519546509, |
|
"rewards/rejected": -0.5894135236740112, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 1.1009174311926606, |
|
"grad_norm": 1.2182554006576538, |
|
"learning_rate": 3.988442405172755e-06, |
|
"logits/chosen": -0.8240389823913574, |
|
"logits/rejected": -2.0166051387786865, |
|
"logps/chosen": -293.0532531738281, |
|
"logps/rejected": -215.48983764648438, |
|
"loss": 0.121, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.0409958362579346, |
|
"rewards/margins": 2.6529250144958496, |
|
"rewards/rejected": -0.6119292378425598, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.1155963302752294, |
|
"grad_norm": 1.0240944623947144, |
|
"learning_rate": 3.953837647900031e-06, |
|
"logits/chosen": -0.899176836013794, |
|
"logits/rejected": -2.119375705718994, |
|
"logps/chosen": -283.8042907714844, |
|
"logps/rejected": -211.6457977294922, |
|
"loss": 0.1437, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.990633249282837, |
|
"rewards/margins": 2.620699405670166, |
|
"rewards/rejected": -0.6300662159919739, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 1.1302752293577982, |
|
"grad_norm": 1.21559476852417, |
|
"learning_rate": 3.918806769653135e-06, |
|
"logits/chosen": -0.8191251754760742, |
|
"logits/rejected": -2.017087459564209, |
|
"logps/chosen": -331.17724609375, |
|
"logps/rejected": -209.400146484375, |
|
"loss": 0.152, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 1.9586645364761353, |
|
"rewards/margins": 2.562222480773926, |
|
"rewards/rejected": -0.6035579442977905, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 1.144954128440367, |
|
"grad_norm": 1.2314106225967407, |
|
"learning_rate": 3.88336003801042e-06, |
|
"logits/chosen": -0.9168681502342224, |
|
"logits/rejected": -2.054666519165039, |
|
"logps/chosen": -264.9989013671875, |
|
"logps/rejected": -192.7652587890625, |
|
"loss": 0.1509, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7247517108917236, |
|
"rewards/margins": 2.3017380237579346, |
|
"rewards/rejected": -0.5769862532615662, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 1.1596330275229358, |
|
"grad_norm": 0.9996971487998962, |
|
"learning_rate": 3.847507842437205e-06, |
|
"logits/chosen": -0.788710355758667, |
|
"logits/rejected": -2.0527966022491455, |
|
"logps/chosen": -306.01373291015625, |
|
"logps/rejected": -187.79794311523438, |
|
"loss": 0.1149, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.9181973934173584, |
|
"rewards/margins": 2.8037965297698975, |
|
"rewards/rejected": -0.8855991959571838, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 1.1743119266055047, |
|
"grad_norm": 0.9679911732673645, |
|
"learning_rate": 3.811260691240604e-06, |
|
"logits/chosen": -0.8132730722427368, |
|
"logits/rejected": -2.0696139335632324, |
|
"logps/chosen": -351.917236328125, |
|
"logps/rejected": -204.27964782714844, |
|
"loss": 0.1011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.159268856048584, |
|
"rewards/margins": 2.8514890670776367, |
|
"rewards/rejected": -0.6922197937965393, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.1889908256880735, |
|
"grad_norm": 0.9500184059143066, |
|
"learning_rate": 3.774629208489547e-06, |
|
"logits/chosen": -0.9215357899665833, |
|
"logits/rejected": -2.1160709857940674, |
|
"logps/chosen": -253.12631225585938, |
|
"logps/rejected": -187.95811462402344, |
|
"loss": 0.1173, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.8172812461853027, |
|
"rewards/margins": 2.5532562732696533, |
|
"rewards/rejected": -0.7359753251075745, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 1.2036697247706423, |
|
"grad_norm": 1.0494946241378784, |
|
"learning_rate": 3.7376241309008433e-06, |
|
"logits/chosen": -1.0810823440551758, |
|
"logits/rejected": -2.151219606399536, |
|
"logps/chosen": -337.37255859375, |
|
"logps/rejected": -198.8866424560547, |
|
"loss": 0.1245, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.12947154045105, |
|
"rewards/margins": 2.8516111373901367, |
|
"rewards/rejected": -0.7221395373344421, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 1.218348623853211, |
|
"grad_norm": 0.822201669216156, |
|
"learning_rate": 3.7002563046922502e-06, |
|
"logits/chosen": -1.0325469970703125, |
|
"logits/rejected": -2.2076807022094727, |
|
"logps/chosen": -337.1971435546875, |
|
"logps/rejected": -189.85455322265625, |
|
"loss": 0.0923, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.9719089269638062, |
|
"rewards/margins": 3.0117526054382324, |
|
"rewards/rejected": -1.0398434400558472, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 1.2330275229357799, |
|
"grad_norm": 0.681236982345581, |
|
"learning_rate": 3.6625366824034337e-06, |
|
"logits/chosen": -0.7656459212303162, |
|
"logits/rejected": -2.049311399459839, |
|
"logps/chosen": -289.5611877441406, |
|
"logps/rejected": -224.8207550048828, |
|
"loss": 0.0906, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.0607948303222656, |
|
"rewards/margins": 3.2540061473846436, |
|
"rewards/rejected": -1.1932108402252197, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 1.2477064220183487, |
|
"grad_norm": 0.9994679689407349, |
|
"learning_rate": 3.6244763196857714e-06, |
|
"logits/chosen": -0.9609106183052063, |
|
"logits/rejected": -2.1387076377868652, |
|
"logps/chosen": -307.74798583984375, |
|
"logps/rejected": -199.2579345703125, |
|
"loss": 0.1095, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.0705361366271973, |
|
"rewards/margins": 3.1018238067626953, |
|
"rewards/rejected": -1.0312877893447876, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.2623853211009175, |
|
"grad_norm": 1.2497354745864868, |
|
"learning_rate": 3.5860863720619333e-06, |
|
"logits/chosen": -0.9625377058982849, |
|
"logits/rejected": -2.073275089263916, |
|
"logps/chosen": -297.9329833984375, |
|
"logps/rejected": -200.70681762695312, |
|
"loss": 0.1191, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 2.188173532485962, |
|
"rewards/margins": 2.902965784072876, |
|
"rewards/rejected": -0.7147922515869141, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 1.2770642201834863, |
|
"grad_norm": 0.862918496131897, |
|
"learning_rate": 3.547378091656186e-06, |
|
"logits/chosen": -0.7778910994529724, |
|
"logits/rejected": -2.1054413318634033, |
|
"logps/chosen": -304.24798583984375, |
|
"logps/rejected": -189.96273803710938, |
|
"loss": 0.0918, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.911596655845642, |
|
"rewards/margins": 3.0262608528137207, |
|
"rewards/rejected": -1.1146641969680786, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 1.2917431192660551, |
|
"grad_norm": 0.7902020812034607, |
|
"learning_rate": 3.5083628238963913e-06, |
|
"logits/chosen": -1.0238415002822876, |
|
"logits/rejected": -1.960688829421997, |
|
"logps/chosen": -243.5750274658203, |
|
"logps/rejected": -191.24264526367188, |
|
"loss": 0.1312, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.8321638107299805, |
|
"rewards/margins": 2.780503988265991, |
|
"rewards/rejected": -0.948340117931366, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 1.306422018348624, |
|
"grad_norm": 0.9199721813201904, |
|
"learning_rate": 3.4690520041886473e-06, |
|
"logits/chosen": -0.7949679493904114, |
|
"logits/rejected": -2.0139424800872803, |
|
"logps/chosen": -287.1697082519531, |
|
"logps/rejected": -230.3143310546875, |
|
"loss": 0.1007, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 2.025575876235962, |
|
"rewards/margins": 3.0686216354370117, |
|
"rewards/rejected": -1.0430455207824707, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 1.3211009174311927, |
|
"grad_norm": 0.6183698773384094, |
|
"learning_rate": 3.4294571545655653e-06, |
|
"logits/chosen": -0.8391042947769165, |
|
"logits/rejected": -2.1887526512145996, |
|
"logps/chosen": -302.6844482421875, |
|
"logps/rejected": -199.70486450195312, |
|
"loss": 0.0751, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.0722954273223877, |
|
"rewards/margins": 3.3604629039764404, |
|
"rewards/rejected": -1.2881678342819214, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.3357798165137615, |
|
"grad_norm": 0.6749584674835205, |
|
"learning_rate": 3.38958988030915e-06, |
|
"logits/chosen": -1.1391972303390503, |
|
"logits/rejected": -2.056378126144409, |
|
"logps/chosen": -285.07562255859375, |
|
"logps/rejected": -243.91146850585938, |
|
"loss": 0.1161, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.9815781116485596, |
|
"rewards/margins": 3.0620830059051514, |
|
"rewards/rejected": -1.0805050134658813, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 1.3504587155963304, |
|
"grad_norm": 0.9916686415672302, |
|
"learning_rate": 3.3494618665492833e-06, |
|
"logits/chosen": -0.974543571472168, |
|
"logits/rejected": -1.9790008068084717, |
|
"logps/chosen": -265.7524719238281, |
|
"logps/rejected": -210.968994140625, |
|
"loss": 0.1316, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7159056663513184, |
|
"rewards/margins": 2.7169814109802246, |
|
"rewards/rejected": -1.0010758638381958, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 1.3651376146788992, |
|
"grad_norm": 0.7534170746803284, |
|
"learning_rate": 3.3090848748388042e-06, |
|
"logits/chosen": -0.9359984993934631, |
|
"logits/rejected": -2.1165120601654053, |
|
"logps/chosen": -365.70556640625, |
|
"logps/rejected": -213.4051513671875, |
|
"loss": 0.0804, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.9640876054763794, |
|
"rewards/margins": 3.4107747077941895, |
|
"rewards/rejected": -1.4466872215270996, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 1.379816513761468, |
|
"grad_norm": 0.7047733068466187, |
|
"learning_rate": 3.2684707397061887e-06, |
|
"logits/chosen": -1.0234425067901611, |
|
"logits/rejected": -2.067413806915283, |
|
"logps/chosen": -304.1073913574219, |
|
"logps/rejected": -191.95208740234375, |
|
"loss": 0.0835, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.091536045074463, |
|
"rewards/margins": 3.2847490310668945, |
|
"rewards/rejected": -1.1932129859924316, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 1.3944954128440368, |
|
"grad_norm": 0.915761411190033, |
|
"learning_rate": 3.2276313651868364e-06, |
|
"logits/chosen": -0.8797706365585327, |
|
"logits/rejected": -2.130256414413452, |
|
"logps/chosen": -307.41839599609375, |
|
"logps/rejected": -180.803466796875, |
|
"loss": 0.0983, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.8479622602462769, |
|
"rewards/margins": 3.153357982635498, |
|
"rewards/rejected": -1.3053958415985107, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.4091743119266056, |
|
"grad_norm": 0.7284132838249207, |
|
"learning_rate": 3.1865787213339926e-06, |
|
"logits/chosen": -0.8553410768508911, |
|
"logits/rejected": -2.044377565383911, |
|
"logps/chosen": -292.92144775390625, |
|
"logps/rejected": -205.9124298095703, |
|
"loss": 0.0818, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.0033276081085205, |
|
"rewards/margins": 3.4773898124694824, |
|
"rewards/rejected": -1.474062442779541, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 1.4238532110091744, |
|
"grad_norm": 0.7230023145675659, |
|
"learning_rate": 3.1453248407103156e-06, |
|
"logits/chosen": -0.8956843614578247, |
|
"logits/rejected": -2.0704410076141357, |
|
"logps/chosen": -297.47418212890625, |
|
"logps/rejected": -189.42091369628906, |
|
"loss": 0.0838, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.829613208770752, |
|
"rewards/margins": 3.2233619689941406, |
|
"rewards/rejected": -1.3937489986419678, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 1.4385321100917432, |
|
"grad_norm": 1.092043161392212, |
|
"learning_rate": 3.1038818148611178e-06, |
|
"logits/chosen": -0.9160604476928711, |
|
"logits/rejected": -1.9689029455184937, |
|
"logps/chosen": -323.4578552246094, |
|
"logps/rejected": -202.6251220703125, |
|
"loss": 0.0932, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.9870991706848145, |
|
"rewards/margins": 3.379544973373413, |
|
"rewards/rejected": -1.3924458026885986, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 1.453211009174312, |
|
"grad_norm": 0.849423348903656, |
|
"learning_rate": 3.062261790770331e-06, |
|
"logits/chosen": -0.8054502010345459, |
|
"logits/rejected": -2.017672061920166, |
|
"logps/chosen": -268.9284973144531, |
|
"logps/rejected": -201.11390686035156, |
|
"loss": 0.1081, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7870291471481323, |
|
"rewards/margins": 2.959620952606201, |
|
"rewards/rejected": -1.1725919246673584, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 1.4678899082568808, |
|
"grad_norm": 0.5849136710166931, |
|
"learning_rate": 3.0204769673002123e-06, |
|
"logits/chosen": -0.8214648365974426, |
|
"logits/rejected": -2.103921890258789, |
|
"logps/chosen": -343.6684265136719, |
|
"logps/rejected": -218.4034423828125, |
|
"loss": 0.0698, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.9067230224609375, |
|
"rewards/margins": 3.2361087799072266, |
|
"rewards/rejected": -1.3293852806091309, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.4825688073394496, |
|
"grad_norm": 0.8638609647750854, |
|
"learning_rate": 2.978539591615848e-06, |
|
"logits/chosen": -0.9360217452049255, |
|
"logits/rejected": -1.8377161026000977, |
|
"logps/chosen": -310.77203369140625, |
|
"logps/rejected": -217.95361328125, |
|
"loss": 0.0893, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.8223047256469727, |
|
"rewards/margins": 3.374411106109619, |
|
"rewards/rejected": -1.5521066188812256, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 1.4972477064220184, |
|
"grad_norm": 0.7469986081123352, |
|
"learning_rate": 2.936461955595501e-06, |
|
"logits/chosen": -0.9148820638656616, |
|
"logits/rejected": -2.0849192142486572, |
|
"logps/chosen": -309.4117736816406, |
|
"logps/rejected": -211.26283264160156, |
|
"loss": 0.0876, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.0167510509490967, |
|
"rewards/margins": 3.318629503250122, |
|
"rewards/rejected": -1.301878571510315, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 1.5119266055045872, |
|
"grad_norm": 0.48730000853538513, |
|
"learning_rate": 2.8942563922278487e-06, |
|
"logits/chosen": -0.8627596497535706, |
|
"logits/rejected": -1.997396469116211, |
|
"logps/chosen": -297.6988220214844, |
|
"logps/rejected": -219.91180419921875, |
|
"loss": 0.0659, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.9595637321472168, |
|
"rewards/margins": 3.744344711303711, |
|
"rewards/rejected": -1.7847814559936523, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 1.526605504587156, |
|
"grad_norm": 1.1042286157608032, |
|
"learning_rate": 2.8519352719971783e-06, |
|
"logits/chosen": -0.9377632141113281, |
|
"logits/rejected": -2.024191379547119, |
|
"logps/chosen": -327.47027587890625, |
|
"logps/rejected": -223.6087646484375, |
|
"loss": 0.1017, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 2.0964934825897217, |
|
"rewards/margins": 3.4565787315368652, |
|
"rewards/rejected": -1.3600847721099854, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 1.5412844036697249, |
|
"grad_norm": 0.7358872294425964, |
|
"learning_rate": 2.8095109992575824e-06, |
|
"logits/chosen": -0.9008034467697144, |
|
"logits/rejected": -2.1022136211395264, |
|
"logps/chosen": -340.1212158203125, |
|
"logps/rejected": -223.19918823242188, |
|
"loss": 0.0661, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.2666172981262207, |
|
"rewards/margins": 3.668931007385254, |
|
"rewards/rejected": -1.4023137092590332, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.5559633027522937, |
|
"grad_norm": 0.823003888130188, |
|
"learning_rate": 2.7669960085972407e-06, |
|
"logits/chosen": -0.8504350185394287, |
|
"logits/rejected": -2.14664888381958, |
|
"logps/chosen": -363.5140075683594, |
|
"logps/rejected": -241.92892456054688, |
|
"loss": 0.068, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.1337637901306152, |
|
"rewards/margins": 3.620941638946533, |
|
"rewards/rejected": -1.4871773719787598, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 1.5706422018348625, |
|
"grad_norm": 0.9012424349784851, |
|
"learning_rate": 2.7244027611938247e-06, |
|
"logits/chosen": -0.6944912672042847, |
|
"logits/rejected": -1.8317877054214478, |
|
"logps/chosen": -261.44049072265625, |
|
"logps/rejected": -243.61410522460938, |
|
"loss": 0.1016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.750416874885559, |
|
"rewards/margins": 3.3300774097442627, |
|
"rewards/rejected": -1.5796607732772827, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 1.5853211009174313, |
|
"grad_norm": 0.9822458028793335, |
|
"learning_rate": 2.6817437411621194e-06, |
|
"logits/chosen": -0.8393555283546448, |
|
"logits/rejected": -1.9610698223114014, |
|
"logps/chosen": -357.4717102050781, |
|
"logps/rejected": -259.9384765625, |
|
"loss": 0.0773, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.1349892616271973, |
|
"rewards/margins": 3.471536636352539, |
|
"rewards/rejected": -1.336547613143921, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.7191787958145142, |
|
"learning_rate": 2.639031451894923e-06, |
|
"logits/chosen": -0.8827037811279297, |
|
"logits/rejected": -1.878009557723999, |
|
"logps/chosen": -341.8013916015625, |
|
"logps/rejected": -246.149169921875, |
|
"loss": 0.0649, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.039233446121216, |
|
"rewards/margins": 3.67873215675354, |
|
"rewards/rejected": -1.6394988298416138, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 1.614678899082569, |
|
"grad_norm": 0.7397493124008179, |
|
"learning_rate": 2.5962784123982843e-06, |
|
"logits/chosen": -0.9270643591880798, |
|
"logits/rejected": -2.148819923400879, |
|
"logps/chosen": -318.17242431640625, |
|
"logps/rejected": -221.3958282470703, |
|
"loss": 0.077, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.9221253395080566, |
|
"rewards/margins": 3.66496205329895, |
|
"rewards/rejected": -1.7428367137908936, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.6293577981651377, |
|
"grad_norm": 0.5408302545547485, |
|
"learning_rate": 2.5534971536221804e-06, |
|
"logits/chosen": -0.7174456715583801, |
|
"logits/rejected": -1.9402276277542114, |
|
"logps/chosen": -280.41265869140625, |
|
"logps/rejected": -214.61036682128906, |
|
"loss": 0.0679, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.8169009685516357, |
|
"rewards/margins": 3.658087730407715, |
|
"rewards/rejected": -1.841186761856079, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 1.6440366972477065, |
|
"grad_norm": 0.6373718976974487, |
|
"learning_rate": 2.5107002147876814e-06, |
|
"logits/chosen": -0.8338260650634766, |
|
"logits/rejected": -1.8052666187286377, |
|
"logps/chosen": -274.18408203125, |
|
"logps/rejected": -231.13385009765625, |
|
"loss": 0.0677, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.8962229490280151, |
|
"rewards/margins": 3.6884357929229736, |
|
"rewards/rejected": -1.792212724685669, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 1.6587155963302753, |
|
"grad_norm": 1.006023645401001, |
|
"learning_rate": 2.467900139711693e-06, |
|
"logits/chosen": -0.8586325645446777, |
|
"logits/rejected": -1.8590312004089355, |
|
"logps/chosen": -284.29498291015625, |
|
"logps/rejected": -219.96942138671875, |
|
"loss": 0.1002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.610360860824585, |
|
"rewards/margins": 3.314877986907959, |
|
"rewards/rejected": -1.7045170068740845, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 1.6733944954128441, |
|
"grad_norm": 0.4218728542327881, |
|
"learning_rate": 2.4251094731303586e-06, |
|
"logits/chosen": -0.7588306665420532, |
|
"logits/rejected": -2.020467758178711, |
|
"logps/chosen": -301.7962341308594, |
|
"logps/rejected": -201.7028350830078, |
|
"loss": 0.0583, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.0787746906280518, |
|
"rewards/margins": 3.6262543201446533, |
|
"rewards/rejected": -1.547479271888733, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 1.688073394495413, |
|
"grad_norm": 0.44578853249549866, |
|
"learning_rate": 2.3823407570221812e-06, |
|
"logits/chosen": -0.681371808052063, |
|
"logits/rejected": -2.0245919227600098, |
|
"logps/chosen": -310.5913391113281, |
|
"logps/rejected": -196.76724243164062, |
|
"loss": 0.0614, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.959742784500122, |
|
"rewards/margins": 3.5932304859161377, |
|
"rewards/rejected": -1.633487582206726, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.7027522935779817, |
|
"grad_norm": 0.6186323761940002, |
|
"learning_rate": 2.3396065269319655e-06, |
|
"logits/chosen": -0.8481271862983704, |
|
"logits/rejected": -2.065420150756836, |
|
"logps/chosen": -310.2200012207031, |
|
"logps/rejected": -194.04493713378906, |
|
"loss": 0.0628, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.0263564586639404, |
|
"rewards/margins": 3.842787265777588, |
|
"rewards/rejected": -1.8164305686950684, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 1.7174311926605506, |
|
"grad_norm": 0.5135802626609802, |
|
"learning_rate": 2.2969193082966353e-06, |
|
"logits/chosen": -0.7080973386764526, |
|
"logits/rejected": -2.007819652557373, |
|
"logps/chosen": -296.1108093261719, |
|
"logps/rejected": -216.99868774414062, |
|
"loss": 0.0534, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.9229261875152588, |
|
"rewards/margins": 3.921962022781372, |
|
"rewards/rejected": -1.9990354776382446, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 1.7321100917431194, |
|
"grad_norm": 0.9138413071632385, |
|
"learning_rate": 2.2542916127740194e-06, |
|
"logits/chosen": -0.6951168775558472, |
|
"logits/rejected": -1.6621724367141724, |
|
"logps/chosen": -323.9538269042969, |
|
"logps/rejected": -260.28900146484375, |
|
"loss": 0.0728, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.0283045768737793, |
|
"rewards/margins": 3.7805428504943848, |
|
"rewards/rejected": -1.7522385120391846, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 1.7467889908256882, |
|
"grad_norm": 0.62326979637146, |
|
"learning_rate": 2.211735934575674e-06, |
|
"logits/chosen": -0.7624643445014954, |
|
"logits/rejected": -2.0803322792053223, |
|
"logps/chosen": -293.841552734375, |
|
"logps/rejected": -189.69631958007812, |
|
"loss": 0.0703, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.6327762603759766, |
|
"rewards/margins": 3.612248420715332, |
|
"rewards/rejected": -1.979472279548645, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 1.761467889908257, |
|
"grad_norm": 0.5615968108177185, |
|
"learning_rate": 2.1692647468048235e-06, |
|
"logits/chosen": -0.8942849636077881, |
|
"logits/rejected": -1.9355003833770752, |
|
"logps/chosen": -318.2629699707031, |
|
"logps/rejected": -235.68296813964844, |
|
"loss": 0.0618, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.87472403049469, |
|
"rewards/margins": 4.362048149108887, |
|
"rewards/rejected": -2.4873242378234863, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.7761467889908258, |
|
"grad_norm": 0.6113856434822083, |
|
"learning_rate": 2.126890497800477e-06, |
|
"logits/chosen": -0.9161121845245361, |
|
"logits/rejected": -1.843569040298462, |
|
"logps/chosen": -309.8831787109375, |
|
"logps/rejected": -226.34967041015625, |
|
"loss": 0.0821, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.9025682210922241, |
|
"rewards/margins": 3.5658414363861084, |
|
"rewards/rejected": -1.6632736921310425, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.7908256880733946, |
|
"grad_norm": 0.7386473417282104, |
|
"learning_rate": 2.084625607488816e-06, |
|
"logits/chosen": -0.7687922716140747, |
|
"logits/rejected": -1.982967734336853, |
|
"logps/chosen": -285.9901428222656, |
|
"logps/rejected": -213.30564880371094, |
|
"loss": 0.0699, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.0006887912750244, |
|
"rewards/margins": 4.181596279144287, |
|
"rewards/rejected": -2.180907726287842, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 1.8055045871559634, |
|
"grad_norm": 0.620130717754364, |
|
"learning_rate": 2.0424824637428995e-06, |
|
"logits/chosen": -0.7613787651062012, |
|
"logits/rejected": -2.176778554916382, |
|
"logps/chosen": -278.2284851074219, |
|
"logps/rejected": -196.99716186523438, |
|
"loss": 0.0636, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.8024476766586304, |
|
"rewards/margins": 3.8208680152893066, |
|
"rewards/rejected": -2.0184202194213867, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 1.8201834862385322, |
|
"grad_norm": 0.964788556098938, |
|
"learning_rate": 2.0004734187517744e-06, |
|
"logits/chosen": -0.9343721270561218, |
|
"logits/rejected": -1.8525314331054688, |
|
"logps/chosen": -328.5677795410156, |
|
"logps/rejected": -199.83840942382812, |
|
"loss": 0.0751, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.8486145734786987, |
|
"rewards/margins": 3.6849093437194824, |
|
"rewards/rejected": -1.8362950086593628, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 1.834862385321101, |
|
"grad_norm": 0.3955663740634918, |
|
"learning_rate": 1.9586107854000327e-06, |
|
"logits/chosen": -0.9676373600959778, |
|
"logits/rejected": -2.1090657711029053, |
|
"logps/chosen": -307.66302490234375, |
|
"logps/rejected": -193.6895751953125, |
|
"loss": 0.057, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.8102517127990723, |
|
"rewards/margins": 3.7792747020721436, |
|
"rewards/rejected": -1.9690231084823608, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.8495412844036698, |
|
"grad_norm": 0.6646362543106079, |
|
"learning_rate": 1.916906833658899e-06, |
|
"logits/chosen": -0.7113239169120789, |
|
"logits/rejected": -1.9650328159332275, |
|
"logps/chosen": -337.66107177734375, |
|
"logps/rejected": -242.595703125, |
|
"loss": 0.0528, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.94010329246521, |
|
"rewards/margins": 4.105426788330078, |
|
"rewards/rejected": -2.165323495864868, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 1.8642201834862386, |
|
"grad_norm": 0.6509953737258911, |
|
"learning_rate": 1.8753737869898921e-06, |
|
"logits/chosen": -0.794485330581665, |
|
"logits/rejected": -1.901089072227478, |
|
"logps/chosen": -258.750732421875, |
|
"logps/rejected": -213.0238037109375, |
|
"loss": 0.0543, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.602518916130066, |
|
"rewards/margins": 4.075562477111816, |
|
"rewards/rejected": -2.473043441772461, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 1.8788990825688074, |
|
"grad_norm": 0.4499273896217346, |
|
"learning_rate": 1.8340238187621185e-06, |
|
"logits/chosen": -0.7047321200370789, |
|
"logits/rejected": -1.8908119201660156, |
|
"logps/chosen": -273.62322998046875, |
|
"logps/rejected": -198.1600341796875, |
|
"loss": 0.0727, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.7582603693008423, |
|
"rewards/margins": 3.6687071323394775, |
|
"rewards/rejected": -1.9104465246200562, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 1.8935779816513763, |
|
"grad_norm": 0.8414962291717529, |
|
"learning_rate": 1.7928690486842438e-06, |
|
"logits/chosen": -0.871714174747467, |
|
"logits/rejected": -2.0030646324157715, |
|
"logps/chosen": -264.42059326171875, |
|
"logps/rejected": -185.0747833251953, |
|
"loss": 0.0654, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.900803565979004, |
|
"rewards/margins": 3.7862067222595215, |
|
"rewards/rejected": -1.8854031562805176, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 1.908256880733945, |
|
"grad_norm": 0.8687112927436829, |
|
"learning_rate": 1.7519215392522026e-06, |
|
"logits/chosen": -0.8036646246910095, |
|
"logits/rejected": -2.0354790687561035, |
|
"logps/chosen": -293.50811767578125, |
|
"logps/rejected": -191.86962890625, |
|
"loss": 0.0604, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.9246323108673096, |
|
"rewards/margins": 4.019070148468018, |
|
"rewards/rejected": -2.094437599182129, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.9229357798165139, |
|
"grad_norm": 0.4811760485172272, |
|
"learning_rate": 1.7111932922136715e-06, |
|
"logits/chosen": -0.7815529108047485, |
|
"logits/rejected": -1.7573397159576416, |
|
"logps/chosen": -263.450927734375, |
|
"logps/rejected": -229.46728515625, |
|
"loss": 0.0629, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.6779381036758423, |
|
"rewards/margins": 3.870631217956543, |
|
"rewards/rejected": -2.1926932334899902, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 1.9376146788990827, |
|
"grad_norm": 0.5513655543327332, |
|
"learning_rate": 1.6706962450503408e-06, |
|
"logits/chosen": -0.6383249759674072, |
|
"logits/rejected": -1.9680360555648804, |
|
"logps/chosen": -293.52130126953125, |
|
"logps/rejected": -217.34693908691406, |
|
"loss": 0.0472, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.9342231750488281, |
|
"rewards/margins": 4.369998931884766, |
|
"rewards/rejected": -2.4357762336730957, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.9522935779816515, |
|
"grad_norm": 0.7187495827674866, |
|
"learning_rate": 1.630442267479034e-06, |
|
"logits/chosen": -0.6566349267959595, |
|
"logits/rejected": -1.9347317218780518, |
|
"logps/chosen": -277.68890380859375, |
|
"logps/rejected": -224.22335815429688, |
|
"loss": 0.0569, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.052359104156494, |
|
"rewards/margins": 4.0643744468688965, |
|
"rewards/rejected": -2.0120151042938232, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 1.9669724770642203, |
|
"grad_norm": 0.33258092403411865, |
|
"learning_rate": 1.5904431579726837e-06, |
|
"logits/chosen": -0.7657849192619324, |
|
"logits/rejected": -2.030609369277954, |
|
"logps/chosen": -306.955322265625, |
|
"logps/rejected": -190.61703491210938, |
|
"loss": 0.0564, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.6874788999557495, |
|
"rewards/margins": 4.016414165496826, |
|
"rewards/rejected": -2.328935146331787, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.981651376146789, |
|
"grad_norm": 0.5519306659698486, |
|
"learning_rate": 1.5507106403021897e-06, |
|
"logits/chosen": -0.7592746019363403, |
|
"logits/rejected": -2.0932528972625732, |
|
"logps/chosen": -341.2933349609375, |
|
"logps/rejected": -232.65756225585938, |
|
"loss": 0.0422, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.3383517265319824, |
|
"rewards/margins": 4.453994274139404, |
|
"rewards/rejected": -2.115642547607422, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.996330275229358, |
|
"grad_norm": 0.5155956745147705, |
|
"learning_rate": 1.511256360100171e-06, |
|
"logits/chosen": -0.7073550224304199, |
|
"logits/rejected": -2.0184946060180664, |
|
"logps/chosen": -306.38116455078125, |
|
"logps/rejected": -217.38668823242188, |
|
"loss": 0.0538, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.8283504247665405, |
|
"rewards/margins": 4.175046920776367, |
|
"rewards/rejected": -2.346696376800537, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 2.0110091743119267, |
|
"grad_norm": 0.7801055908203125, |
|
"learning_rate": 1.4720918814476234e-06, |
|
"logits/chosen": -0.9376870393753052, |
|
"logits/rejected": -2.1091787815093994, |
|
"logps/chosen": -266.37811279296875, |
|
"logps/rejected": -209.93710327148438, |
|
"loss": 0.0743, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.6686122417449951, |
|
"rewards/margins": 4.310929775238037, |
|
"rewards/rejected": -2.642317771911621, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 2.0256880733944955, |
|
"grad_norm": 0.6762734055519104, |
|
"learning_rate": 1.4332286834844792e-06, |
|
"logits/chosen": -0.9745014309883118, |
|
"logits/rejected": -2.0172030925750732, |
|
"logps/chosen": -297.90997314453125, |
|
"logps/rejected": -215.2535400390625, |
|
"loss": 0.0594, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7038819789886475, |
|
"rewards/margins": 3.7982983589172363, |
|
"rewards/rejected": -2.0944161415100098, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 2.0403669724770643, |
|
"grad_norm": 0.6311278939247131, |
|
"learning_rate": 1.3946781570450563e-06, |
|
"logits/chosen": -0.792485773563385, |
|
"logits/rejected": -2.0446367263793945, |
|
"logps/chosen": -316.6257019042969, |
|
"logps/rejected": -225.79873657226562, |
|
"loss": 0.0436, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.1141061782836914, |
|
"rewards/margins": 4.177621841430664, |
|
"rewards/rejected": -2.0635154247283936, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 2.055045871559633, |
|
"grad_norm": 0.4802553653717041, |
|
"learning_rate": 1.3564516013194023e-06, |
|
"logits/chosen": -0.5846218466758728, |
|
"logits/rejected": -1.8708997964859009, |
|
"logps/chosen": -278.2353515625, |
|
"logps/rejected": -215.2820587158203, |
|
"loss": 0.0528, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7591543197631836, |
|
"rewards/margins": 4.34114933013916, |
|
"rewards/rejected": -2.5819950103759766, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.069724770642202, |
|
"grad_norm": 0.6416748762130737, |
|
"learning_rate": 1.3185602205414894e-06, |
|
"logits/chosen": -0.7558883428573608, |
|
"logits/rejected": -1.8708809614181519, |
|
"logps/chosen": -280.8486633300781, |
|
"logps/rejected": -198.6562042236328, |
|
"loss": 0.0654, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.9279158115386963, |
|
"rewards/margins": 3.8500123023986816, |
|
"rewards/rejected": -1.9220962524414062, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 2.0844036697247708, |
|
"grad_norm": 0.8262112736701965, |
|
"learning_rate": 1.2810151207052465e-06, |
|
"logits/chosen": -0.8148822784423828, |
|
"logits/rejected": -1.9564712047576904, |
|
"logps/chosen": -348.1204833984375, |
|
"logps/rejected": -250.0408172607422, |
|
"loss": 0.0648, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.6666010618209839, |
|
"rewards/margins": 3.944627046585083, |
|
"rewards/rejected": -2.2780258655548096, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 2.0990825688073396, |
|
"grad_norm": 0.4954426884651184, |
|
"learning_rate": 1.2438273063093811e-06, |
|
"logits/chosen": -0.6735963225364685, |
|
"logits/rejected": -1.8776307106018066, |
|
"logps/chosen": -291.0019836425781, |
|
"logps/rejected": -194.4311981201172, |
|
"loss": 0.0706, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.7643864154815674, |
|
"rewards/margins": 3.9881067276000977, |
|
"rewards/rejected": -2.223719835281372, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 2.1137614678899084, |
|
"grad_norm": 0.8490874171257019, |
|
"learning_rate": 1.2070076771319536e-06, |
|
"logits/chosen": -0.9455384612083435, |
|
"logits/rejected": -1.8131248950958252, |
|
"logps/chosen": -365.147705078125, |
|
"logps/rejected": -228.15090942382812, |
|
"loss": 0.0704, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.8452998399734497, |
|
"rewards/margins": 3.8042831420898438, |
|
"rewards/rejected": -1.958983063697815, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 2.128440366972477, |
|
"grad_norm": 0.7720925807952881, |
|
"learning_rate": 1.1705670250356417e-06, |
|
"logits/chosen": -0.6748377084732056, |
|
"logits/rejected": -1.9302213191986084, |
|
"logps/chosen": -322.6198425292969, |
|
"logps/rejected": -224.2333526611328, |
|
"loss": 0.053, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.065561532974243, |
|
"rewards/margins": 4.38578987121582, |
|
"rewards/rejected": -2.320228338241577, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.143119266055046, |
|
"grad_norm": 0.4491863548755646, |
|
"learning_rate": 1.1345160308046413e-06, |
|
"logits/chosen": -0.7005204558372498, |
|
"logits/rejected": -2.1741456985473633, |
|
"logps/chosen": -398.5745849609375, |
|
"logps/rejected": -235.8988800048828, |
|
"loss": 0.0613, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.9305046796798706, |
|
"rewards/margins": 4.522059440612793, |
|
"rewards/rejected": -2.591554641723633, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 2.157798165137615, |
|
"grad_norm": 0.6360311508178711, |
|
"learning_rate": 1.0988652610141154e-06, |
|
"logits/chosen": -0.7096176147460938, |
|
"logits/rejected": -1.7769296169281006, |
|
"logps/chosen": -288.8116760253906, |
|
"logps/rejected": -241.828369140625, |
|
"loss": 0.0548, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.8457667827606201, |
|
"rewards/margins": 3.990344524383545, |
|
"rewards/rejected": -2.144577980041504, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 2.1724770642201836, |
|
"grad_norm": 0.3716106116771698, |
|
"learning_rate": 1.063625164933124e-06, |
|
"logits/chosen": -0.6774280667304993, |
|
"logits/rejected": -1.9684358835220337, |
|
"logps/chosen": -345.6900939941406, |
|
"logps/rejected": -240.35296630859375, |
|
"loss": 0.0367, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.1445114612579346, |
|
"rewards/margins": 4.849350452423096, |
|
"rewards/rejected": -2.704838752746582, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 2.1871559633027524, |
|
"grad_norm": 0.525005578994751, |
|
"learning_rate": 1.0288060714619359e-06, |
|
"logits/chosen": -0.9460769891738892, |
|
"logits/rejected": -2.1344943046569824, |
|
"logps/chosen": -330.6282043457031, |
|
"logps/rejected": -195.7637481689453, |
|
"loss": 0.0515, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.1682145595550537, |
|
"rewards/margins": 4.502593040466309, |
|
"rewards/rejected": -2.334378480911255, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 2.2018348623853212, |
|
"grad_norm": 0.458524227142334, |
|
"learning_rate": 9.944181861046188e-07, |
|
"logits/chosen": -0.7203876376152039, |
|
"logits/rejected": -1.8515840768814087, |
|
"logps/chosen": -347.7017517089844, |
|
"logps/rejected": -233.29393005371094, |
|
"loss": 0.0642, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.7970588207244873, |
|
"rewards/margins": 4.392740249633789, |
|
"rewards/rejected": -2.5956814289093018, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.21651376146789, |
|
"grad_norm": 0.43879008293151855, |
|
"learning_rate": 9.604715879777986e-07, |
|
"logits/chosen": -0.7226991057395935, |
|
"logits/rejected": -2.0477523803710938, |
|
"logps/chosen": -290.74530029296875, |
|
"logps/rejected": -181.2649688720703, |
|
"loss": 0.0503, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.9678212404251099, |
|
"rewards/margins": 4.382383346557617, |
|
"rewards/rejected": -2.4145617485046387, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 2.231192660550459, |
|
"grad_norm": 0.4379405081272125, |
|
"learning_rate": 9.269762268564616e-07, |
|
"logits/chosen": -0.8170676231384277, |
|
"logits/rejected": -2.0070619583129883, |
|
"logps/chosen": -265.16571044921875, |
|
"logps/rejected": -186.93112182617188, |
|
"loss": 0.0614, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.8528399467468262, |
|
"rewards/margins": 4.132846355438232, |
|
"rewards/rejected": -2.2800064086914062, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 2.2458715596330276, |
|
"grad_norm": 0.8870872855186462, |
|
"learning_rate": 8.939419202576694e-07, |
|
"logits/chosen": -0.5970391631126404, |
|
"logits/rejected": -1.7150076627731323, |
|
"logps/chosen": -268.1172180175781, |
|
"logps/rejected": -207.63734436035156, |
|
"loss": 0.0883, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.653077244758606, |
|
"rewards/margins": 3.3603720664978027, |
|
"rewards/rejected": -1.7072948217391968, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 2.2605504587155965, |
|
"grad_norm": 0.737343966960907, |
|
"learning_rate": 8.61378350563033e-07, |
|
"logits/chosen": -0.7202005386352539, |
|
"logits/rejected": -1.8895469903945923, |
|
"logps/chosen": -262.6046447753906, |
|
"logps/rejected": -221.35336303710938, |
|
"loss": 0.0627, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.750601053237915, |
|
"rewards/margins": 3.905064582824707, |
|
"rewards/rejected": -2.154463529586792, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 2.2752293577981653, |
|
"grad_norm": 0.5748594403266907, |
|
"learning_rate": 8.292950621808022e-07, |
|
"logits/chosen": -0.7942256927490234, |
|
"logits/rejected": -1.9462255239486694, |
|
"logps/chosen": -297.3062438964844, |
|
"logps/rejected": -220.7073516845703, |
|
"loss": 0.0439, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.907147765159607, |
|
"rewards/margins": 4.309232711791992, |
|
"rewards/rejected": -2.402085065841675, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.289908256880734, |
|
"grad_norm": 0.5790998339653015, |
|
"learning_rate": 7.977014587483925e-07, |
|
"logits/chosen": -0.8033642768859863, |
|
"logits/rejected": -1.9477308988571167, |
|
"logps/chosen": -285.6184997558594, |
|
"logps/rejected": -257.89910888671875, |
|
"loss": 0.0548, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.8432916402816772, |
|
"rewards/margins": 4.054279804229736, |
|
"rewards/rejected": -2.2109880447387695, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 2.304587155963303, |
|
"grad_norm": 0.6188729405403137, |
|
"learning_rate": 7.666068003761684e-07, |
|
"logits/chosen": -0.7408751249313354, |
|
"logits/rejected": -1.9631062746047974, |
|
"logps/chosen": -308.1776123046875, |
|
"logps/rejected": -198.61410522460938, |
|
"loss": 0.0454, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7822659015655518, |
|
"rewards/margins": 4.317424297332764, |
|
"rewards/rejected": -2.535158634185791, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 2.3192660550458717, |
|
"grad_norm": 0.5603534579277039, |
|
"learning_rate": 7.360202009332993e-07, |
|
"logits/chosen": -0.8284570574760437, |
|
"logits/rejected": -2.0091702938079834, |
|
"logps/chosen": -307.47088623046875, |
|
"logps/rejected": -215.27903747558594, |
|
"loss": 0.0523, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.6950868368148804, |
|
"rewards/margins": 4.28853178024292, |
|
"rewards/rejected": -2.59344482421875, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 2.3339449541284405, |
|
"grad_norm": 0.3757495582103729, |
|
"learning_rate": 7.059506253764773e-07, |
|
"logits/chosen": -0.7530102729797363, |
|
"logits/rejected": -1.9654746055603027, |
|
"logps/chosen": -326.5684814453125, |
|
"logps/rejected": -226.15786743164062, |
|
"loss": 0.0454, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.8712252378463745, |
|
"rewards/margins": 4.424376964569092, |
|
"rewards/rejected": -2.553151845932007, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 2.3486238532110093, |
|
"grad_norm": 0.6858806014060974, |
|
"learning_rate": 6.764068871222825e-07, |
|
"logits/chosen": -0.5249571204185486, |
|
"logits/rejected": -1.8156137466430664, |
|
"logps/chosen": -298.5492858886719, |
|
"logps/rejected": -212.81187438964844, |
|
"loss": 0.0564, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.8946092128753662, |
|
"rewards/margins": 4.014616012573242, |
|
"rewards/rejected": -2.120006561279297, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.363302752293578, |
|
"grad_norm": 0.4449942409992218, |
|
"learning_rate": 6.473976454639608e-07, |
|
"logits/chosen": -0.7823415398597717, |
|
"logits/rejected": -2.0849199295043945, |
|
"logps/chosen": -306.5335693359375, |
|
"logps/rejected": -197.4770050048828, |
|
"loss": 0.0458, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.062614679336548, |
|
"rewards/margins": 4.463562488555908, |
|
"rewards/rejected": -2.4009478092193604, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 2.377981651376147, |
|
"grad_norm": 0.699175238609314, |
|
"learning_rate": 6.189314030333796e-07, |
|
"logits/chosen": -0.5810756087303162, |
|
"logits/rejected": -1.8031431436538696, |
|
"logps/chosen": -292.385009765625, |
|
"logps/rejected": -252.5111083984375, |
|
"loss": 0.0572, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7218618392944336, |
|
"rewards/margins": 4.281242847442627, |
|
"rewards/rejected": -2.5593810081481934, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 2.3926605504587157, |
|
"grad_norm": 0.5212377309799194, |
|
"learning_rate": 5.910165033089e-07, |
|
"logits/chosen": -0.6628118753433228, |
|
"logits/rejected": -2.0212368965148926, |
|
"logps/chosen": -328.28729248046875, |
|
"logps/rejected": -230.48863220214844, |
|
"loss": 0.0409, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.0009829998016357, |
|
"rewards/margins": 4.275434494018555, |
|
"rewards/rejected": -2.274451732635498, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 2.4073394495412845, |
|
"grad_norm": 0.5146971344947815, |
|
"learning_rate": 5.636611281698956e-07, |
|
"logits/chosen": -0.7095816731452942, |
|
"logits/rejected": -1.83794367313385, |
|
"logps/chosen": -272.989990234375, |
|
"logps/rejected": -213.9049835205078, |
|
"loss": 0.0519, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.5980645418167114, |
|
"rewards/margins": 3.967491388320923, |
|
"rewards/rejected": -2.369426727294922, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 2.4220183486238533, |
|
"grad_norm": 0.6070245504379272, |
|
"learning_rate": 5.368732954986389e-07, |
|
"logits/chosen": -0.8353590369224548, |
|
"logits/rejected": -1.9633159637451172, |
|
"logps/chosen": -291.64990234375, |
|
"logps/rejected": -226.0115966796875, |
|
"loss": 0.0595, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7673264741897583, |
|
"rewards/margins": 4.178317070007324, |
|
"rewards/rejected": -2.4109902381896973, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.436697247706422, |
|
"grad_norm": 0.411520391702652, |
|
"learning_rate": 5.106608568302504e-07, |
|
"logits/chosen": -0.8378889560699463, |
|
"logits/rejected": -1.9491535425186157, |
|
"logps/chosen": -269.3817443847656, |
|
"logps/rejected": -226.02801513671875, |
|
"loss": 0.0607, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.776644229888916, |
|
"rewards/margins": 4.346107006072998, |
|
"rewards/rejected": -2.569462537765503, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 2.451376146788991, |
|
"grad_norm": 0.538725733757019, |
|
"learning_rate": 4.850314950514124e-07, |
|
"logits/chosen": -0.5758827328681946, |
|
"logits/rejected": -1.8072640895843506, |
|
"logps/chosen": -293.7169189453125, |
|
"logps/rejected": -222.18191528320312, |
|
"loss": 0.0476, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.8555824756622314, |
|
"rewards/margins": 4.301581382751465, |
|
"rewards/rejected": -2.4459989070892334, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 2.4660550458715598, |
|
"grad_norm": 0.6865962147712708, |
|
"learning_rate": 4.599927221485034e-07, |
|
"logits/chosen": -0.6990569233894348, |
|
"logits/rejected": -1.9968361854553223, |
|
"logps/chosen": -290.4656066894531, |
|
"logps/rejected": -200.7618408203125, |
|
"loss": 0.0467, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.737301230430603, |
|
"rewards/margins": 4.213505744934082, |
|
"rewards/rejected": -2.4762046337127686, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 2.4807339449541286, |
|
"grad_norm": 0.5077099204063416, |
|
"learning_rate": 4.3555187700583175e-07, |
|
"logits/chosen": -0.6568117141723633, |
|
"logits/rejected": -1.949430227279663, |
|
"logps/chosen": -277.690673828125, |
|
"logps/rejected": -220.354248046875, |
|
"loss": 0.0369, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.8167202472686768, |
|
"rewards/margins": 4.532853126525879, |
|
"rewards/rejected": -2.716132879257202, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 2.4954128440366974, |
|
"grad_norm": 0.49507051706314087, |
|
"learning_rate": 4.1171612325460244e-07, |
|
"logits/chosen": -0.7259389162063599, |
|
"logits/rejected": -1.808924674987793, |
|
"logps/chosen": -290.60845947265625, |
|
"logps/rejected": -214.2810821533203, |
|
"loss": 0.0532, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.6286503076553345, |
|
"rewards/margins": 4.091249942779541, |
|
"rewards/rejected": -2.462599515914917, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.510091743119266, |
|
"grad_norm": 0.348964124917984, |
|
"learning_rate": 3.8849244717325206e-07, |
|
"logits/chosen": -0.727351188659668, |
|
"logits/rejected": -1.7707502841949463, |
|
"logps/chosen": -281.7944030761719, |
|
"logps/rejected": -234.1016082763672, |
|
"loss": 0.0477, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.9929590225219727, |
|
"rewards/margins": 4.754191875457764, |
|
"rewards/rejected": -2.761232852935791, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 2.524770642201835, |
|
"grad_norm": 0.5973061919212341, |
|
"learning_rate": 3.658876556397628e-07, |
|
"logits/chosen": -0.8893070816993713, |
|
"logits/rejected": -2.0552244186401367, |
|
"logps/chosen": -266.6897277832031, |
|
"logps/rejected": -199.23097229003906, |
|
"loss": 0.059, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.8175071477890015, |
|
"rewards/margins": 4.262746810913086, |
|
"rewards/rejected": -2.445240020751953, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 2.539449541284404, |
|
"grad_norm": 0.669189453125, |
|
"learning_rate": 3.4390837413656256e-07, |
|
"logits/chosen": -0.7612945437431335, |
|
"logits/rejected": -2.0197830200195312, |
|
"logps/chosen": -289.519775390625, |
|
"logps/rejected": -236.5748748779297, |
|
"loss": 0.0455, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.8634060621261597, |
|
"rewards/margins": 4.5830864906311035, |
|
"rewards/rejected": -2.7196803092956543, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 2.5541284403669726, |
|
"grad_norm": 0.49376487731933594, |
|
"learning_rate": 3.225610448085903e-07, |
|
"logits/chosen": -0.703992486000061, |
|
"logits/rejected": -1.8440505266189575, |
|
"logps/chosen": -282.47967529296875, |
|
"logps/rejected": -213.64584350585938, |
|
"loss": 0.0459, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.8914369344711304, |
|
"rewards/margins": 4.45714807510376, |
|
"rewards/rejected": -2.565711498260498, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 2.5688073394495414, |
|
"grad_norm": 0.6668093204498291, |
|
"learning_rate": 3.018519245750989e-07, |
|
"logits/chosen": -0.775786817073822, |
|
"logits/rejected": -1.7931033372879028, |
|
"logps/chosen": -332.7348937988281, |
|
"logps/rejected": -254.2784423828125, |
|
"loss": 0.0578, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.7138545513153076, |
|
"rewards/margins": 4.272766590118408, |
|
"rewards/rejected": -2.5589118003845215, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.5834862385321102, |
|
"grad_norm": 0.46660616993904114, |
|
"learning_rate": 2.817870832957459e-07, |
|
"logits/chosen": -0.6354199051856995, |
|
"logits/rejected": -1.8320108652114868, |
|
"logps/chosen": -270.6486511230469, |
|
"logps/rejected": -209.35401916503906, |
|
"loss": 0.0468, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.8665962219238281, |
|
"rewards/margins": 4.460667133331299, |
|
"rewards/rejected": -2.594071388244629, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 2.598165137614679, |
|
"grad_norm": 0.7453739047050476, |
|
"learning_rate": 2.6237240199151386e-07, |
|
"logits/chosen": -0.7968777418136597, |
|
"logits/rejected": -2.040590763092041, |
|
"logps/chosen": -278.96051025390625, |
|
"logps/rejected": -198.50344848632812, |
|
"loss": 0.0617, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.8018391132354736, |
|
"rewards/margins": 3.8879919052124023, |
|
"rewards/rejected": -2.0861527919769287, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 2.612844036697248, |
|
"grad_norm": 0.6645973920822144, |
|
"learning_rate": 2.436135711209786e-07, |
|
"logits/chosen": -1.0428318977355957, |
|
"logits/rejected": -2.0885515213012695, |
|
"logps/chosen": -291.73846435546875, |
|
"logps/rejected": -194.1337890625, |
|
"loss": 0.0523, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.6952629089355469, |
|
"rewards/margins": 4.137482166290283, |
|
"rewards/rejected": -2.4422197341918945, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 2.6275229357798167, |
|
"grad_norm": 0.6470810174942017, |
|
"learning_rate": 2.2551608891243026e-07, |
|
"logits/chosen": -1.004224419593811, |
|
"logits/rejected": -2.139845609664917, |
|
"logps/chosen": -366.6258544921875, |
|
"logps/rejected": -242.1497344970703, |
|
"loss": 0.053, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7366106510162354, |
|
"rewards/margins": 3.9900641441345215, |
|
"rewards/rejected": -2.2534537315368652, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 2.6422018348623855, |
|
"grad_norm": 0.4664456248283386, |
|
"learning_rate": 2.0808525975233807e-07, |
|
"logits/chosen": -0.6308703422546387, |
|
"logits/rejected": -1.8344846963882446, |
|
"logps/chosen": -294.6444091796875, |
|
"logps/rejected": -229.31024169921875, |
|
"loss": 0.0686, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.5859092473983765, |
|
"rewards/margins": 4.006678104400635, |
|
"rewards/rejected": -2.4207687377929688, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.6568807339449543, |
|
"grad_norm": 0.8631575107574463, |
|
"learning_rate": 1.9132619263063144e-07, |
|
"logits/chosen": -0.6818079352378845, |
|
"logits/rejected": -1.9622324705123901, |
|
"logps/chosen": -360.9144287109375, |
|
"logps/rejected": -245.8763885498047, |
|
"loss": 0.0507, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.0507702827453613, |
|
"rewards/margins": 4.704620838165283, |
|
"rewards/rejected": -2.653850555419922, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 2.671559633027523, |
|
"grad_norm": 0.2709774672985077, |
|
"learning_rate": 1.7524379964325155e-07, |
|
"logits/chosen": -0.7185477614402771, |
|
"logits/rejected": -1.9397680759429932, |
|
"logps/chosen": -340.63604736328125, |
|
"logps/rejected": -234.94650268554688, |
|
"loss": 0.038, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.689025640487671, |
|
"rewards/margins": 4.374544620513916, |
|
"rewards/rejected": -2.685518741607666, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 2.686238532110092, |
|
"grad_norm": 0.533819854259491, |
|
"learning_rate": 1.5984279455240975e-07, |
|
"logits/chosen": -0.8093196153640747, |
|
"logits/rejected": -1.8389997482299805, |
|
"logps/chosen": -295.1790771484375, |
|
"logps/rejected": -222.28091430664062, |
|
"loss": 0.0453, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.990577220916748, |
|
"rewards/margins": 4.476598262786865, |
|
"rewards/rejected": -2.486021041870117, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 2.7009174311926607, |
|
"grad_norm": 0.49332335591316223, |
|
"learning_rate": 1.451276914049818e-07, |
|
"logits/chosen": -0.7148327827453613, |
|
"logits/rejected": -1.8303236961364746, |
|
"logps/chosen": -265.9716491699219, |
|
"logps/rejected": -208.4017791748047, |
|
"loss": 0.0493, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.5792350769042969, |
|
"rewards/margins": 4.226253509521484, |
|
"rewards/rejected": -2.6470184326171875, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 2.7155963302752295, |
|
"grad_norm": 0.44270747900009155, |
|
"learning_rate": 1.3110280320943692e-07, |
|
"logits/chosen": -0.6963136792182922, |
|
"logits/rejected": -2.0225512981414795, |
|
"logps/chosen": -283.3634033203125, |
|
"logps/rejected": -202.95513916015625, |
|
"loss": 0.0372, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.947371244430542, |
|
"rewards/margins": 4.56584358215332, |
|
"rewards/rejected": -2.6184728145599365, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.7302752293577983, |
|
"grad_norm": 0.43144798278808594, |
|
"learning_rate": 1.1777224067169218e-07, |
|
"logits/chosen": -0.6372362375259399, |
|
"logits/rejected": -1.8398162126541138, |
|
"logps/chosen": -290.5965576171875, |
|
"logps/rejected": -221.44293212890625, |
|
"loss": 0.0397, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.9076143503189087, |
|
"rewards/margins": 4.488263130187988, |
|
"rewards/rejected": -2.580648899078369, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 2.744954128440367, |
|
"grad_norm": 0.6024923920631409, |
|
"learning_rate": 1.0513991099025872e-07, |
|
"logits/chosen": -0.797070324420929, |
|
"logits/rejected": -1.9885629415512085, |
|
"logps/chosen": -334.8035888671875, |
|
"logps/rejected": -221.12759399414062, |
|
"loss": 0.0531, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.713030457496643, |
|
"rewards/margins": 3.9955036640167236, |
|
"rewards/rejected": -2.28247332572937, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 2.759633027522936, |
|
"grad_norm": 0.42591243982315063, |
|
"learning_rate": 9.320951671104194e-08, |
|
"logits/chosen": -0.6949442625045776, |
|
"logits/rejected": -1.9821323156356812, |
|
"logps/chosen": -326.1830749511719, |
|
"logps/rejected": -219.29837036132812, |
|
"loss": 0.0364, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.3185112476348877, |
|
"rewards/margins": 4.512205600738525, |
|
"rewards/rejected": -2.1936943531036377, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 2.7743119266055047, |
|
"grad_norm": 0.5629270076751709, |
|
"learning_rate": 8.198455464212108e-08, |
|
"logits/chosen": -0.734917402267456, |
|
"logits/rejected": -1.9623744487762451, |
|
"logps/chosen": -304.7027282714844, |
|
"logps/rejected": -207.42129516601562, |
|
"loss": 0.0376, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.003225326538086, |
|
"rewards/margins": 4.722169876098633, |
|
"rewards/rejected": -2.7189443111419678, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 2.7889908256880735, |
|
"grad_norm": 0.45993342995643616, |
|
"learning_rate": 7.146831482883115e-08, |
|
"logits/chosen": -0.5041406750679016, |
|
"logits/rejected": -1.9728295803070068, |
|
"logps/chosen": -310.0588073730469, |
|
"logps/rejected": -202.70693969726562, |
|
"loss": 0.0358, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.020038604736328, |
|
"rewards/margins": 4.748332500457764, |
|
"rewards/rejected": -2.7282943725585938, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.8036697247706424, |
|
"grad_norm": 0.772245466709137, |
|
"learning_rate": 6.16638795894492e-08, |
|
"logits/chosen": -0.6536301374435425, |
|
"logits/rejected": -1.7665328979492188, |
|
"logps/chosen": -273.5377197265625, |
|
"logps/rejected": -230.2478485107422, |
|
"loss": 0.0608, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.9565868377685547, |
|
"rewards/margins": 4.441068649291992, |
|
"rewards/rejected": -2.4844815731048584, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 2.818348623853211, |
|
"grad_norm": 0.7546908855438232, |
|
"learning_rate": 5.257412261176375e-08, |
|
"logits/chosen": -0.8912358283996582, |
|
"logits/rejected": -1.845367193222046, |
|
"logps/chosen": -286.1430969238281, |
|
"logps/rejected": -220.30615234375, |
|
"loss": 0.0467, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.9721031188964844, |
|
"rewards/margins": 4.426693916320801, |
|
"rewards/rejected": -2.4545907974243164, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 2.83302752293578, |
|
"grad_norm": 0.8646131157875061, |
|
"learning_rate": 4.4201708110795384e-08, |
|
"logits/chosen": -0.7442179918289185, |
|
"logits/rejected": -1.8478055000305176, |
|
"logps/chosen": -304.1729431152344, |
|
"logps/rejected": -233.10855102539062, |
|
"loss": 0.0542, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.854830265045166, |
|
"rewards/margins": 4.118017673492432, |
|
"rewards/rejected": -2.2631874084472656, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 2.847706422018349, |
|
"grad_norm": 0.5237764120101929, |
|
"learning_rate": 3.654909004791152e-08, |
|
"logits/chosen": -0.7583023309707642, |
|
"logits/rejected": -2.0417721271514893, |
|
"logps/chosen": -305.7694091796875, |
|
"logps/rejected": -214.91346740722656, |
|
"loss": 0.0519, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7464522123336792, |
|
"rewards/margins": 4.34686279296875, |
|
"rewards/rejected": -2.600410223007202, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 2.8623853211009176, |
|
"grad_norm": 0.4562954306602478, |
|
"learning_rate": 2.9618511411570462e-08, |
|
"logits/chosen": -0.8513392210006714, |
|
"logits/rejected": -1.93741774559021, |
|
"logps/chosen": -298.6360778808594, |
|
"logps/rejected": -200.97865295410156, |
|
"loss": 0.0594, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.5848456621170044, |
|
"rewards/margins": 4.198357105255127, |
|
"rewards/rejected": -2.613511562347412, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.8770642201834864, |
|
"grad_norm": 0.8423421382904053, |
|
"learning_rate": 2.3412003559898088e-08, |
|
"logits/chosen": -0.701295018196106, |
|
"logits/rejected": -1.7541186809539795, |
|
"logps/chosen": -282.74078369140625, |
|
"logps/rejected": -235.9966278076172, |
|
"loss": 0.0715, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.771613359451294, |
|
"rewards/margins": 3.9545040130615234, |
|
"rewards/rejected": -2.1828906536102295, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 2.891743119266055, |
|
"grad_norm": 0.7763597369194031, |
|
"learning_rate": 1.793138562529634e-08, |
|
"logits/chosen": -0.818265438079834, |
|
"logits/rejected": -2.0317091941833496, |
|
"logps/chosen": -358.9674377441406, |
|
"logps/rejected": -211.57412719726562, |
|
"loss": 0.0469, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.096813917160034, |
|
"rewards/margins": 4.269334316253662, |
|
"rewards/rejected": -2.172520399093628, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 2.906422018348624, |
|
"grad_norm": 0.543138325214386, |
|
"learning_rate": 1.317826398125277e-08, |
|
"logits/chosen": -0.8907778263092041, |
|
"logits/rejected": -2.0098018646240234, |
|
"logps/chosen": -306.7756652832031, |
|
"logps/rejected": -236.91026306152344, |
|
"loss": 0.0441, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 2.1162776947021484, |
|
"rewards/margins": 4.927333831787109, |
|
"rewards/rejected": -2.81105637550354, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 2.921100917431193, |
|
"grad_norm": 0.40663444995880127, |
|
"learning_rate": 9.15403177151275e-09, |
|
"logits/chosen": -0.744702160358429, |
|
"logits/rejected": -1.7668923139572144, |
|
"logps/chosen": -288.4136962890625, |
|
"logps/rejected": -249.9727325439453, |
|
"loss": 0.0399, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.962416648864746, |
|
"rewards/margins": 4.565864562988281, |
|
"rewards/rejected": -2.603447914123535, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 2.9357798165137616, |
|
"grad_norm": 0.4989350736141205, |
|
"learning_rate": 5.85986850174608e-09, |
|
"logits/chosen": -0.6515053510665894, |
|
"logits/rejected": -2.1018004417419434, |
|
"logps/chosen": -325.62371826171875, |
|
"logps/rejected": -215.43890380859375, |
|
"loss": 0.0425, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.057291030883789, |
|
"rewards/margins": 4.5394287109375, |
|
"rewards/rejected": -2.4821372032165527, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.9504587155963304, |
|
"grad_norm": 0.5903070569038391, |
|
"learning_rate": 3.296739693834927e-09, |
|
"logits/chosen": -0.936674952507019, |
|
"logits/rejected": -1.8789682388305664, |
|
"logps/chosen": -317.982666015625, |
|
"logps/rejected": -212.38453674316406, |
|
"loss": 0.0552, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.4384199380874634, |
|
"rewards/margins": 3.8503003120422363, |
|
"rewards/rejected": -2.4118804931640625, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 2.9651376146788992, |
|
"grad_norm": 0.7236863374710083, |
|
"learning_rate": 1.4653966028774225e-09, |
|
"logits/chosen": -0.7320691347122192, |
|
"logits/rejected": -1.8207372426986694, |
|
"logps/chosen": -326.4574890136719, |
|
"logps/rejected": -244.78536987304688, |
|
"loss": 0.0475, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.709165096282959, |
|
"rewards/margins": 4.418180465698242, |
|
"rewards/rejected": -2.7090158462524414, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 2.979816513761468, |
|
"grad_norm": 0.37622901797294617, |
|
"learning_rate": 3.6637599699351766e-10, |
|
"logits/chosen": -0.6842759847640991, |
|
"logits/rejected": -2.033496856689453, |
|
"logps/chosen": -302.9255065917969, |
|
"logps/rejected": -209.06802368164062, |
|
"loss": 0.0455, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.8500210046768188, |
|
"rewards/margins": 4.324655532836914, |
|
"rewards/rejected": -2.4746341705322266, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 2.994495412844037, |
|
"grad_norm": 0.5513418316841125, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -0.8159844279289246, |
|
"logits/rejected": -1.814368724822998, |
|
"logps/chosen": -333.46990966796875, |
|
"logps/rejected": -244.63433837890625, |
|
"loss": 0.0625, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 1.8996886014938354, |
|
"rewards/margins": 4.328461170196533, |
|
"rewards/rejected": -2.4287726879119873, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 2.994495412844037, |
|
"step": 408, |
|
"total_flos": 7.837376281021809e+17, |
|
"train_loss": 0.2111055671474805, |
|
"train_runtime": 8097.1834, |
|
"train_samples_per_second": 1.614, |
|
"train_steps_per_second": 0.05 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 408, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.837376281021809e+17, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|