|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.998828811243412, |
|
"eval_steps": 75, |
|
"global_step": 160, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.024985360140542652, |
|
"grad_norm": 14.177000429059124, |
|
"learning_rate": 4e-09, |
|
"logits/chosen": -0.7216415405273438, |
|
"logits/rejected": -0.7776755690574646, |
|
"logps/chosen": -180.14370727539062, |
|
"logps/rejected": -163.619384765625, |
|
"loss": 0.5911, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": 2.089264392852783, |
|
"rewards/margins": 0.4413459897041321, |
|
"rewards/rejected": 1.647918462753296, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.049970720281085304, |
|
"grad_norm": 13.032161582993453, |
|
"learning_rate": 8e-09, |
|
"logits/chosen": -0.6800286769866943, |
|
"logits/rejected": -0.7293923497200012, |
|
"logps/chosen": -187.60638427734375, |
|
"logps/rejected": -170.1859893798828, |
|
"loss": 0.5762, |
|
"rewards/accuracies": 0.66796875, |
|
"rewards/chosen": 2.104745388031006, |
|
"rewards/margins": 0.4643358588218689, |
|
"rewards/rejected": 1.6404094696044922, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.07495608042162795, |
|
"grad_norm": 14.424559495239313, |
|
"learning_rate": 1.1999999999999998e-08, |
|
"logits/chosen": -0.7123140096664429, |
|
"logits/rejected": -0.7762022018432617, |
|
"logps/chosen": -186.28652954101562, |
|
"logps/rejected": -169.9141387939453, |
|
"loss": 0.589, |
|
"rewards/accuracies": 0.66015625, |
|
"rewards/chosen": 2.052624464035034, |
|
"rewards/margins": 0.40381550788879395, |
|
"rewards/rejected": 1.6488089561462402, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.09994144056217061, |
|
"grad_norm": 15.25048206057953, |
|
"learning_rate": 1.6e-08, |
|
"logits/chosen": -0.6801178455352783, |
|
"logits/rejected": -0.7383629083633423, |
|
"logps/chosen": -184.20584106445312, |
|
"logps/rejected": -170.67449951171875, |
|
"loss": 0.5855, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": 2.0641534328460693, |
|
"rewards/margins": 0.4149114787578583, |
|
"rewards/rejected": 1.6492421627044678, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.12492680070271325, |
|
"grad_norm": 15.020860898871753, |
|
"learning_rate": 2e-08, |
|
"logits/chosen": -0.6762746572494507, |
|
"logits/rejected": -0.7306088805198669, |
|
"logps/chosen": -181.46649169921875, |
|
"logps/rejected": -171.62355041503906, |
|
"loss": 0.5886, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": 2.0547406673431396, |
|
"rewards/margins": 0.4107271730899811, |
|
"rewards/rejected": 1.6440132856369019, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.1499121608432559, |
|
"grad_norm": 15.546401340611093, |
|
"learning_rate": 2.3999999999999997e-08, |
|
"logits/chosen": -0.6915724873542786, |
|
"logits/rejected": -0.7597174048423767, |
|
"logps/chosen": -191.33026123046875, |
|
"logps/rejected": -172.05740356445312, |
|
"loss": 0.5952, |
|
"rewards/accuracies": 0.68359375, |
|
"rewards/chosen": 2.090841293334961, |
|
"rewards/margins": 0.4058065116405487, |
|
"rewards/rejected": 1.685034990310669, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.17489752098379854, |
|
"grad_norm": 15.179087180823856, |
|
"learning_rate": 2.8000000000000003e-08, |
|
"logits/chosen": -0.648224413394928, |
|
"logits/rejected": -0.7036635875701904, |
|
"logps/chosen": -181.56240844726562, |
|
"logps/rejected": -161.77291870117188, |
|
"loss": 0.5662, |
|
"rewards/accuracies": 0.69140625, |
|
"rewards/chosen": 2.07316255569458, |
|
"rewards/margins": 0.45191100239753723, |
|
"rewards/rejected": 1.6212515830993652, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.19988288112434122, |
|
"grad_norm": 14.717039853262694, |
|
"learning_rate": 3.2e-08, |
|
"logits/chosen": -0.678563117980957, |
|
"logits/rejected": -0.7321793437004089, |
|
"logps/chosen": -191.10882568359375, |
|
"logps/rejected": -172.11471557617188, |
|
"loss": 0.5792, |
|
"rewards/accuracies": 0.69140625, |
|
"rewards/chosen": 2.1359810829162598, |
|
"rewards/margins": 0.4702029824256897, |
|
"rewards/rejected": 1.6657780408859253, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.22486824126488386, |
|
"grad_norm": 15.26388916085504, |
|
"learning_rate": 3.6e-08, |
|
"logits/chosen": -0.6794100999832153, |
|
"logits/rejected": -0.7467265129089355, |
|
"logps/chosen": -190.895751953125, |
|
"logps/rejected": -171.15126037597656, |
|
"loss": 0.5949, |
|
"rewards/accuracies": 0.66796875, |
|
"rewards/chosen": 2.0863959789276123, |
|
"rewards/margins": 0.4278351664543152, |
|
"rewards/rejected": 1.658560872077942, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.2498536014054265, |
|
"grad_norm": 14.719456857161541, |
|
"learning_rate": 4e-08, |
|
"logits/chosen": -0.6606219410896301, |
|
"logits/rejected": -0.7190724611282349, |
|
"logps/chosen": -179.43295288085938, |
|
"logps/rejected": -163.46678161621094, |
|
"loss": 0.5819, |
|
"rewards/accuracies": 0.71484375, |
|
"rewards/chosen": 2.1400554180145264, |
|
"rewards/margins": 0.5210827589035034, |
|
"rewards/rejected": 1.6189727783203125, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.27483896154596915, |
|
"grad_norm": 14.799553727376024, |
|
"learning_rate": 4.4e-08, |
|
"logits/chosen": -0.6596983671188354, |
|
"logits/rejected": -0.7132915258407593, |
|
"logps/chosen": -186.89849853515625, |
|
"logps/rejected": -177.6392364501953, |
|
"loss": 0.5881, |
|
"rewards/accuracies": 0.73828125, |
|
"rewards/chosen": 2.0855584144592285, |
|
"rewards/margins": 0.5197086334228516, |
|
"rewards/rejected": 1.5658495426177979, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.2998243216865118, |
|
"grad_norm": 16.401751337438842, |
|
"learning_rate": 4.799999999999999e-08, |
|
"logits/chosen": -0.6935199499130249, |
|
"logits/rejected": -0.7622916102409363, |
|
"logps/chosen": -191.56312561035156, |
|
"logps/rejected": -166.0808563232422, |
|
"loss": 0.5876, |
|
"rewards/accuracies": 0.72265625, |
|
"rewards/chosen": 2.1759369373321533, |
|
"rewards/margins": 0.584960401058197, |
|
"rewards/rejected": 1.590976357460022, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.32480968182705444, |
|
"grad_norm": 14.092401453744207, |
|
"learning_rate": 5.2e-08, |
|
"logits/chosen": -0.6964302062988281, |
|
"logits/rejected": -0.7522369623184204, |
|
"logps/chosen": -183.28709411621094, |
|
"logps/rejected": -176.45947265625, |
|
"loss": 0.5887, |
|
"rewards/accuracies": 0.71484375, |
|
"rewards/chosen": 2.1809558868408203, |
|
"rewards/margins": 0.4667380154132843, |
|
"rewards/rejected": 1.7142179012298584, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.3497950419675971, |
|
"grad_norm": 14.992677802834425, |
|
"learning_rate": 5.6000000000000005e-08, |
|
"logits/chosen": -0.6727583408355713, |
|
"logits/rejected": -0.7299581170082092, |
|
"logps/chosen": -176.1138458251953, |
|
"logps/rejected": -165.51553344726562, |
|
"loss": 0.5947, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 2.0702695846557617, |
|
"rewards/margins": 0.42442983388900757, |
|
"rewards/rejected": 1.645839810371399, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.3747804021081398, |
|
"grad_norm": 14.234258704647342, |
|
"learning_rate": 6e-08, |
|
"logits/chosen": -0.6875941753387451, |
|
"logits/rejected": -0.7378899455070496, |
|
"logps/chosen": -179.08218383789062, |
|
"logps/rejected": -170.3502197265625, |
|
"loss": 0.5854, |
|
"rewards/accuracies": 0.67578125, |
|
"rewards/chosen": 2.12485408782959, |
|
"rewards/margins": 0.4190685749053955, |
|
"rewards/rejected": 1.7057857513427734, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.39976576224868243, |
|
"grad_norm": 15.320369672206587, |
|
"learning_rate": 6.4e-08, |
|
"logits/chosen": -0.6457805633544922, |
|
"logits/rejected": -0.7087669372558594, |
|
"logps/chosen": -174.39279174804688, |
|
"logps/rejected": -161.18417358398438, |
|
"loss": 0.556, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": 2.1353416442871094, |
|
"rewards/margins": 0.5485972166061401, |
|
"rewards/rejected": 1.5867444276809692, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.4247511223892251, |
|
"grad_norm": 14.996986688498861, |
|
"learning_rate": 6.8e-08, |
|
"logits/chosen": -0.677814781665802, |
|
"logits/rejected": -0.7355855703353882, |
|
"logps/chosen": -184.22764587402344, |
|
"logps/rejected": -164.98434448242188, |
|
"loss": 0.5726, |
|
"rewards/accuracies": 0.71484375, |
|
"rewards/chosen": 2.0874392986297607, |
|
"rewards/margins": 0.5497796535491943, |
|
"rewards/rejected": 1.5376596450805664, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.4497364825297677, |
|
"grad_norm": 15.836133161520731, |
|
"learning_rate": 7.2e-08, |
|
"logits/chosen": -0.6465247273445129, |
|
"logits/rejected": -0.7009165287017822, |
|
"logps/chosen": -183.50096130371094, |
|
"logps/rejected": -172.29428100585938, |
|
"loss": 0.5909, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": 2.1548638343811035, |
|
"rewards/margins": 0.48915886878967285, |
|
"rewards/rejected": 1.6657049655914307, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.47472184267031037, |
|
"grad_norm": 15.068791059044901, |
|
"learning_rate": 7.599999999999999e-08, |
|
"logits/chosen": -0.684742271900177, |
|
"logits/rejected": -0.7406108379364014, |
|
"logps/chosen": -178.16604614257812, |
|
"logps/rejected": -172.45472717285156, |
|
"loss": 0.5728, |
|
"rewards/accuracies": 0.69140625, |
|
"rewards/chosen": 2.1049206256866455, |
|
"rewards/margins": 0.4164605736732483, |
|
"rewards/rejected": 1.6884599924087524, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.499707202810853, |
|
"grad_norm": 15.172663789942417, |
|
"learning_rate": 8e-08, |
|
"logits/chosen": -0.6802005767822266, |
|
"logits/rejected": -0.7308796048164368, |
|
"logps/chosen": -178.501708984375, |
|
"logps/rejected": -162.97750854492188, |
|
"loss": 0.5778, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": 2.145007848739624, |
|
"rewards/margins": 0.5102132558822632, |
|
"rewards/rejected": 1.6347947120666504, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.5246925629513957, |
|
"grad_norm": 13.193729893516823, |
|
"learning_rate": 8.4e-08, |
|
"logits/chosen": -0.67890864610672, |
|
"logits/rejected": -0.7359157204627991, |
|
"logps/chosen": -180.63043212890625, |
|
"logps/rejected": -177.0836181640625, |
|
"loss": 0.5706, |
|
"rewards/accuracies": 0.70703125, |
|
"rewards/chosen": 2.1336517333984375, |
|
"rewards/margins": 0.4847910404205322, |
|
"rewards/rejected": 1.6488608121871948, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.5496779230919383, |
|
"grad_norm": 14.543118187410414, |
|
"learning_rate": 8.8e-08, |
|
"logits/chosen": -0.6593753099441528, |
|
"logits/rejected": -0.7200923562049866, |
|
"logps/chosen": -179.75027465820312, |
|
"logps/rejected": -161.65733337402344, |
|
"loss": 0.5614, |
|
"rewards/accuracies": 0.72265625, |
|
"rewards/chosen": 2.1679983139038086, |
|
"rewards/margins": 0.6123022437095642, |
|
"rewards/rejected": 1.5556960105895996, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.574663283232481, |
|
"grad_norm": 14.478724863209543, |
|
"learning_rate": 9.2e-08, |
|
"logits/chosen": -0.6733975410461426, |
|
"logits/rejected": -0.725917398929596, |
|
"logps/chosen": -179.19137573242188, |
|
"logps/rejected": -167.48928833007812, |
|
"loss": 0.5721, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": 2.126537561416626, |
|
"rewards/margins": 0.5884015560150146, |
|
"rewards/rejected": 1.5381361246109009, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.5996486433730236, |
|
"grad_norm": 13.598065354511457, |
|
"learning_rate": 9.599999999999999e-08, |
|
"logits/chosen": -0.6861451864242554, |
|
"logits/rejected": -0.7490273118019104, |
|
"logps/chosen": -192.40524291992188, |
|
"logps/rejected": -166.65826416015625, |
|
"loss": 0.5666, |
|
"rewards/accuracies": 0.69140625, |
|
"rewards/chosen": 2.1456832885742188, |
|
"rewards/margins": 0.5288498997688293, |
|
"rewards/rejected": 1.6168336868286133, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.6246340035135662, |
|
"grad_norm": 13.749586623653736, |
|
"learning_rate": 1e-07, |
|
"logits/chosen": -0.6737085580825806, |
|
"logits/rejected": -0.7165706753730774, |
|
"logps/chosen": -176.8297119140625, |
|
"logps/rejected": -168.13772583007812, |
|
"loss": 0.5656, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": 2.0855467319488525, |
|
"rewards/margins": 0.5343782305717468, |
|
"rewards/rejected": 1.551168441772461, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.6496193636541089, |
|
"grad_norm": 14.236522635027217, |
|
"learning_rate": 1.04e-07, |
|
"logits/chosen": -0.6797468662261963, |
|
"logits/rejected": -0.7432878613471985, |
|
"logps/chosen": -180.42208862304688, |
|
"logps/rejected": -165.42669677734375, |
|
"loss": 0.5405, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 2.1683268547058105, |
|
"rewards/margins": 0.678287148475647, |
|
"rewards/rejected": 1.4900394678115845, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.6746047237946515, |
|
"grad_norm": 12.868471228668062, |
|
"learning_rate": 1.08e-07, |
|
"logits/chosen": -0.6745160818099976, |
|
"logits/rejected": -0.7254283428192139, |
|
"logps/chosen": -183.60704040527344, |
|
"logps/rejected": -170.13792419433594, |
|
"loss": 0.5348, |
|
"rewards/accuracies": 0.73046875, |
|
"rewards/chosen": 2.0506410598754883, |
|
"rewards/margins": 0.5821288228034973, |
|
"rewards/rejected": 1.4685120582580566, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.6995900839351942, |
|
"grad_norm": 13.767404224251546, |
|
"learning_rate": 1.1200000000000001e-07, |
|
"logits/chosen": -0.6854877471923828, |
|
"logits/rejected": -0.746857225894928, |
|
"logps/chosen": -178.72006225585938, |
|
"logps/rejected": -162.08724975585938, |
|
"loss": 0.5387, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": 2.124311923980713, |
|
"rewards/margins": 0.6121358871459961, |
|
"rewards/rejected": 1.5121760368347168, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.7245754440757368, |
|
"grad_norm": 13.792453070210335, |
|
"learning_rate": 1.1599999999999999e-07, |
|
"logits/chosen": -0.6948191523551941, |
|
"logits/rejected": -0.7636308073997498, |
|
"logps/chosen": -195.91062927246094, |
|
"logps/rejected": -176.2474365234375, |
|
"loss": 0.5338, |
|
"rewards/accuracies": 0.73046875, |
|
"rewards/chosen": 2.136486530303955, |
|
"rewards/margins": 0.6362313032150269, |
|
"rewards/rejected": 1.5002549886703491, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.7495608042162796, |
|
"grad_norm": 12.863853198025703, |
|
"learning_rate": 1.2e-07, |
|
"logits/chosen": -0.6513829231262207, |
|
"logits/rejected": -0.7188961505889893, |
|
"logps/chosen": -190.9204864501953, |
|
"logps/rejected": -170.77809143066406, |
|
"loss": 0.5334, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": 2.1112589836120605, |
|
"rewards/margins": 0.6223936676979065, |
|
"rewards/rejected": 1.4888653755187988, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.7745461643568222, |
|
"grad_norm": 13.104450063440881, |
|
"learning_rate": 1.24e-07, |
|
"logits/chosen": -0.6542866230010986, |
|
"logits/rejected": -0.7189180254936218, |
|
"logps/chosen": -183.43380737304688, |
|
"logps/rejected": -169.60279846191406, |
|
"loss": 0.538, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": 2.178042411804199, |
|
"rewards/margins": 0.7206485867500305, |
|
"rewards/rejected": 1.4573938846588135, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.7995315244973649, |
|
"grad_norm": 14.223685904396252, |
|
"learning_rate": 1.28e-07, |
|
"logits/chosen": -0.6746785640716553, |
|
"logits/rejected": -0.7393426895141602, |
|
"logps/chosen": -187.83718872070312, |
|
"logps/rejected": -170.26303100585938, |
|
"loss": 0.5409, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 2.0394065380096436, |
|
"rewards/margins": 0.6107546091079712, |
|
"rewards/rejected": 1.4286518096923828, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.8245168846379075, |
|
"grad_norm": 11.892937673420246, |
|
"learning_rate": 1.32e-07, |
|
"logits/chosen": -0.6900768280029297, |
|
"logits/rejected": -0.7491022944450378, |
|
"logps/chosen": -190.11602783203125, |
|
"logps/rejected": -177.3651123046875, |
|
"loss": 0.4913, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": 2.1415176391601562, |
|
"rewards/margins": 0.7801377773284912, |
|
"rewards/rejected": 1.3613799810409546, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.8495022447784502, |
|
"grad_norm": 12.90980511070953, |
|
"learning_rate": 1.36e-07, |
|
"logits/chosen": -0.671898603439331, |
|
"logits/rejected": -0.7221825122833252, |
|
"logps/chosen": -182.26194763183594, |
|
"logps/rejected": -171.02517700195312, |
|
"loss": 0.5013, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 1.9341095685958862, |
|
"rewards/margins": 0.7295835018157959, |
|
"rewards/rejected": 1.2045260667800903, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.8744876049189928, |
|
"grad_norm": 11.941560260701717, |
|
"learning_rate": 1.3999999999999998e-07, |
|
"logits/chosen": -0.6553590297698975, |
|
"logits/rejected": -0.7277964353561401, |
|
"logps/chosen": -191.17935180664062, |
|
"logps/rejected": -182.75697326660156, |
|
"loss": 0.4992, |
|
"rewards/accuracies": 0.76171875, |
|
"rewards/chosen": 1.9563246965408325, |
|
"rewards/margins": 0.735268771648407, |
|
"rewards/rejected": 1.2210559844970703, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.8994729650595354, |
|
"grad_norm": 11.363918779296476, |
|
"learning_rate": 1.44e-07, |
|
"logits/chosen": -0.6677660346031189, |
|
"logits/rejected": -0.7358181476593018, |
|
"logps/chosen": -187.28823852539062, |
|
"logps/rejected": -175.23736572265625, |
|
"loss": 0.4782, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": 1.8920280933380127, |
|
"rewards/margins": 0.9304031729698181, |
|
"rewards/rejected": 0.9616249799728394, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.9244583252000781, |
|
"grad_norm": 11.665594088546383, |
|
"learning_rate": 1.48e-07, |
|
"logits/chosen": -0.6850963830947876, |
|
"logits/rejected": -0.750001072883606, |
|
"logps/chosen": -190.98031616210938, |
|
"logps/rejected": -173.23446655273438, |
|
"loss": 0.4758, |
|
"rewards/accuracies": 0.79296875, |
|
"rewards/chosen": 1.8105218410491943, |
|
"rewards/margins": 0.8698927760124207, |
|
"rewards/rejected": 0.9406291246414185, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.9369510052703494, |
|
"eval_logits/chosen": -0.6242849230766296, |
|
"eval_logits/rejected": -0.7280451059341431, |
|
"eval_logps/chosen": -193.7286376953125, |
|
"eval_logps/rejected": -160.47738647460938, |
|
"eval_loss": 0.5220226645469666, |
|
"eval_rewards/accuracies": 0.8399999737739563, |
|
"eval_rewards/chosen": 1.9179359674453735, |
|
"eval_rewards/margins": 1.0572994947433472, |
|
"eval_rewards/rejected": 0.8606364727020264, |
|
"eval_runtime": 29.5424, |
|
"eval_samples_per_second": 3.385, |
|
"eval_steps_per_second": 0.846, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.9494436853406207, |
|
"grad_norm": 11.894064434017581, |
|
"learning_rate": 1.5199999999999998e-07, |
|
"logits/chosen": -0.6667495965957642, |
|
"logits/rejected": -0.7195772528648376, |
|
"logps/chosen": -188.1094512939453, |
|
"logps/rejected": -173.92083740234375, |
|
"loss": 0.4603, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 1.8090986013412476, |
|
"rewards/margins": 0.9019326567649841, |
|
"rewards/rejected": 0.9071658849716187, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.9744290454811634, |
|
"grad_norm": 12.785309864943024, |
|
"learning_rate": 1.56e-07, |
|
"logits/chosen": -0.6789891123771667, |
|
"logits/rejected": -0.7438546419143677, |
|
"logps/chosen": -190.32470703125, |
|
"logps/rejected": -169.19207763671875, |
|
"loss": 0.4541, |
|
"rewards/accuracies": 0.80078125, |
|
"rewards/chosen": 1.736232042312622, |
|
"rewards/margins": 0.9202592372894287, |
|
"rewards/rejected": 0.8159728050231934, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.999414405621706, |
|
"grad_norm": 12.07847451966405, |
|
"learning_rate": 1.6e-07, |
|
"logits/chosen": -0.687256395816803, |
|
"logits/rejected": -0.7596179246902466, |
|
"logps/chosen": -195.14768981933594, |
|
"logps/rejected": -174.72589111328125, |
|
"loss": 0.4471, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": 1.6982433795928955, |
|
"rewards/margins": 0.9969222545623779, |
|
"rewards/rejected": 0.7013211250305176, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.0243997657622488, |
|
"grad_norm": 12.209730650020617, |
|
"learning_rate": 1.6399999999999999e-07, |
|
"logits/chosen": -0.6745339632034302, |
|
"logits/rejected": -0.7286314368247986, |
|
"logps/chosen": -184.5238494873047, |
|
"logps/rejected": -180.26815795898438, |
|
"loss": 0.4635, |
|
"rewards/accuracies": 0.78515625, |
|
"rewards/chosen": 1.6792489290237427, |
|
"rewards/margins": 0.8308749198913574, |
|
"rewards/rejected": 0.8483741879463196, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 1.0493851259027913, |
|
"grad_norm": 11.63839350311622, |
|
"learning_rate": 1.68e-07, |
|
"logits/chosen": -0.6981229186058044, |
|
"logits/rejected": -0.7625120878219604, |
|
"logps/chosen": -191.15847778320312, |
|
"logps/rejected": -189.08364868164062, |
|
"loss": 0.4418, |
|
"rewards/accuracies": 0.80078125, |
|
"rewards/chosen": 1.7394218444824219, |
|
"rewards/margins": 1.0957342386245728, |
|
"rewards/rejected": 0.6436874866485596, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 1.074370486043334, |
|
"grad_norm": 12.047547514582906, |
|
"learning_rate": 1.7199999999999998e-07, |
|
"logits/chosen": -0.6573597192764282, |
|
"logits/rejected": -0.7058761715888977, |
|
"logps/chosen": -189.32237243652344, |
|
"logps/rejected": -182.69403076171875, |
|
"loss": 0.4339, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": 1.724177598953247, |
|
"rewards/margins": 1.020638108253479, |
|
"rewards/rejected": 0.7035394906997681, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 1.0993558461838766, |
|
"grad_norm": 12.65465734374361, |
|
"learning_rate": 1.76e-07, |
|
"logits/chosen": -0.67812180519104, |
|
"logits/rejected": -0.7225789427757263, |
|
"logps/chosen": -185.65040588378906, |
|
"logps/rejected": -187.9286651611328, |
|
"loss": 0.437, |
|
"rewards/accuracies": 0.81640625, |
|
"rewards/chosen": 1.5955438613891602, |
|
"rewards/margins": 0.9583697319030762, |
|
"rewards/rejected": 0.6371738910675049, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 1.1243412063244194, |
|
"grad_norm": 10.575082434339704, |
|
"learning_rate": 1.8e-07, |
|
"logits/chosen": -0.6781046986579895, |
|
"logits/rejected": -0.7251250743865967, |
|
"logps/chosen": -189.33551025390625, |
|
"logps/rejected": -188.9590606689453, |
|
"loss": 0.4156, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": 1.5732731819152832, |
|
"rewards/margins": 1.1116917133331299, |
|
"rewards/rejected": 0.4615815281867981, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.149326566464962, |
|
"grad_norm": 9.55392077902883, |
|
"learning_rate": 1.84e-07, |
|
"logits/chosen": -0.6788798570632935, |
|
"logits/rejected": -0.7428586483001709, |
|
"logps/chosen": -198.3631591796875, |
|
"logps/rejected": -182.88487243652344, |
|
"loss": 0.3982, |
|
"rewards/accuracies": 0.82421875, |
|
"rewards/chosen": 1.3846931457519531, |
|
"rewards/margins": 1.0993235111236572, |
|
"rewards/rejected": 0.2853696346282959, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 1.1743119266055047, |
|
"grad_norm": 9.670365765437687, |
|
"learning_rate": 1.88e-07, |
|
"logits/chosen": -0.7081943154335022, |
|
"logits/rejected": -0.775234580039978, |
|
"logps/chosen": -189.57760620117188, |
|
"logps/rejected": -181.6404571533203, |
|
"loss": 0.3911, |
|
"rewards/accuracies": 0.8828125, |
|
"rewards/chosen": 1.2745850086212158, |
|
"rewards/margins": 1.3070428371429443, |
|
"rewards/rejected": -0.032457947731018066, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 1.1992972867460472, |
|
"grad_norm": 10.78932105106093, |
|
"learning_rate": 1.9199999999999997e-07, |
|
"logits/chosen": -0.6830898523330688, |
|
"logits/rejected": -0.734713613986969, |
|
"logps/chosen": -191.22511291503906, |
|
"logps/rejected": -186.43077087402344, |
|
"loss": 0.3897, |
|
"rewards/accuracies": 0.82421875, |
|
"rewards/chosen": 1.0674785375595093, |
|
"rewards/margins": 1.171497106552124, |
|
"rewards/rejected": -0.10401848703622818, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 1.22428264688659, |
|
"grad_norm": 9.097857033695211, |
|
"learning_rate": 1.9599999999999998e-07, |
|
"logits/chosen": -0.7023120522499084, |
|
"logits/rejected": -0.7581274509429932, |
|
"logps/chosen": -195.01312255859375, |
|
"logps/rejected": -188.2948455810547, |
|
"loss": 0.3536, |
|
"rewards/accuracies": 0.85546875, |
|
"rewards/chosen": 1.0987714529037476, |
|
"rewards/margins": 1.5480579137802124, |
|
"rewards/rejected": -0.4492865800857544, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 1.2492680070271325, |
|
"grad_norm": 12.363599073474044, |
|
"learning_rate": 2e-07, |
|
"logits/chosen": -0.6939373016357422, |
|
"logits/rejected": -0.7522105574607849, |
|
"logps/chosen": -193.21104431152344, |
|
"logps/rejected": -190.04568481445312, |
|
"loss": 0.3649, |
|
"rewards/accuracies": 0.85546875, |
|
"rewards/chosen": 0.7619870901107788, |
|
"rewards/margins": 1.3099664449691772, |
|
"rewards/rejected": -0.547979474067688, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.2742533671676752, |
|
"grad_norm": 9.956621854528622, |
|
"learning_rate": 1.9945218953682733e-07, |
|
"logits/chosen": -0.7141095399856567, |
|
"logits/rejected": -0.772229015827179, |
|
"logps/chosen": -206.34132385253906, |
|
"logps/rejected": -199.00970458984375, |
|
"loss": 0.3505, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": 0.8092713952064514, |
|
"rewards/margins": 1.6670289039611816, |
|
"rewards/rejected": -0.8577573299407959, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 1.2992387273082178, |
|
"grad_norm": 9.301525503546692, |
|
"learning_rate": 1.9781476007338056e-07, |
|
"logits/chosen": -0.7332565188407898, |
|
"logits/rejected": -0.7983365058898926, |
|
"logps/chosen": -201.5232696533203, |
|
"logps/rejected": -191.0933380126953, |
|
"loss": 0.3195, |
|
"rewards/accuracies": 0.89453125, |
|
"rewards/chosen": 0.5621832609176636, |
|
"rewards/margins": 1.6282891035079956, |
|
"rewards/rejected": -1.0661057233810425, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 1.3242240874487605, |
|
"grad_norm": 9.869807635078832, |
|
"learning_rate": 1.9510565162951537e-07, |
|
"logits/chosen": -0.7436533570289612, |
|
"logits/rejected": -0.8179137706756592, |
|
"logps/chosen": -198.8864288330078, |
|
"logps/rejected": -192.3636016845703, |
|
"loss": 0.3368, |
|
"rewards/accuracies": 0.87109375, |
|
"rewards/chosen": 0.5397917628288269, |
|
"rewards/margins": 1.7668784856796265, |
|
"rewards/rejected": -1.2270865440368652, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 1.349209447589303, |
|
"grad_norm": 10.035654401238393, |
|
"learning_rate": 1.9135454576426007e-07, |
|
"logits/chosen": -0.6918727159500122, |
|
"logits/rejected": -0.7655491828918457, |
|
"logps/chosen": -203.7888946533203, |
|
"logps/rejected": -215.9702606201172, |
|
"loss": 0.3397, |
|
"rewards/accuracies": 0.90234375, |
|
"rewards/chosen": 0.5103797912597656, |
|
"rewards/margins": 1.8079906702041626, |
|
"rewards/rejected": -1.297610878944397, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 1.3741948077298458, |
|
"grad_norm": 9.358482838142306, |
|
"learning_rate": 1.8660254037844388e-07, |
|
"logits/chosen": -0.7152352929115295, |
|
"logits/rejected": -0.7777791023254395, |
|
"logps/chosen": -207.73023986816406, |
|
"logps/rejected": -194.01126098632812, |
|
"loss": 0.3365, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.4271202087402344, |
|
"rewards/margins": 1.687686562538147, |
|
"rewards/rejected": -1.2605663537979126, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.3991801678703886, |
|
"grad_norm": 9.789127179150574, |
|
"learning_rate": 1.8090169943749475e-07, |
|
"logits/chosen": -0.7317672371864319, |
|
"logits/rejected": -0.7882843017578125, |
|
"logps/chosen": -203.46849060058594, |
|
"logps/rejected": -208.32135009765625, |
|
"loss": 0.3157, |
|
"rewards/accuracies": 0.87890625, |
|
"rewards/chosen": 0.33698615431785583, |
|
"rewards/margins": 1.7098716497421265, |
|
"rewards/rejected": -1.3728857040405273, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 1.424165528010931, |
|
"grad_norm": 9.275207179944992, |
|
"learning_rate": 1.7431448254773942e-07, |
|
"logits/chosen": -0.7219483852386475, |
|
"logits/rejected": -0.7694462537765503, |
|
"logps/chosen": -199.74270629882812, |
|
"logps/rejected": -204.81101989746094, |
|
"loss": 0.3034, |
|
"rewards/accuracies": 0.88671875, |
|
"rewards/chosen": 0.40492168068885803, |
|
"rewards/margins": 1.9214580059051514, |
|
"rewards/rejected": -1.5165363550186157, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 1.4491508881514736, |
|
"grad_norm": 9.183521827422608, |
|
"learning_rate": 1.669130606358858e-07, |
|
"logits/chosen": -0.7337281107902527, |
|
"logits/rejected": -0.7940360307693481, |
|
"logps/chosen": -198.19046020507812, |
|
"logps/rejected": -200.4697265625, |
|
"loss": 0.3176, |
|
"rewards/accuracies": 0.88671875, |
|
"rewards/chosen": 0.3844246566295624, |
|
"rewards/margins": 1.8816416263580322, |
|
"rewards/rejected": -1.4972169399261475, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 1.4741362482920164, |
|
"grad_norm": 9.337739314896169, |
|
"learning_rate": 1.5877852522924732e-07, |
|
"logits/chosen": -0.7224444150924683, |
|
"logits/rejected": -0.7723821401596069, |
|
"logps/chosen": -195.0646209716797, |
|
"logps/rejected": -201.89569091796875, |
|
"loss": 0.2752, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 0.3896394371986389, |
|
"rewards/margins": 1.9735894203186035, |
|
"rewards/rejected": -1.5839500427246094, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 1.4991216084325591, |
|
"grad_norm": 8.793369516249312, |
|
"learning_rate": 1.5e-07, |
|
"logits/chosen": -0.7569531202316284, |
|
"logits/rejected": -0.8058477640151978, |
|
"logps/chosen": -208.27215576171875, |
|
"logps/rejected": -208.09347534179688, |
|
"loss": 0.317, |
|
"rewards/accuracies": 0.87890625, |
|
"rewards/chosen": 0.2652769982814789, |
|
"rewards/margins": 1.866006851196289, |
|
"rewards/rejected": -1.6007298231124878, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.5241069685731017, |
|
"grad_norm": 9.48150415114474, |
|
"learning_rate": 1.4067366430758004e-07, |
|
"logits/chosen": -0.7591882348060608, |
|
"logits/rejected": -0.8140251636505127, |
|
"logps/chosen": -205.2285614013672, |
|
"logps/rejected": -203.8860321044922, |
|
"loss": 0.2965, |
|
"rewards/accuracies": 0.86328125, |
|
"rewards/chosen": 0.22455668449401855, |
|
"rewards/margins": 1.875580072402954, |
|
"rewards/rejected": -1.6510233879089355, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.5490923287136442, |
|
"grad_norm": 9.98138144476122, |
|
"learning_rate": 1.3090169943749475e-07, |
|
"logits/chosen": -0.7185292840003967, |
|
"logits/rejected": -0.7869015336036682, |
|
"logps/chosen": -207.1554718017578, |
|
"logps/rejected": -239.03298950195312, |
|
"loss": 0.301, |
|
"rewards/accuracies": 0.8671875, |
|
"rewards/chosen": 0.21323075890541077, |
|
"rewards/margins": 1.779089093208313, |
|
"rewards/rejected": -1.565858244895935, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 1.574077688854187, |
|
"grad_norm": 7.9906817459937995, |
|
"learning_rate": 1.207911690817759e-07, |
|
"logits/chosen": -0.7233790159225464, |
|
"logits/rejected": -0.7781089544296265, |
|
"logps/chosen": -194.47396850585938, |
|
"logps/rejected": -199.163330078125, |
|
"loss": 0.3084, |
|
"rewards/accuracies": 0.8828125, |
|
"rewards/chosen": 0.403320848941803, |
|
"rewards/margins": 1.8897595405578613, |
|
"rewards/rejected": -1.486438512802124, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 1.5990630489947297, |
|
"grad_norm": 9.241594419692872, |
|
"learning_rate": 1.1045284632676535e-07, |
|
"logits/chosen": -0.7427763342857361, |
|
"logits/rejected": -0.811578094959259, |
|
"logps/chosen": -211.1174774169922, |
|
"logps/rejected": -199.56015014648438, |
|
"loss": 0.2962, |
|
"rewards/accuracies": 0.8828125, |
|
"rewards/chosen": 0.37835511565208435, |
|
"rewards/margins": 2.0853826999664307, |
|
"rewards/rejected": -1.7070273160934448, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 1.6240484091352723, |
|
"grad_norm": 9.666544829037878, |
|
"learning_rate": 1e-07, |
|
"logits/chosen": -0.7333863973617554, |
|
"logits/rejected": -0.7908891439437866, |
|
"logps/chosen": -201.62767028808594, |
|
"logps/rejected": -204.39947509765625, |
|
"loss": 0.3047, |
|
"rewards/accuracies": 0.91015625, |
|
"rewards/chosen": 0.5189218521118164, |
|
"rewards/margins": 2.134669542312622, |
|
"rewards/rejected": -1.6157476902008057, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.6490337692758148, |
|
"grad_norm": 8.419662280451101, |
|
"learning_rate": 8.954715367323466e-08, |
|
"logits/chosen": -0.7702259421348572, |
|
"logits/rejected": -0.8284745812416077, |
|
"logps/chosen": -199.48992919921875, |
|
"logps/rejected": -219.00027465820312, |
|
"loss": 0.2776, |
|
"rewards/accuracies": 0.86328125, |
|
"rewards/chosen": 0.36517998576164246, |
|
"rewards/margins": 2.0277538299560547, |
|
"rewards/rejected": -1.6625735759735107, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 1.6740191294163576, |
|
"grad_norm": 9.261225693287605, |
|
"learning_rate": 7.920883091822408e-08, |
|
"logits/chosen": -0.7342085242271423, |
|
"logits/rejected": -0.7807326912879944, |
|
"logps/chosen": -198.77467346191406, |
|
"logps/rejected": -204.98635864257812, |
|
"loss": 0.3185, |
|
"rewards/accuracies": 0.88671875, |
|
"rewards/chosen": 0.39617919921875, |
|
"rewards/margins": 1.929458737373352, |
|
"rewards/rejected": -1.533279538154602, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 1.6990044895569003, |
|
"grad_norm": 9.299697052167406, |
|
"learning_rate": 6.909830056250527e-08, |
|
"logits/chosen": -0.7558231949806213, |
|
"logits/rejected": -0.8186966776847839, |
|
"logps/chosen": -200.72955322265625, |
|
"logps/rejected": -197.19003295898438, |
|
"loss": 0.2807, |
|
"rewards/accuracies": 0.88671875, |
|
"rewards/chosen": 0.42143842577934265, |
|
"rewards/margins": 2.1845474243164062, |
|
"rewards/rejected": -1.7631090879440308, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 1.723989849697443, |
|
"grad_norm": 8.587815614728, |
|
"learning_rate": 5.9326335692419996e-08, |
|
"logits/chosen": -0.755694568157196, |
|
"logits/rejected": -0.8112677335739136, |
|
"logps/chosen": -198.68673706054688, |
|
"logps/rejected": -197.39120483398438, |
|
"loss": 0.2802, |
|
"rewards/accuracies": 0.8671875, |
|
"rewards/chosen": 0.23039419949054718, |
|
"rewards/margins": 1.9206252098083496, |
|
"rewards/rejected": -1.6902309656143188, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 1.7489752098379856, |
|
"grad_norm": 8.680976094750916, |
|
"learning_rate": 5.000000000000002e-08, |
|
"logits/chosen": -0.7553902864456177, |
|
"logits/rejected": -0.8158895373344421, |
|
"logps/chosen": -199.0127716064453, |
|
"logps/rejected": -202.2300262451172, |
|
"loss": 0.2768, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/chosen": 0.5232993960380554, |
|
"rewards/margins": 2.2032899856567383, |
|
"rewards/rejected": -1.6799907684326172, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.7739605699785281, |
|
"grad_norm": 9.234907906222318, |
|
"learning_rate": 4.1221474770752695e-08, |
|
"logits/chosen": -0.7363643646240234, |
|
"logits/rejected": -0.79323410987854, |
|
"logps/chosen": -203.90921020507812, |
|
"logps/rejected": -207.4276123046875, |
|
"loss": 0.2921, |
|
"rewards/accuracies": 0.85546875, |
|
"rewards/chosen": 0.3827190697193146, |
|
"rewards/margins": 2.0326881408691406, |
|
"rewards/rejected": -1.6499687433242798, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 1.798945930119071, |
|
"grad_norm": 8.988965068155167, |
|
"learning_rate": 3.3086939364114206e-08, |
|
"logits/chosen": -0.7579203844070435, |
|
"logits/rejected": -0.8293938636779785, |
|
"logps/chosen": -201.67063903808594, |
|
"logps/rejected": -223.98065185546875, |
|
"loss": 0.2825, |
|
"rewards/accuracies": 0.90234375, |
|
"rewards/chosen": 0.3738960325717926, |
|
"rewards/margins": 2.088986873626709, |
|
"rewards/rejected": -1.7150908708572388, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 1.8239312902596136, |
|
"grad_norm": 8.4124330379094, |
|
"learning_rate": 2.5685517452260564e-08, |
|
"logits/chosen": -0.7071250081062317, |
|
"logits/rejected": -0.7688826322555542, |
|
"logps/chosen": -203.57652282714844, |
|
"logps/rejected": -203.83291625976562, |
|
"loss": 0.282, |
|
"rewards/accuracies": 0.90234375, |
|
"rewards/chosen": 0.2634541988372803, |
|
"rewards/margins": 2.0646886825561523, |
|
"rewards/rejected": -1.801234245300293, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 1.8489166504001562, |
|
"grad_norm": 8.280715517124031, |
|
"learning_rate": 1.9098300562505266e-08, |
|
"logits/chosen": -0.755478024482727, |
|
"logits/rejected": -0.8133871555328369, |
|
"logps/chosen": -202.27098083496094, |
|
"logps/rejected": -195.1833038330078, |
|
"loss": 0.2677, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 0.45085206627845764, |
|
"rewards/margins": 2.2143564224243164, |
|
"rewards/rejected": -1.7635046243667603, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 1.8739020105406987, |
|
"grad_norm": 9.365073690139782, |
|
"learning_rate": 1.3397459621556128e-08, |
|
"logits/chosen": -0.7708315849304199, |
|
"logits/rejected": -0.8214279413223267, |
|
"logps/chosen": -198.73464965820312, |
|
"logps/rejected": -201.75244140625, |
|
"loss": 0.2866, |
|
"rewards/accuracies": 0.8828125, |
|
"rewards/chosen": 0.23038014769554138, |
|
"rewards/margins": 1.8956291675567627, |
|
"rewards/rejected": -1.6652488708496094, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.8739020105406987, |
|
"eval_logits/chosen": -0.6863436102867126, |
|
"eval_logits/rejected": -0.7882587909698486, |
|
"eval_logps/chosen": -206.22607421875, |
|
"eval_logps/rejected": -185.4351806640625, |
|
"eval_loss": 0.28332585096359253, |
|
"eval_rewards/accuracies": 0.9200000166893005, |
|
"eval_rewards/chosen": 0.6681913137435913, |
|
"eval_rewards/margins": 2.303332567214966, |
|
"eval_rewards/rejected": -1.6351412534713745, |
|
"eval_runtime": 30.6661, |
|
"eval_samples_per_second": 3.261, |
|
"eval_steps_per_second": 0.815, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.8988873706812415, |
|
"grad_norm": 8.503745608023737, |
|
"learning_rate": 8.645454235739902e-09, |
|
"logits/chosen": -0.7426515817642212, |
|
"logits/rejected": -0.8051266670227051, |
|
"logps/chosen": -195.47421264648438, |
|
"logps/rejected": -199.013916015625, |
|
"loss": 0.2643, |
|
"rewards/accuracies": 0.9140625, |
|
"rewards/chosen": 0.39815255999565125, |
|
"rewards/margins": 2.137446403503418, |
|
"rewards/rejected": -1.7392936944961548, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 1.9238727308217842, |
|
"grad_norm": 8.367825814127663, |
|
"learning_rate": 4.8943483704846465e-09, |
|
"logits/chosen": -0.7322957515716553, |
|
"logits/rejected": -0.7974464893341064, |
|
"logps/chosen": -193.97613525390625, |
|
"logps/rejected": -191.2456817626953, |
|
"loss": 0.2622, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/chosen": 0.45596182346343994, |
|
"rewards/margins": 2.185451030731201, |
|
"rewards/rejected": -1.7294889688491821, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 1.9488580909623268, |
|
"grad_norm": 7.920183430972945, |
|
"learning_rate": 2.1852399266194312e-09, |
|
"logits/chosen": -0.7559969425201416, |
|
"logits/rejected": -0.8131712079048157, |
|
"logps/chosen": -203.8223876953125, |
|
"logps/rejected": -202.947509765625, |
|
"loss": 0.2773, |
|
"rewards/accuracies": 0.91015625, |
|
"rewards/chosen": 0.32228347659111023, |
|
"rewards/margins": 2.08817458152771, |
|
"rewards/rejected": -1.7658910751342773, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 1.9738434511028693, |
|
"grad_norm": 8.06856390519066, |
|
"learning_rate": 5.47810463172671e-10, |
|
"logits/chosen": -0.7470804452896118, |
|
"logits/rejected": -0.8129448294639587, |
|
"logps/chosen": -210.6734619140625, |
|
"logps/rejected": -196.4785919189453, |
|
"loss": 0.2755, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/chosen": 0.42607438564300537, |
|
"rewards/margins": 2.0202014446258545, |
|
"rewards/rejected": -1.5941270589828491, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 1.998828811243412, |
|
"grad_norm": 9.773194489937959, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -0.80845707654953, |
|
"logits/rejected": -0.8671077489852905, |
|
"logps/chosen": -203.4849853515625, |
|
"logps/rejected": -204.71002197265625, |
|
"loss": 0.2941, |
|
"rewards/accuracies": 0.91796875, |
|
"rewards/chosen": 0.35994353890419006, |
|
"rewards/margins": 2.0885844230651855, |
|
"rewards/rejected": -1.7286407947540283, |
|
"step": 160 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 160, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 150, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|