|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.1807991321641656, |
|
"eval_steps": 500, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0036159826432833123, |
|
"grad_norm": 0.3235728144645691, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -18.043861389160156, |
|
"logits/rejected": -18.043861389160156, |
|
"logps/chosen": -0.6726381778717041, |
|
"logps/rejected": -0.6726381778717041, |
|
"loss": 6.9675, |
|
"nll_loss": 1.6725690364837646, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.06726382672786713, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.06726382672786713, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0072319652865666245, |
|
"grad_norm": 0.3509086072444916, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -18.013948440551758, |
|
"logits/rejected": -18.013948440551758, |
|
"logps/chosen": -0.6865767240524292, |
|
"logps/rejected": -0.6865767240524292, |
|
"loss": 6.7227, |
|
"nll_loss": 1.6113628149032593, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.06865767389535904, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.06865767389535904, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.010847947929849937, |
|
"grad_norm": 0.38406211137771606, |
|
"learning_rate": 3e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -18.367061614990234, |
|
"logits/rejected": -18.367061614990234, |
|
"logps/chosen": -0.6837120056152344, |
|
"logps/rejected": -0.6837120056152344, |
|
"loss": 6.3986, |
|
"nll_loss": 1.5303384065628052, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.06837119162082672, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.06837119162082672, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.014463930573133249, |
|
"grad_norm": 0.4101921319961548, |
|
"learning_rate": 4.000000000000001e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -17.99128532409668, |
|
"logits/rejected": -17.99128532409668, |
|
"logps/chosen": -0.6463029980659485, |
|
"logps/rejected": -0.6463029980659485, |
|
"loss": 7.0291, |
|
"nll_loss": 1.6879545450210571, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.06463029980659485, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.06463029980659485, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01807991321641656, |
|
"grad_norm": 0.444273978471756, |
|
"learning_rate": 5e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -18.08310890197754, |
|
"logits/rejected": -18.08310890197754, |
|
"logps/chosen": -0.6336182355880737, |
|
"logps/rejected": -0.6336182355880737, |
|
"loss": 6.5453, |
|
"nll_loss": 1.5670195817947388, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.06336182355880737, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.06336182355880737, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.021695895859699874, |
|
"grad_norm": 0.67641681432724, |
|
"learning_rate": 6e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -18.269500732421875, |
|
"logits/rejected": -18.269500732421875, |
|
"logps/chosen": -0.6496673822402954, |
|
"logps/rejected": -0.6496673822402954, |
|
"loss": 6.733, |
|
"nll_loss": 1.6139262914657593, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.06496674567461014, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.06496674567461014, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.025311878502983186, |
|
"grad_norm": 0.39636465907096863, |
|
"learning_rate": 7e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -18.115657806396484, |
|
"logits/rejected": -18.115657806396484, |
|
"logps/chosen": -0.6373459696769714, |
|
"logps/rejected": -0.6373459696769714, |
|
"loss": 6.8825, |
|
"nll_loss": 1.651307463645935, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.06373460590839386, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.06373460590839386, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.028927861146266498, |
|
"grad_norm": 0.5290302634239197, |
|
"learning_rate": 8.000000000000001e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -18.138675689697266, |
|
"logits/rejected": -18.138675689697266, |
|
"logps/chosen": -0.6702518463134766, |
|
"logps/rejected": -0.6702518463134766, |
|
"loss": 6.4063, |
|
"nll_loss": 1.5322693586349487, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.06702519208192825, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.06702519208192825, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.03254384378954981, |
|
"grad_norm": 0.2837754189968109, |
|
"learning_rate": 9e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -18.285091400146484, |
|
"logits/rejected": -18.285091400146484, |
|
"logps/chosen": -0.5695949196815491, |
|
"logps/rejected": -0.5695949196815491, |
|
"loss": 6.3599, |
|
"nll_loss": 1.520659327507019, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.056959498673677444, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.056959498673677444, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.03615982643283312, |
|
"grad_norm": 0.38816723227500916, |
|
"learning_rate": 1e-05, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -18.38321304321289, |
|
"logits/rejected": -18.38321304321289, |
|
"logps/chosen": -0.6220490336418152, |
|
"logps/rejected": -0.6220490336418152, |
|
"loss": 6.3579, |
|
"nll_loss": 1.5201700925827026, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.062204908579587936, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.062204908579587936, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.039775809076116435, |
|
"grad_norm": 0.485514760017395, |
|
"learning_rate": 9.88888888888889e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -18.24630355834961, |
|
"logits/rejected": -18.24630355834961, |
|
"logps/chosen": -0.58094322681427, |
|
"logps/rejected": -0.58094322681427, |
|
"loss": 6.8083, |
|
"nll_loss": 1.6327617168426514, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.058094322681427, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.058094322681427, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.04339179171939975, |
|
"grad_norm": 0.7200180292129517, |
|
"learning_rate": 9.777777777777779e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -18.222978591918945, |
|
"logits/rejected": -18.222978591918945, |
|
"logps/chosen": -0.6140703558921814, |
|
"logps/rejected": -0.6140703558921814, |
|
"loss": 7.0165, |
|
"nll_loss": 1.6848167181015015, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.06140704080462456, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.06140704080462456, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.04700777436268306, |
|
"grad_norm": 0.5130624175071716, |
|
"learning_rate": 9.666666666666667e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -17.98419189453125, |
|
"logits/rejected": -17.98419189453125, |
|
"logps/chosen": -0.6171376705169678, |
|
"logps/rejected": -0.6171376705169678, |
|
"loss": 6.9743, |
|
"nll_loss": 1.6742585897445679, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.06171376630663872, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.06171376630663872, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.05062375700596637, |
|
"grad_norm": 0.6743359565734863, |
|
"learning_rate": 9.555555555555556e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -18.43941879272461, |
|
"logits/rejected": -18.43941879272461, |
|
"logps/chosen": -0.5736243724822998, |
|
"logps/rejected": -0.5736243724822998, |
|
"loss": 6.4179, |
|
"nll_loss": 1.5351518392562866, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.057362429797649384, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.057362429797649384, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.054239739649249684, |
|
"grad_norm": 0.7007283568382263, |
|
"learning_rate": 9.444444444444445e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -18.523662567138672, |
|
"logits/rejected": -18.523662567138672, |
|
"logps/chosen": -0.5047087073326111, |
|
"logps/rejected": -0.5047087073326111, |
|
"loss": 6.4734, |
|
"nll_loss": 1.549041986465454, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.050470877438783646, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.050470877438783646, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.057855722292532996, |
|
"grad_norm": 0.8055678009986877, |
|
"learning_rate": 9.333333333333334e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -18.57369041442871, |
|
"logits/rejected": -18.57369041442871, |
|
"logps/chosen": -0.48697715997695923, |
|
"logps/rejected": -0.48697715997695923, |
|
"loss": 6.1125, |
|
"nll_loss": 1.4588209390640259, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04869771748781204, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04869771748781204, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.06147170493581631, |
|
"grad_norm": 0.5678962469100952, |
|
"learning_rate": 9.222222222222224e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -18.546266555786133, |
|
"logits/rejected": -18.546266555786133, |
|
"logps/chosen": -0.4713471531867981, |
|
"logps/rejected": -0.4713471531867981, |
|
"loss": 6.3675, |
|
"nll_loss": 1.5225670337677002, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04713470861315727, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04713470861315727, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.06508768757909962, |
|
"grad_norm": 0.9227916598320007, |
|
"learning_rate": 9.111111111111112e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -18.552295684814453, |
|
"logits/rejected": -18.552295684814453, |
|
"logps/chosen": -0.5135122537612915, |
|
"logps/rejected": -0.5135122537612915, |
|
"loss": 6.3925, |
|
"nll_loss": 1.5288182497024536, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.05135122686624527, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.05135122686624527, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.06870367022238293, |
|
"grad_norm": 1.3030140399932861, |
|
"learning_rate": 9e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -18.453067779541016, |
|
"logits/rejected": -18.453067779541016, |
|
"logps/chosen": -0.4881868362426758, |
|
"logps/rejected": -0.4881868362426758, |
|
"loss": 6.5908, |
|
"nll_loss": 1.5783830881118774, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.0488186851143837, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.0488186851143837, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.07231965286566625, |
|
"grad_norm": 1.0002509355545044, |
|
"learning_rate": 8.888888888888888e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -18.477296829223633, |
|
"logits/rejected": -18.477296829223633, |
|
"logps/chosen": -0.46165475249290466, |
|
"logps/rejected": -0.46165475249290466, |
|
"loss": 6.6221, |
|
"nll_loss": 1.5862023830413818, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.046165481209754944, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.046165481209754944, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.07593563550894955, |
|
"grad_norm": 0.7885683178901672, |
|
"learning_rate": 8.777777777777778e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -18.564895629882812, |
|
"logits/rejected": -18.564895629882812, |
|
"logps/chosen": -0.41595011949539185, |
|
"logps/rejected": -0.41595011949539185, |
|
"loss": 6.2789, |
|
"nll_loss": 1.500412940979004, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04159501940011978, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04159501940011978, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.07955161815223287, |
|
"grad_norm": 0.6379426717758179, |
|
"learning_rate": 8.666666666666668e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -18.577289581298828, |
|
"logits/rejected": -18.577289581298828, |
|
"logps/chosen": -0.43426617980003357, |
|
"logps/rejected": -0.43426617980003357, |
|
"loss": 6.4009, |
|
"nll_loss": 1.5309035778045654, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.043426621705293655, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.043426621705293655, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.08316760079551618, |
|
"grad_norm": 0.5890640020370483, |
|
"learning_rate": 8.555555555555556e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -18.817012786865234, |
|
"logits/rejected": -18.817012786865234, |
|
"logps/chosen": -0.38255172967910767, |
|
"logps/rejected": -0.38255172967910767, |
|
"loss": 6.021, |
|
"nll_loss": 1.4359278678894043, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.038255173712968826, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.038255173712968826, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.0867835834387995, |
|
"grad_norm": 0.5675965547561646, |
|
"learning_rate": 8.444444444444446e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -18.88204002380371, |
|
"logits/rejected": -18.88204002380371, |
|
"logps/chosen": -0.357022225856781, |
|
"logps/rejected": -0.357022225856781, |
|
"loss": 6.3796, |
|
"nll_loss": 1.5255934000015259, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03570222482085228, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03570222482085228, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.0903995660820828, |
|
"grad_norm": 0.284084677696228, |
|
"learning_rate": 8.344444444444445e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -18.659196853637695, |
|
"logits/rejected": -18.659196853637695, |
|
"logps/chosen": -0.35400137305259705, |
|
"logps/rejected": -0.35400137305259705, |
|
"loss": 6.1544, |
|
"nll_loss": 1.4692747592926025, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03540014103055, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03540014103055, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.09401554872536612, |
|
"grad_norm": 0.6424693465232849, |
|
"learning_rate": 8.233333333333335e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -18.762996673583984, |
|
"logits/rejected": -18.762996673583984, |
|
"logps/chosen": -0.40258026123046875, |
|
"logps/rejected": -0.40258026123046875, |
|
"loss": 6.3857, |
|
"nll_loss": 1.5271098613739014, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.040258027613162994, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.040258027613162994, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.09763153136864942, |
|
"grad_norm": 0.6682632565498352, |
|
"learning_rate": 8.122222222222223e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -18.719924926757812, |
|
"logits/rejected": -18.719924926757812, |
|
"logps/chosen": -0.3123939633369446, |
|
"logps/rejected": -0.3123939633369446, |
|
"loss": 5.7456, |
|
"nll_loss": 1.36708664894104, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.031239395961165428, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.031239395961165428, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.10124751401193274, |
|
"grad_norm": 0.9669603109359741, |
|
"learning_rate": 8.011111111111113e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -18.799579620361328, |
|
"logits/rejected": -18.799579620361328, |
|
"logps/chosen": -0.3817462623119354, |
|
"logps/rejected": -0.3817462623119354, |
|
"loss": 6.2838, |
|
"nll_loss": 1.5016276836395264, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03817462921142578, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03817462921142578, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.10486349665521605, |
|
"grad_norm": 0.7264003157615662, |
|
"learning_rate": 7.9e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -19.090185165405273, |
|
"logits/rejected": -19.090185165405273, |
|
"logps/chosen": -0.3294925093650818, |
|
"logps/rejected": -0.3294925093650818, |
|
"loss": 5.6699, |
|
"nll_loss": 1.3481695652008057, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03294925019145012, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03294925019145012, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.10847947929849937, |
|
"grad_norm": 0.564611554145813, |
|
"learning_rate": 7.788888888888889e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -18.767759323120117, |
|
"logits/rejected": -18.767759323120117, |
|
"logps/chosen": -0.26777949929237366, |
|
"logps/rejected": -0.26777949929237366, |
|
"loss": 6.5104, |
|
"nll_loss": 1.5582915544509888, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.026777952909469604, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.026777952909469604, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.11209546194178267, |
|
"grad_norm": 0.6952475905418396, |
|
"learning_rate": 7.677777777777778e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -18.896703720092773, |
|
"logits/rejected": -18.896703720092773, |
|
"logps/chosen": -0.24664482474327087, |
|
"logps/rejected": -0.24664482474327087, |
|
"loss": 5.9777, |
|
"nll_loss": 1.4251067638397217, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.02466448023915291, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.02466448023915291, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.11571144458506599, |
|
"grad_norm": 0.7094094753265381, |
|
"learning_rate": 7.566666666666667e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -18.993297576904297, |
|
"logits/rejected": -18.993297576904297, |
|
"logps/chosen": -0.27139216661453247, |
|
"logps/rejected": -0.27139216661453247, |
|
"loss": 5.848, |
|
"nll_loss": 1.3926928043365479, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.027139216661453247, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.027139216661453247, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.1193274272283493, |
|
"grad_norm": 0.8386672735214233, |
|
"learning_rate": 7.455555555555556e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -18.818639755249023, |
|
"logits/rejected": -18.818639755249023, |
|
"logps/chosen": -0.2643309533596039, |
|
"logps/rejected": -0.2643309533596039, |
|
"loss": 6.2247, |
|
"nll_loss": 1.4868563413619995, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.026433095335960388, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.026433095335960388, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.12294340987163262, |
|
"grad_norm": 0.6337667107582092, |
|
"learning_rate": 7.344444444444445e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -19.07388687133789, |
|
"logits/rejected": -19.07388687133789, |
|
"logps/chosen": -0.23179857432842255, |
|
"logps/rejected": -0.23179857432842255, |
|
"loss": 5.7871, |
|
"nll_loss": 1.3774592876434326, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.023179858922958374, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.023179858922958374, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.12655939251491594, |
|
"grad_norm": 1.056504726409912, |
|
"learning_rate": 7.233333333333334e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -18.8520450592041, |
|
"logits/rejected": -18.8520450592041, |
|
"logps/chosen": -0.24602051079273224, |
|
"logps/rejected": -0.24602051079273224, |
|
"loss": 5.8268, |
|
"nll_loss": 1.3873755931854248, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.024602051824331284, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.024602051824331284, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.13017537515819924, |
|
"grad_norm": 0.4462037682533264, |
|
"learning_rate": 7.122222222222222e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -19.04658317565918, |
|
"logits/rejected": -19.04658317565918, |
|
"logps/chosen": -0.2291136234998703, |
|
"logps/rejected": -0.2291136234998703, |
|
"loss": 5.8142, |
|
"nll_loss": 1.384232521057129, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.02291136048734188, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.02291136048734188, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.13379135780148255, |
|
"grad_norm": 0.5579663515090942, |
|
"learning_rate": 7.011111111111112e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -18.912765502929688, |
|
"logits/rejected": -18.912765502929688, |
|
"logps/chosen": -0.26528915762901306, |
|
"logps/rejected": -0.26528915762901306, |
|
"loss": 6.0316, |
|
"nll_loss": 1.4385900497436523, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.026528915390372276, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.026528915390372276, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.13740734044476585, |
|
"grad_norm": 1.2433960437774658, |
|
"learning_rate": 6.9e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -19.217700958251953, |
|
"logits/rejected": -19.217700958251953, |
|
"logps/chosen": -0.21606405079364777, |
|
"logps/rejected": -0.21606405079364777, |
|
"loss": 5.7944, |
|
"nll_loss": 1.3792803287506104, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.021606406196951866, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.021606406196951866, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.14102332308804919, |
|
"grad_norm": 0.7657280564308167, |
|
"learning_rate": 6.788888888888889e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -19.175457000732422, |
|
"logits/rejected": -19.175457000732422, |
|
"logps/chosen": -0.2413448989391327, |
|
"logps/rejected": -0.2413448989391327, |
|
"loss": 5.7839, |
|
"nll_loss": 1.3766554594039917, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.02413449063897133, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.02413449063897133, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.1446393057313325, |
|
"grad_norm": 0.7070457935333252, |
|
"learning_rate": 6.677777777777779e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -19.15035629272461, |
|
"logits/rejected": -19.15035629272461, |
|
"logps/chosen": -0.25096631050109863, |
|
"logps/rejected": -0.25096631050109863, |
|
"loss": 6.2307, |
|
"nll_loss": 1.4883568286895752, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.025096634402871132, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.025096634402871132, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1482552883746158, |
|
"grad_norm": 0.43025216460227966, |
|
"learning_rate": 6.566666666666667e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -19.02223014831543, |
|
"logits/rejected": -19.02223014831543, |
|
"logps/chosen": -0.24732474982738495, |
|
"logps/rejected": -0.24732474982738495, |
|
"loss": 6.2865, |
|
"nll_loss": 1.5023012161254883, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.024732474237680435, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.024732474237680435, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.1518712710178991, |
|
"grad_norm": 1.3701528310775757, |
|
"learning_rate": 6.455555555555556e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -19.089460372924805, |
|
"logits/rejected": -19.089460372924805, |
|
"logps/chosen": -0.23749932646751404, |
|
"logps/rejected": -0.23749932646751404, |
|
"loss": 6.2238, |
|
"nll_loss": 1.4866377115249634, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.023749932646751404, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.023749932646751404, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.15548725366118243, |
|
"grad_norm": 0.9421939253807068, |
|
"learning_rate": 6.3444444444444454e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -19.231048583984375, |
|
"logits/rejected": -19.231048583984375, |
|
"logps/chosen": -0.19140982627868652, |
|
"logps/rejected": -0.19140982627868652, |
|
"loss": 6.0814, |
|
"nll_loss": 1.451047420501709, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.01914098486304283, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.01914098486304283, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.15910323630446574, |
|
"grad_norm": 0.9060840010643005, |
|
"learning_rate": 6.2333333333333335e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -19.107379913330078, |
|
"logits/rejected": -19.107379913330078, |
|
"logps/chosen": -0.2809773087501526, |
|
"logps/rejected": -0.2809773087501526, |
|
"loss": 6.3671, |
|
"nll_loss": 1.522457480430603, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.0280977301299572, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.0280977301299572, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.16271921894774904, |
|
"grad_norm": 0.5965518355369568, |
|
"learning_rate": 6.1222222222222224e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -19.34296417236328, |
|
"logits/rejected": -19.34296417236328, |
|
"logps/chosen": -0.19397786259651184, |
|
"logps/rejected": -0.19397786259651184, |
|
"loss": 5.7975, |
|
"nll_loss": 1.3800629377365112, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.019397784024477005, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.019397784024477005, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.16633520159103235, |
|
"grad_norm": 0.9895502328872681, |
|
"learning_rate": 6.011111111111112e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -19.35286521911621, |
|
"logits/rejected": -19.35286521911621, |
|
"logps/chosen": -0.20646443963050842, |
|
"logps/rejected": -0.20646443963050842, |
|
"loss": 5.8574, |
|
"nll_loss": 1.3950278759002686, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.02064644545316696, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.02064644545316696, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.16995118423431568, |
|
"grad_norm": 1.6251282691955566, |
|
"learning_rate": 5.9e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -19.29703140258789, |
|
"logits/rejected": -19.29703140258789, |
|
"logps/chosen": -0.23044386506080627, |
|
"logps/rejected": -0.23044386506080627, |
|
"loss": 6.0743, |
|
"nll_loss": 1.4492676258087158, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.023044386878609657, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.023044386878609657, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.173567166877599, |
|
"grad_norm": 0.7959076762199402, |
|
"learning_rate": 5.788888888888889e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -19.089004516601562, |
|
"logits/rejected": -19.089004516601562, |
|
"logps/chosen": -0.1713530272245407, |
|
"logps/rejected": -0.1713530272245407, |
|
"loss": 6.0034, |
|
"nll_loss": 1.4315412044525146, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.01713530346751213, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.01713530346751213, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.1771831495208823, |
|
"grad_norm": 0.6285834908485413, |
|
"learning_rate": 5.677777777777779e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -19.485876083374023, |
|
"logits/rejected": -19.485876083374023, |
|
"logps/chosen": -0.18361307680606842, |
|
"logps/rejected": -0.18361307680606842, |
|
"loss": 5.6858, |
|
"nll_loss": 1.3521363735198975, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.018361307680606842, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.018361307680606842, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.1807991321641656, |
|
"grad_norm": 0.9556539058685303, |
|
"learning_rate": 5.566666666666667e-06, |
|
"log_odds_chosen": 0.0, |
|
"log_odds_ratio": -0.6931472420692444, |
|
"logits/chosen": -19.300710678100586, |
|
"logits/rejected": -19.300710678100586, |
|
"logps/chosen": -0.17280462384223938, |
|
"logps/rejected": -0.17280462384223938, |
|
"loss": 6.0335, |
|
"nll_loss": 1.439051866531372, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.01728046126663685, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.01728046126663685, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|