|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9865871833084947, |
|
"eval_steps": 500, |
|
"global_step": 501, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.029806259314456036, |
|
"grad_norm": 1762.8857421875, |
|
"learning_rate": 2.5000000000000004e-07, |
|
"log_odds_chosen": -0.22333388030529022, |
|
"log_odds_ratio": -1.0081762075424194, |
|
"logits/chosen": 204.30679321289062, |
|
"logits/rejected": 202.9920654296875, |
|
"logps/chosen": -14.826652526855469, |
|
"logps/rejected": -14.603320121765137, |
|
"loss": 14.961, |
|
"nll_loss": 14.546102523803711, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": -0.7413326501846313, |
|
"rewards/margins": -0.011166660115122795, |
|
"rewards/rejected": -0.7301660776138306, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.05961251862891207, |
|
"grad_norm": 1195.5567626953125, |
|
"learning_rate": 5.000000000000001e-07, |
|
"log_odds_chosen": 0.25514093041419983, |
|
"log_odds_ratio": -0.770182192325592, |
|
"logits/chosen": 219.4593505859375, |
|
"logits/rejected": 223.51095581054688, |
|
"logps/chosen": -12.235333442687988, |
|
"logps/rejected": -12.489803314208984, |
|
"loss": 12.6124, |
|
"nll_loss": 12.337944984436035, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.6117666363716125, |
|
"rewards/margins": 0.012723559513688087, |
|
"rewards/rejected": -0.6244901418685913, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.08941877794336811, |
|
"grad_norm": 721.7440185546875, |
|
"learning_rate": 7.5e-07, |
|
"log_odds_chosen": 0.04993244633078575, |
|
"log_odds_ratio": -0.7743036150932312, |
|
"logits/chosen": 281.7969055175781, |
|
"logits/rejected": 260.814453125, |
|
"logps/chosen": -7.967254638671875, |
|
"logps/rejected": -8.01715087890625, |
|
"loss": 8.2807, |
|
"nll_loss": 7.958427429199219, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.3983627259731293, |
|
"rewards/margins": 0.0024948143400251865, |
|
"rewards/rejected": -0.4008575975894928, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.11922503725782414, |
|
"grad_norm": 213.13336181640625, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"log_odds_chosen": -0.1490481197834015, |
|
"log_odds_ratio": -0.95225590467453, |
|
"logits/chosen": 280.4493103027344, |
|
"logits/rejected": 274.66717529296875, |
|
"logps/chosen": -5.374236583709717, |
|
"logps/rejected": -5.226569175720215, |
|
"loss": 5.4432, |
|
"nll_loss": 5.450861930847168, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.2687118351459503, |
|
"rewards/margins": -0.007383383810520172, |
|
"rewards/rejected": -0.26132842898368835, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.14903129657228018, |
|
"grad_norm": 154.36373901367188, |
|
"learning_rate": 1.25e-06, |
|
"log_odds_chosen": -0.05349766090512276, |
|
"log_odds_ratio": -0.8921065330505371, |
|
"logits/chosen": 297.8148193359375, |
|
"logits/rejected": 307.04766845703125, |
|
"logps/chosen": -3.2826087474823, |
|
"logps/rejected": -3.2111122608184814, |
|
"loss": 3.5, |
|
"nll_loss": 3.3887104988098145, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.16413041949272156, |
|
"rewards/margins": -0.0035748339723795652, |
|
"rewards/rejected": -0.16055560111999512, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.17883755588673622, |
|
"grad_norm": 80.20259094238281, |
|
"learning_rate": 1.5e-06, |
|
"log_odds_chosen": -0.07229617983102798, |
|
"log_odds_ratio": -0.8916282653808594, |
|
"logits/chosen": 345.52191162109375, |
|
"logits/rejected": 374.13287353515625, |
|
"logps/chosen": -2.6274566650390625, |
|
"logps/rejected": -2.530172348022461, |
|
"loss": 2.5601, |
|
"nll_loss": 2.645339012145996, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.1313728392124176, |
|
"rewards/margins": -0.004864226561039686, |
|
"rewards/rejected": -0.12650862336158752, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.20864381520119224, |
|
"grad_norm": 41.495731353759766, |
|
"learning_rate": 1.75e-06, |
|
"log_odds_chosen": 0.1673038899898529, |
|
"log_odds_ratio": -0.7395197153091431, |
|
"logits/chosen": 379.2995300292969, |
|
"logits/rejected": 367.61065673828125, |
|
"logps/chosen": -1.7991399765014648, |
|
"logps/rejected": -1.9078947305679321, |
|
"loss": 2.1231, |
|
"nll_loss": 1.9985812902450562, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.08995698392391205, |
|
"rewards/margins": 0.0054377405904233456, |
|
"rewards/rejected": -0.09539473056793213, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.23845007451564829, |
|
"grad_norm": 57.26367950439453, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"log_odds_chosen": 0.02127310074865818, |
|
"log_odds_ratio": -0.7780741453170776, |
|
"logits/chosen": 371.747802734375, |
|
"logits/rejected": 370.3223571777344, |
|
"logps/chosen": -1.6784114837646484, |
|
"logps/rejected": -1.6915397644042969, |
|
"loss": 1.9474, |
|
"nll_loss": 2.0377304553985596, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.08392057567834854, |
|
"rewards/margins": 0.0006564242066815495, |
|
"rewards/rejected": -0.08457700163125992, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.26825633383010433, |
|
"grad_norm": 48.953094482421875, |
|
"learning_rate": 2.25e-06, |
|
"log_odds_chosen": 0.06037778779864311, |
|
"log_odds_ratio": -0.7294493317604065, |
|
"logits/chosen": 385.0721740722656, |
|
"logits/rejected": 395.3931884765625, |
|
"logps/chosen": -1.5469728708267212, |
|
"logps/rejected": -1.5890170335769653, |
|
"loss": 1.8679, |
|
"nll_loss": 1.742649793624878, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.07734864205121994, |
|
"rewards/margins": 0.0021022059954702854, |
|
"rewards/rejected": -0.07945084571838379, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.29806259314456035, |
|
"grad_norm": 85.16621398925781, |
|
"learning_rate": 2.5e-06, |
|
"log_odds_chosen": 0.22148697078227997, |
|
"log_odds_ratio": -0.6563897728919983, |
|
"logits/chosen": 395.87554931640625, |
|
"logits/rejected": 417.33563232421875, |
|
"logps/chosen": -1.4042726755142212, |
|
"logps/rejected": -1.5677330493927002, |
|
"loss": 1.8511, |
|
"nll_loss": 1.8633716106414795, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.07021363079547882, |
|
"rewards/margins": 0.008173028007149696, |
|
"rewards/rejected": -0.07838664948940277, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.32786885245901637, |
|
"grad_norm": 36.78052520751953, |
|
"learning_rate": 2.7500000000000004e-06, |
|
"log_odds_chosen": 0.04750330001115799, |
|
"log_odds_ratio": -0.7403008341789246, |
|
"logits/chosen": 383.05865478515625, |
|
"logits/rejected": 376.47137451171875, |
|
"logps/chosen": -1.4311497211456299, |
|
"logps/rejected": -1.4584500789642334, |
|
"loss": 1.8524, |
|
"nll_loss": 1.9031813144683838, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.07155750691890717, |
|
"rewards/margins": 0.0013650130713358521, |
|
"rewards/rejected": -0.07292251288890839, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.35767511177347244, |
|
"grad_norm": 43.11362838745117, |
|
"learning_rate": 3e-06, |
|
"log_odds_chosen": 0.15154634416103363, |
|
"log_odds_ratio": -0.6628466844558716, |
|
"logits/chosen": 388.72491455078125, |
|
"logits/rejected": 380.75030517578125, |
|
"logps/chosen": -1.324789047241211, |
|
"logps/rejected": -1.4295395612716675, |
|
"loss": 1.6907, |
|
"nll_loss": 1.754913568496704, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.06623945385217667, |
|
"rewards/margins": 0.005237526725977659, |
|
"rewards/rejected": -0.07147698104381561, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.38748137108792846, |
|
"grad_norm": 29.449420928955078, |
|
"learning_rate": 3.2500000000000002e-06, |
|
"log_odds_chosen": 0.0873890295624733, |
|
"log_odds_ratio": -0.710555911064148, |
|
"logits/chosen": 387.2967834472656, |
|
"logits/rejected": 388.5743103027344, |
|
"logps/chosen": -1.249342679977417, |
|
"logps/rejected": -1.2920448780059814, |
|
"loss": 1.5953, |
|
"nll_loss": 1.5086474418640137, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.06246713548898697, |
|
"rewards/margins": 0.0021351135801523924, |
|
"rewards/rejected": -0.06460224092006683, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.4172876304023845, |
|
"grad_norm": 66.8738784790039, |
|
"learning_rate": 3.5e-06, |
|
"log_odds_chosen": 0.049095284193754196, |
|
"log_odds_ratio": -0.7218947410583496, |
|
"logits/chosen": 375.4095153808594, |
|
"logits/rejected": 383.84027099609375, |
|
"logps/chosen": -1.3798081874847412, |
|
"logps/rejected": -1.4165852069854736, |
|
"loss": 1.632, |
|
"nll_loss": 1.642600655555725, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.06899039447307587, |
|
"rewards/margins": 0.001838861615397036, |
|
"rewards/rejected": -0.07082925736904144, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.44709388971684055, |
|
"grad_norm": 24.510610580444336, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"log_odds_chosen": 0.21395280957221985, |
|
"log_odds_ratio": -0.6359378099441528, |
|
"logits/chosen": 395.4688415527344, |
|
"logits/rejected": 382.9261169433594, |
|
"logps/chosen": -1.1935937404632568, |
|
"logps/rejected": -1.337820291519165, |
|
"loss": 1.5629, |
|
"nll_loss": 1.5003348588943481, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.05967969447374344, |
|
"rewards/margins": 0.00721132755279541, |
|
"rewards/rejected": -0.06689102202653885, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.47690014903129657, |
|
"grad_norm": 30.089900970458984, |
|
"learning_rate": 4.000000000000001e-06, |
|
"log_odds_chosen": 0.20370396971702576, |
|
"log_odds_ratio": -0.6502530574798584, |
|
"logits/chosen": 382.20904541015625, |
|
"logits/rejected": 403.7727355957031, |
|
"logps/chosen": -1.17880117893219, |
|
"logps/rejected": -1.3107407093048096, |
|
"loss": 1.5995, |
|
"nll_loss": 1.6122217178344727, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.058940064162015915, |
|
"rewards/margins": 0.006596976425498724, |
|
"rewards/rejected": -0.06553704291582108, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.5067064083457526, |
|
"grad_norm": 165.75381469726562, |
|
"learning_rate": 4.25e-06, |
|
"log_odds_chosen": 0.07357416301965714, |
|
"log_odds_ratio": -0.8076593279838562, |
|
"logits/chosen": 408.95843505859375, |
|
"logits/rejected": 394.03826904296875, |
|
"logps/chosen": -1.4526355266571045, |
|
"logps/rejected": -1.4595062732696533, |
|
"loss": 1.6746, |
|
"nll_loss": 1.7690614461898804, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.07263178378343582, |
|
"rewards/margins": 0.0003435421676840633, |
|
"rewards/rejected": -0.07297532260417938, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.5365126676602087, |
|
"grad_norm": 45.735618591308594, |
|
"learning_rate": 4.5e-06, |
|
"log_odds_chosen": 0.5337249040603638, |
|
"log_odds_ratio": -0.5693989396095276, |
|
"logits/chosen": 402.0947570800781, |
|
"logits/rejected": 416.75689697265625, |
|
"logps/chosen": -1.3862842321395874, |
|
"logps/rejected": -1.796555757522583, |
|
"loss": 1.5211, |
|
"nll_loss": 1.5622494220733643, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.06931421905755997, |
|
"rewards/margins": 0.020513568073511124, |
|
"rewards/rejected": -0.0898277759552002, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.5663189269746647, |
|
"grad_norm": 43.20003890991211, |
|
"learning_rate": 4.75e-06, |
|
"log_odds_chosen": 0.18776021897792816, |
|
"log_odds_ratio": -0.6678361892700195, |
|
"logits/chosen": 367.4861145019531, |
|
"logits/rejected": 380.6282958984375, |
|
"logps/chosen": -1.1577775478363037, |
|
"logps/rejected": -1.240468978881836, |
|
"loss": 1.5718, |
|
"nll_loss": 1.4726136922836304, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.05788888409733772, |
|
"rewards/margins": 0.004134564660489559, |
|
"rewards/rejected": -0.06202344968914986, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.5961251862891207, |
|
"grad_norm": 48.09437561035156, |
|
"learning_rate": 5e-06, |
|
"log_odds_chosen": 0.23021917045116425, |
|
"log_odds_ratio": -0.6669245958328247, |
|
"logits/chosen": 398.15692138671875, |
|
"logits/rejected": 436.06280517578125, |
|
"logps/chosen": -1.3762584924697876, |
|
"logps/rejected": -1.5756226778030396, |
|
"loss": 1.6621, |
|
"nll_loss": 1.676337480545044, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.06881292164325714, |
|
"rewards/margins": 0.009968215599656105, |
|
"rewards/rejected": -0.0787811428308487, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6259314456035767, |
|
"grad_norm": 27.461023330688477, |
|
"learning_rate": 4.8795003647426654e-06, |
|
"log_odds_chosen": 0.25321143865585327, |
|
"log_odds_ratio": -0.6335381269454956, |
|
"logits/chosen": 394.9198303222656, |
|
"logits/rejected": 407.670166015625, |
|
"logps/chosen": -1.1359978914260864, |
|
"logps/rejected": -1.282949686050415, |
|
"loss": 1.5569, |
|
"nll_loss": 1.5841158628463745, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.05679989606142044, |
|
"rewards/margins": 0.007347588427364826, |
|
"rewards/rejected": -0.0641474798321724, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.6557377049180327, |
|
"grad_norm": 58.5862922668457, |
|
"learning_rate": 4.767312946227961e-06, |
|
"log_odds_chosen": 0.17413778603076935, |
|
"log_odds_ratio": -0.6657994985580444, |
|
"logits/chosen": 372.2387390136719, |
|
"logits/rejected": 370.97259521484375, |
|
"logps/chosen": -1.1112958192825317, |
|
"logps/rejected": -1.2337472438812256, |
|
"loss": 1.5196, |
|
"nll_loss": 1.5138860940933228, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.05556480213999748, |
|
"rewards/margins": 0.00612256396561861, |
|
"rewards/rejected": -0.06168735772371292, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.6855439642324889, |
|
"grad_norm": 25.225566864013672, |
|
"learning_rate": 4.662524041201569e-06, |
|
"log_odds_chosen": 0.2932291030883789, |
|
"log_odds_ratio": -0.6261448264122009, |
|
"logits/chosen": 398.36285400390625, |
|
"logits/rejected": 405.1409912109375, |
|
"logps/chosen": -0.9624778032302856, |
|
"logps/rejected": -1.100894570350647, |
|
"loss": 1.4976, |
|
"nll_loss": 1.4066407680511475, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.04812389984726906, |
|
"rewards/margins": 0.0069208345375955105, |
|
"rewards/rejected": -0.05504472926259041, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.7153502235469449, |
|
"grad_norm": 25.138811111450195, |
|
"learning_rate": 4.564354645876385e-06, |
|
"log_odds_chosen": 0.30031102895736694, |
|
"log_odds_ratio": -0.6141648292541504, |
|
"logits/chosen": 381.42999267578125, |
|
"logits/rejected": 381.4985656738281, |
|
"logps/chosen": -1.05239999294281, |
|
"logps/rejected": -1.2082456350326538, |
|
"loss": 1.5521, |
|
"nll_loss": 1.5355098247528076, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.05262000486254692, |
|
"rewards/margins": 0.007792273070663214, |
|
"rewards/rejected": -0.06041227653622627, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.7451564828614009, |
|
"grad_norm": 19.848705291748047, |
|
"learning_rate": 4.47213595499958e-06, |
|
"log_odds_chosen": 0.05417771264910698, |
|
"log_odds_ratio": -0.7723890542984009, |
|
"logits/chosen": 375.4615173339844, |
|
"logits/rejected": 388.3155517578125, |
|
"logps/chosen": -1.1864535808563232, |
|
"logps/rejected": -1.1864855289459229, |
|
"loss": 1.4682, |
|
"nll_loss": 1.473937749862671, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.05932268500328064, |
|
"rewards/margins": 1.5988014183676569e-06, |
|
"rewards/rejected": -0.05932428315281868, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.7749627421758569, |
|
"grad_norm": 30.878917694091797, |
|
"learning_rate": 4.385290096535147e-06, |
|
"log_odds_chosen": 0.1284504234790802, |
|
"log_odds_ratio": -0.6890888214111328, |
|
"logits/chosen": 400.09014892578125, |
|
"logits/rejected": 389.0010070800781, |
|
"logps/chosen": -1.1370112895965576, |
|
"logps/rejected": -1.1725587844848633, |
|
"loss": 1.5141, |
|
"nll_loss": 1.4747650623321533, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.05685057118535042, |
|
"rewards/margins": 0.0017773698782548308, |
|
"rewards/rejected": -0.05862794071435928, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.8047690014903129, |
|
"grad_norm": 34.69911575317383, |
|
"learning_rate": 4.303314829119352e-06, |
|
"log_odds_chosen": 0.07419878244400024, |
|
"log_odds_ratio": -0.7176602482795715, |
|
"logits/chosen": 412.095703125, |
|
"logits/rejected": 414.66827392578125, |
|
"logps/chosen": -1.1232882738113403, |
|
"logps/rejected": -1.1864019632339478, |
|
"loss": 1.5359, |
|
"nll_loss": 1.5837700366973877, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.05616441369056702, |
|
"rewards/margins": 0.0031556878238916397, |
|
"rewards/rejected": -0.059320103377103806, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.834575260804769, |
|
"grad_norm": 33.93345642089844, |
|
"learning_rate": 4.2257712736425835e-06, |
|
"log_odds_chosen": -0.04845789074897766, |
|
"log_odds_ratio": -0.7893471121788025, |
|
"logits/chosen": 398.22607421875, |
|
"logits/rejected": 404.393798828125, |
|
"logps/chosen": -1.119332194328308, |
|
"logps/rejected": -1.0812653303146362, |
|
"loss": 1.5122, |
|
"nll_loss": 1.6213722229003906, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.05596661567687988, |
|
"rewards/margins": -0.0019033461576327682, |
|
"rewards/rejected": -0.05406326800584793, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.8643815201192251, |
|
"grad_norm": 22.562604904174805, |
|
"learning_rate": 4.1522739926869985e-06, |
|
"log_odds_chosen": -0.06688841432332993, |
|
"log_odds_ratio": -0.7556332349777222, |
|
"logits/chosen": 395.27984619140625, |
|
"logits/rejected": 398.4122009277344, |
|
"logps/chosen": -1.2002326250076294, |
|
"logps/rejected": -1.1435927152633667, |
|
"loss": 1.5121, |
|
"nll_loss": 1.514585256576538, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.06001163646578789, |
|
"rewards/margins": -0.0028319929260760546, |
|
"rewards/rejected": -0.05717964097857475, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.8941877794336811, |
|
"grad_norm": 38.268333435058594, |
|
"learning_rate": 4.082482904638631e-06, |
|
"log_odds_chosen": 0.3597918152809143, |
|
"log_odds_ratio": -0.5650432705879211, |
|
"logits/chosen": 401.6814270019531, |
|
"logits/rejected": 418.9139709472656, |
|
"logps/chosen": -1.0605757236480713, |
|
"logps/rejected": -1.296025037765503, |
|
"loss": 1.4755, |
|
"nll_loss": 1.387669324874878, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.053028784692287445, |
|
"rewards/margins": 0.011772466823458672, |
|
"rewards/rejected": -0.06480124592781067, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.9239940387481371, |
|
"grad_norm": 35.649044036865234, |
|
"learning_rate": 4.016096644512495e-06, |
|
"log_odds_chosen": 0.11360454559326172, |
|
"log_odds_ratio": -0.6917680501937866, |
|
"logits/chosen": 380.48785400390625, |
|
"logits/rejected": 395.10772705078125, |
|
"logps/chosen": -1.1738497018814087, |
|
"logps/rejected": -1.2541792392730713, |
|
"loss": 1.4352, |
|
"nll_loss": 1.3315799236297607, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.058692485094070435, |
|
"rewards/margins": 0.004016467835754156, |
|
"rewards/rejected": -0.06270895153284073, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.9538002980625931, |
|
"grad_norm": 37.8629035949707, |
|
"learning_rate": 3.952847075210474e-06, |
|
"log_odds_chosen": 0.04191911593079567, |
|
"log_odds_ratio": -0.7673999071121216, |
|
"logits/chosen": 384.6130065917969, |
|
"logits/rejected": 430.66485595703125, |
|
"logps/chosen": -1.0005159378051758, |
|
"logps/rejected": -1.0551975965499878, |
|
"loss": 1.408, |
|
"nll_loss": 1.3416965007781982, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.05002579838037491, |
|
"rewards/margins": 0.00273408112116158, |
|
"rewards/rejected": -0.05275987833738327, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.9836065573770492, |
|
"grad_norm": 19.95792007446289, |
|
"learning_rate": 3.892494720807615e-06, |
|
"log_odds_chosen": 0.05066202953457832, |
|
"log_odds_ratio": -0.7182776927947998, |
|
"logits/chosen": 395.8006591796875, |
|
"logits/rejected": 408.99554443359375, |
|
"logps/chosen": -1.0879595279693604, |
|
"logps/rejected": -1.125816822052002, |
|
"loss": 1.436, |
|
"nll_loss": 1.3948609828948975, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.05439797043800354, |
|
"rewards/margins": 0.0018928736681118608, |
|
"rewards/rejected": -0.056290846318006516, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.9955290611028316, |
|
"eval_log_odds_chosen": 0.1983117312192917, |
|
"eval_log_odds_ratio": -0.6895310282707214, |
|
"eval_logits/chosen": 318.3812255859375, |
|
"eval_logits/rejected": 288.9291687011719, |
|
"eval_logps/chosen": -1.0157941579818726, |
|
"eval_logps/rejected": -1.1419692039489746, |
|
"eval_loss": 1.467863917350769, |
|
"eval_nll_loss": 1.4121437072753906, |
|
"eval_rewards/accuracies": 0.5467625856399536, |
|
"eval_rewards/chosen": -0.05078971013426781, |
|
"eval_rewards/margins": 0.006308753043413162, |
|
"eval_rewards/rejected": -0.05709846317768097, |
|
"eval_runtime": 112.1639, |
|
"eval_samples_per_second": 4.93, |
|
"eval_steps_per_second": 1.239, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 1.0134128166915053, |
|
"grad_norm": 16.564281463623047, |
|
"learning_rate": 3.834824944236852e-06, |
|
"log_odds_chosen": 0.39181432127952576, |
|
"log_odds_ratio": -0.5932676196098328, |
|
"logits/chosen": 378.3958435058594, |
|
"logits/rejected": 403.1106262207031, |
|
"logps/chosen": -0.9357401132583618, |
|
"logps/rejected": -1.1598111391067505, |
|
"loss": 1.2992, |
|
"nll_loss": 1.1567914485931396, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.04678700119256973, |
|
"rewards/margins": 0.011203557252883911, |
|
"rewards/rejected": -0.05799056217074394, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.0432190760059612, |
|
"grad_norm": 24.491374969482422, |
|
"learning_rate": 3.7796447300922724e-06, |
|
"log_odds_chosen": 0.8750826120376587, |
|
"log_odds_ratio": -0.42914777994155884, |
|
"logits/chosen": 358.5318603515625, |
|
"logits/rejected": 399.3114929199219, |
|
"logps/chosen": -0.6476485133171082, |
|
"logps/rejected": -1.1458537578582764, |
|
"loss": 1.0769, |
|
"nll_loss": 1.1138975620269775, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.03238242492079735, |
|
"rewards/margins": 0.02491025999188423, |
|
"rewards/rejected": -0.05729268863797188, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.0730253353204173, |
|
"grad_norm": 22.750883102416992, |
|
"learning_rate": 3.72677996249965e-06, |
|
"log_odds_chosen": 0.8333228826522827, |
|
"log_odds_ratio": -0.43526044487953186, |
|
"logits/chosen": 354.4723205566406, |
|
"logits/rejected": 329.74591064453125, |
|
"logps/chosen": -0.789750874042511, |
|
"logps/rejected": -1.287760853767395, |
|
"loss": 1.132, |
|
"nll_loss": 1.2151093482971191, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.03948754072189331, |
|
"rewards/margins": 0.02490049973130226, |
|
"rewards/rejected": -0.06438804417848587, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.1028315946348732, |
|
"grad_norm": 20.229358673095703, |
|
"learning_rate": 3.6760731104690393e-06, |
|
"log_odds_chosen": 1.0057324171066284, |
|
"log_odds_ratio": -0.3837296664714813, |
|
"logits/chosen": 384.34808349609375, |
|
"logits/rejected": 376.38800048828125, |
|
"logps/chosen": -0.6548343896865845, |
|
"logps/rejected": -1.1811447143554688, |
|
"loss": 1.0221, |
|
"nll_loss": 0.9857061505317688, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.032741717994213104, |
|
"rewards/margins": 0.026315515860915184, |
|
"rewards/rejected": -0.05905723571777344, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.1326378539493294, |
|
"grad_norm": 18.751834869384766, |
|
"learning_rate": 3.6273812505500587e-06, |
|
"log_odds_chosen": 0.6209810972213745, |
|
"log_odds_ratio": -0.5106909275054932, |
|
"logits/chosen": 358.50823974609375, |
|
"logits/rejected": 404.4180603027344, |
|
"logps/chosen": -0.7595417499542236, |
|
"logps/rejected": -1.1261508464813232, |
|
"loss": 1.0914, |
|
"nll_loss": 1.0129649639129639, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.03797708824276924, |
|
"rewards/margins": 0.018330451101064682, |
|
"rewards/rejected": -0.05630754306912422, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.1624441132637853, |
|
"grad_norm": 20.339866638183594, |
|
"learning_rate": 3.5805743701971648e-06, |
|
"log_odds_chosen": 0.8648549914360046, |
|
"log_odds_ratio": -0.40149006247520447, |
|
"logits/chosen": 381.13031005859375, |
|
"logits/rejected": 395.5570983886719, |
|
"logps/chosen": -0.8033710718154907, |
|
"logps/rejected": -1.2736122608184814, |
|
"loss": 1.1227, |
|
"nll_loss": 1.1343204975128174, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.04016854614019394, |
|
"rewards/margins": 0.02351205423474312, |
|
"rewards/rejected": -0.06368060410022736, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.1922503725782414, |
|
"grad_norm": 20.895063400268555, |
|
"learning_rate": 3.5355339059327378e-06, |
|
"log_odds_chosen": 0.9302545785903931, |
|
"log_odds_ratio": -0.4023068845272064, |
|
"logits/chosen": 408.6002197265625, |
|
"logits/rejected": 393.536865234375, |
|
"logps/chosen": -0.7376815676689148, |
|
"logps/rejected": -1.2836555242538452, |
|
"loss": 1.0834, |
|
"nll_loss": 1.019555687904358, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.03688408061861992, |
|
"rewards/margins": 0.02729869820177555, |
|
"rewards/rejected": -0.06418277323246002, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.2220566318926975, |
|
"grad_norm": 21.968069076538086, |
|
"learning_rate": 3.4921514788478916e-06, |
|
"log_odds_chosen": 1.1145693063735962, |
|
"log_odds_ratio": -0.38622182607650757, |
|
"logits/chosen": 364.79913330078125, |
|
"logits/rejected": 359.30718994140625, |
|
"logps/chosen": -0.6945966482162476, |
|
"logps/rejected": -1.2616204023361206, |
|
"loss": 1.0621, |
|
"nll_loss": 1.079245686531067, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.03472983092069626, |
|
"rewards/margins": 0.028351187705993652, |
|
"rewards/rejected": -0.06308101862668991, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.2518628912071534, |
|
"grad_norm": 19.83363914489746, |
|
"learning_rate": 3.450327796711771e-06, |
|
"log_odds_chosen": 1.1763904094696045, |
|
"log_odds_ratio": -0.34168320894241333, |
|
"logits/chosen": 371.95068359375, |
|
"logits/rejected": 400.94305419921875, |
|
"logps/chosen": -0.6090874075889587, |
|
"logps/rejected": -1.2537710666656494, |
|
"loss": 1.0413, |
|
"nll_loss": 0.9631906747817993, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.030454367399215698, |
|
"rewards/margins": 0.03223418444395065, |
|
"rewards/rejected": -0.06268856674432755, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.2816691505216096, |
|
"grad_norm": 22.73797035217285, |
|
"learning_rate": 3.409971697352368e-06, |
|
"log_odds_chosen": 1.0536540746688843, |
|
"log_odds_ratio": -0.3665863871574402, |
|
"logits/chosen": 392.6047058105469, |
|
"logits/rejected": 377.4068603515625, |
|
"logps/chosen": -0.7370086908340454, |
|
"logps/rejected": -1.3404157161712646, |
|
"loss": 1.0487, |
|
"nll_loss": 1.0565564632415771, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.03685043752193451, |
|
"rewards/margins": 0.030170351266860962, |
|
"rewards/rejected": -0.06702078878879547, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.3114754098360657, |
|
"grad_norm": 13.09876537322998, |
|
"learning_rate": 3.3709993123162106e-06, |
|
"log_odds_chosen": 0.7300616502761841, |
|
"log_odds_ratio": -0.4766615033149719, |
|
"logits/chosen": 384.1726989746094, |
|
"logits/rejected": 378.66851806640625, |
|
"logps/chosen": -0.7808234691619873, |
|
"logps/rejected": -1.1460365056991577, |
|
"loss": 1.0819, |
|
"nll_loss": 1.038731336593628, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.0390411801636219, |
|
"rewards/margins": 0.0182606503367424, |
|
"rewards/rejected": -0.057301826775074005, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.3412816691505216, |
|
"grad_norm": 29.453706741333008, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"log_odds_chosen": 0.4621034562587738, |
|
"log_odds_ratio": -0.5440367460250854, |
|
"logits/chosen": 385.5031433105469, |
|
"logits/rejected": 378.17987060546875, |
|
"logps/chosen": -0.8730419278144836, |
|
"logps/rejected": -1.139762043952942, |
|
"loss": 1.0496, |
|
"nll_loss": 1.1089845895767212, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.04365209490060806, |
|
"rewards/margins": 0.01333601027727127, |
|
"rewards/rejected": -0.056988101452589035, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.3710879284649775, |
|
"grad_norm": 24.137882232666016, |
|
"learning_rate": 3.296902366978936e-06, |
|
"log_odds_chosen": 1.0880992412567139, |
|
"log_odds_ratio": -0.37469881772994995, |
|
"logits/chosen": 356.7733459472656, |
|
"logits/rejected": 376.2106628417969, |
|
"logps/chosen": -0.6375613808631897, |
|
"logps/rejected": -1.2090116739273071, |
|
"loss": 1.0368, |
|
"nll_loss": 0.927442729473114, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.031878065317869186, |
|
"rewards/margins": 0.02857252024114132, |
|
"rewards/rejected": -0.06045059114694595, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.4008941877794336, |
|
"grad_norm": 19.043012619018555, |
|
"learning_rate": 3.2616403652672114e-06, |
|
"log_odds_chosen": 1.1069047451019287, |
|
"log_odds_ratio": -0.39715421199798584, |
|
"logits/chosen": 377.45684814453125, |
|
"logits/rejected": 391.23175048828125, |
|
"logps/chosen": -0.6500628590583801, |
|
"logps/rejected": -1.3308535814285278, |
|
"loss": 1.0109, |
|
"nll_loss": 0.9406328201293945, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.032503142952919006, |
|
"rewards/margins": 0.034039538353681564, |
|
"rewards/rejected": -0.06654268503189087, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.4307004470938898, |
|
"grad_norm": 18.564252853393555, |
|
"learning_rate": 3.2274861218395142e-06, |
|
"log_odds_chosen": 0.8188554048538208, |
|
"log_odds_ratio": -0.4366012513637543, |
|
"logits/chosen": 400.0711364746094, |
|
"logits/rejected": 406.6979675292969, |
|
"logps/chosen": -0.7228484153747559, |
|
"logps/rejected": -1.1837232112884521, |
|
"loss": 1.0716, |
|
"nll_loss": 1.032801866531372, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.03614242747426033, |
|
"rewards/margins": 0.023043744266033173, |
|
"rewards/rejected": -0.05918616056442261, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.4605067064083457, |
|
"grad_norm": 13.215555191040039, |
|
"learning_rate": 3.1943828249997e-06, |
|
"log_odds_chosen": 0.9353200793266296, |
|
"log_odds_ratio": -0.4173661172389984, |
|
"logits/chosen": 397.68170166015625, |
|
"logits/rejected": 386.11883544921875, |
|
"logps/chosen": -0.6454007029533386, |
|
"logps/rejected": -1.1329607963562012, |
|
"loss": 1.0931, |
|
"nll_loss": 1.0978925228118896, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.03227003663778305, |
|
"rewards/margins": 0.024378007277846336, |
|
"rewards/rejected": -0.056648045778274536, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.4903129657228018, |
|
"grad_norm": 15.847436904907227, |
|
"learning_rate": 3.1622776601683796e-06, |
|
"log_odds_chosen": 1.0629552602767944, |
|
"log_odds_ratio": -0.4346255660057068, |
|
"logits/chosen": 370.0399475097656, |
|
"logits/rejected": 377.7971496582031, |
|
"logps/chosen": -0.6677332520484924, |
|
"logps/rejected": -1.2528654336929321, |
|
"loss": 0.9948, |
|
"nll_loss": 0.9116696119308472, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.03338665887713432, |
|
"rewards/margins": 0.029256608337163925, |
|
"rewards/rejected": -0.06264327466487885, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.520119225037258, |
|
"grad_norm": 20.606616973876953, |
|
"learning_rate": 3.131121455425748e-06, |
|
"log_odds_chosen": 1.0881011486053467, |
|
"log_odds_ratio": -0.33976244926452637, |
|
"logits/chosen": 390.563720703125, |
|
"logits/rejected": 393.47064208984375, |
|
"logps/chosen": -0.6047049760818481, |
|
"logps/rejected": -1.1917129755020142, |
|
"loss": 1.0504, |
|
"nll_loss": 0.9429427981376648, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.030235249549150467, |
|
"rewards/margins": 0.0293504036962986, |
|
"rewards/rejected": -0.05958564952015877, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.5499254843517138, |
|
"grad_norm": 35.40441131591797, |
|
"learning_rate": 3.1008683647302113e-06, |
|
"log_odds_chosen": 0.8506741523742676, |
|
"log_odds_ratio": -0.4449694752693176, |
|
"logits/chosen": 372.16888427734375, |
|
"logits/rejected": 413.76153564453125, |
|
"logps/chosen": -0.8014513254165649, |
|
"logps/rejected": -1.3543529510498047, |
|
"loss": 1.0248, |
|
"nll_loss": 1.0251777172088623, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.040072567760944366, |
|
"rewards/margins": 0.027645081281661987, |
|
"rewards/rejected": -0.06771764904260635, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.5797317436661698, |
|
"grad_norm": 13.316988945007324, |
|
"learning_rate": 3.0714755841697565e-06, |
|
"log_odds_chosen": 1.0472757816314697, |
|
"log_odds_ratio": -0.4307102560997009, |
|
"logits/chosen": 383.9051513671875, |
|
"logits/rejected": 406.1117248535156, |
|
"logps/chosen": -0.6818675398826599, |
|
"logps/rejected": -1.2686574459075928, |
|
"loss": 1.1204, |
|
"nll_loss": 1.0089762210845947, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.034093379974365234, |
|
"rewards/margins": 0.029339497908949852, |
|
"rewards/rejected": -0.06343287974596024, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.6095380029806259, |
|
"grad_norm": 17.495107650756836, |
|
"learning_rate": 3.0429030972509227e-06, |
|
"log_odds_chosen": 0.9306485056877136, |
|
"log_odds_ratio": -0.4013773798942566, |
|
"logits/chosen": 370.3818054199219, |
|
"logits/rejected": 381.3802490234375, |
|
"logps/chosen": -0.7612948417663574, |
|
"logps/rejected": -1.283376932144165, |
|
"loss": 1.0864, |
|
"nll_loss": 1.1147105693817139, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.03806474059820175, |
|
"rewards/margins": 0.026104098185896873, |
|
"rewards/rejected": -0.06416884064674377, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.639344262295082, |
|
"grad_norm": 14.062923431396484, |
|
"learning_rate": 3.0151134457776365e-06, |
|
"log_odds_chosen": 0.8347261548042297, |
|
"log_odds_ratio": -0.4390513002872467, |
|
"logits/chosen": 361.4908752441406, |
|
"logits/rejected": 350.319091796875, |
|
"logps/chosen": -0.6371272802352905, |
|
"logps/rejected": -1.0568915605545044, |
|
"loss": 1.0712, |
|
"nll_loss": 0.9875114560127258, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.031856365501880646, |
|
"rewards/margins": 0.020988214761018753, |
|
"rewards/rejected": -0.0528445765376091, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.669150521609538, |
|
"grad_norm": 14.235246658325195, |
|
"learning_rate": 2.988071523335984e-06, |
|
"log_odds_chosen": 0.8683498501777649, |
|
"log_odds_ratio": -0.5000298619270325, |
|
"logits/chosen": 403.158935546875, |
|
"logits/rejected": 391.2458190917969, |
|
"logps/chosen": -0.6794577240943909, |
|
"logps/rejected": -1.190443754196167, |
|
"loss": 1.0475, |
|
"nll_loss": 1.049759864807129, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.033972885459661484, |
|
"rewards/margins": 0.025549303740262985, |
|
"rewards/rejected": -0.05952219292521477, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.698956780923994, |
|
"grad_norm": 14.518519401550293, |
|
"learning_rate": 2.961744388795462e-06, |
|
"log_odds_chosen": 0.9579475522041321, |
|
"log_odds_ratio": -0.3945266008377075, |
|
"logits/chosen": 368.3428649902344, |
|
"logits/rejected": 374.80645751953125, |
|
"logps/chosen": -0.6118819117546082, |
|
"logps/rejected": -1.1229194402694702, |
|
"loss": 0.9917, |
|
"nll_loss": 0.9298090934753418, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.030594095587730408, |
|
"rewards/margins": 0.025551876053214073, |
|
"rewards/rejected": -0.05614597350358963, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.7287630402384502, |
|
"grad_norm": 16.039731979370117, |
|
"learning_rate": 2.9361010975735177e-06, |
|
"log_odds_chosen": 0.8852699398994446, |
|
"log_odds_ratio": -0.41907158493995667, |
|
"logits/chosen": 385.8910217285156, |
|
"logits/rejected": 422.97454833984375, |
|
"logps/chosen": -0.8015801310539246, |
|
"logps/rejected": -1.3009235858917236, |
|
"loss": 1.0643, |
|
"nll_loss": 1.0100016593933105, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.04007900878787041, |
|
"rewards/margins": 0.02496717870235443, |
|
"rewards/rejected": -0.06504618376493454, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.758569299552906, |
|
"grad_norm": 17.417320251464844, |
|
"learning_rate": 2.9111125486979104e-06, |
|
"log_odds_chosen": 0.8097723722457886, |
|
"log_odds_ratio": -0.4489704966545105, |
|
"logits/chosen": 363.5550231933594, |
|
"logits/rejected": 407.45367431640625, |
|
"logps/chosen": -0.7277875542640686, |
|
"logps/rejected": -1.1767876148223877, |
|
"loss": 1.0644, |
|
"nll_loss": 1.0175808668136597, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.03638937696814537, |
|
"rewards/margins": 0.022450000047683716, |
|
"rewards/rejected": -0.058839380741119385, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.788375558867362, |
|
"grad_norm": 22.727943420410156, |
|
"learning_rate": 2.8867513459481293e-06, |
|
"log_odds_chosen": 1.2782224416732788, |
|
"log_odds_ratio": -0.3165340721607208, |
|
"logits/chosen": 403.18780517578125, |
|
"logits/rejected": 379.86224365234375, |
|
"logps/chosen": -0.6022372245788574, |
|
"logps/rejected": -1.2621891498565674, |
|
"loss": 1.0012, |
|
"nll_loss": 0.9228881597518921, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.03011186420917511, |
|
"rewards/margins": 0.032997600734233856, |
|
"rewards/rejected": -0.06310946494340897, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.8181818181818183, |
|
"grad_norm": 13.393155097961426, |
|
"learning_rate": 2.862991671569341e-06, |
|
"log_odds_chosen": 0.5560621619224548, |
|
"log_odds_ratio": -0.5250486135482788, |
|
"logits/chosen": 394.03631591796875, |
|
"logits/rejected": 403.3617858886719, |
|
"logps/chosen": -0.9106165170669556, |
|
"logps/rejected": -1.2179043292999268, |
|
"loss": 1.0386, |
|
"nll_loss": 1.1626732349395752, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.045530833303928375, |
|
"rewards/margins": 0.015364391729235649, |
|
"rewards/rejected": -0.060895223170518875, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.8479880774962743, |
|
"grad_norm": 14.096085548400879, |
|
"learning_rate": 2.839809171235324e-06, |
|
"log_odds_chosen": 1.0126060247421265, |
|
"log_odds_ratio": -0.4341171383857727, |
|
"logits/chosen": 378.22705078125, |
|
"logits/rejected": 388.7279357910156, |
|
"logps/chosen": -0.6974117159843445, |
|
"logps/rejected": -1.3275178670883179, |
|
"loss": 1.0991, |
|
"nll_loss": 1.0783545970916748, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.034870583564043045, |
|
"rewards/margins": 0.03150530904531479, |
|
"rewards/rejected": -0.06637589633464813, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.8777943368107302, |
|
"grad_norm": 15.438323974609375, |
|
"learning_rate": 2.817180849095055e-06, |
|
"log_odds_chosen": 0.4888283610343933, |
|
"log_odds_ratio": -0.5892666578292847, |
|
"logits/chosen": 354.91192626953125, |
|
"logits/rejected": 373.19049072265625, |
|
"logps/chosen": -1.0054099559783936, |
|
"logps/rejected": -1.3448001146316528, |
|
"loss": 1.0997, |
|
"nll_loss": 1.2546958923339844, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.05027049034833908, |
|
"rewards/margins": 0.01696951314806938, |
|
"rewards/rejected": -0.06724000722169876, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.9076005961251863, |
|
"grad_norm": 15.382440567016602, |
|
"learning_rate": 2.7950849718747376e-06, |
|
"log_odds_chosen": 1.0956491231918335, |
|
"log_odds_ratio": -0.3748942017555237, |
|
"logits/chosen": 376.21466064453125, |
|
"logits/rejected": 396.38897705078125, |
|
"logps/chosen": -0.6471365690231323, |
|
"logps/rejected": -1.257728934288025, |
|
"loss": 0.986, |
|
"nll_loss": 0.9363555908203125, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.032356828451156616, |
|
"rewards/margins": 0.03052961453795433, |
|
"rewards/rejected": -0.06288645416498184, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.9374068554396424, |
|
"grad_norm": 36.433021545410156, |
|
"learning_rate": 2.773500981126146e-06, |
|
"log_odds_chosen": 1.154837965965271, |
|
"log_odds_ratio": -0.362586110830307, |
|
"logits/chosen": 373.2748107910156, |
|
"logits/rejected": 404.8694152832031, |
|
"logps/chosen": -0.705539882183075, |
|
"logps/rejected": -1.3716325759887695, |
|
"loss": 1.0139, |
|
"nll_loss": 0.9342381358146667, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.03527699410915375, |
|
"rewards/margins": 0.03330463916063309, |
|
"rewards/rejected": -0.06858162581920624, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.9672131147540983, |
|
"grad_norm": 20.0263671875, |
|
"learning_rate": 2.752409412815902e-06, |
|
"log_odds_chosen": 0.8623636960983276, |
|
"log_odds_ratio": -0.414236456155777, |
|
"logits/chosen": 370.912841796875, |
|
"logits/rejected": 377.9576721191406, |
|
"logps/chosen": -0.7194432020187378, |
|
"logps/rejected": -1.2195098400115967, |
|
"loss": 1.0256, |
|
"nll_loss": 0.8793627023696899, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.03597215935587883, |
|
"rewards/margins": 0.025003332644701004, |
|
"rewards/rejected": -0.060975492000579834, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.9970193740685542, |
|
"grad_norm": 24.618507385253906, |
|
"learning_rate": 2.7317918235407652e-06, |
|
"log_odds_chosen": 0.5057398080825806, |
|
"log_odds_ratio": -0.5592184662818909, |
|
"logits/chosen": 395.17340087890625, |
|
"logits/rejected": 387.1885986328125, |
|
"logps/chosen": -0.9086158871650696, |
|
"logps/rejected": -1.1841217279434204, |
|
"loss": 1.1098, |
|
"nll_loss": 1.2389247417449951, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.04543079435825348, |
|
"rewards/margins": 0.013775287196040154, |
|
"rewards/rejected": -0.05920607969164848, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 1.9970193740685542, |
|
"eval_log_odds_chosen": 0.21048486232757568, |
|
"eval_log_odds_ratio": -0.7227855920791626, |
|
"eval_logits/chosen": 315.02960205078125, |
|
"eval_logits/rejected": 286.43115234375, |
|
"eval_logps/chosen": -1.0353137254714966, |
|
"eval_logps/rejected": -1.1580623388290405, |
|
"eval_loss": 1.4451346397399902, |
|
"eval_nll_loss": 1.3838590383529663, |
|
"eval_rewards/accuracies": 0.5467625856399536, |
|
"eval_rewards/chosen": -0.05176568776369095, |
|
"eval_rewards/margins": 0.006137436721473932, |
|
"eval_rewards/rejected": -0.05790312588214874, |
|
"eval_runtime": 112.1251, |
|
"eval_samples_per_second": 4.932, |
|
"eval_steps_per_second": 1.24, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 2.0268256333830106, |
|
"grad_norm": 17.44247817993164, |
|
"learning_rate": 2.711630722733202e-06, |
|
"log_odds_chosen": 1.980719804763794, |
|
"log_odds_ratio": -0.21638807654380798, |
|
"logits/chosen": 392.9175109863281, |
|
"logits/rejected": 369.302490234375, |
|
"logps/chosen": -0.39937111735343933, |
|
"logps/rejected": -1.395355224609375, |
|
"loss": 0.6343, |
|
"nll_loss": 0.7234522700309753, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.019968556240200996, |
|
"rewards/margins": 0.04979920759797096, |
|
"rewards/rejected": -0.06976776570081711, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.0566318926974665, |
|
"grad_norm": 11.428772926330566, |
|
"learning_rate": 2.691909510290828e-06, |
|
"log_odds_chosen": 2.5441951751708984, |
|
"log_odds_ratio": -0.12063421308994293, |
|
"logits/chosen": 354.2935485839844, |
|
"logits/rejected": 359.0185852050781, |
|
"logps/chosen": -0.3628384470939636, |
|
"logps/rejected": -1.6579961776733398, |
|
"loss": 0.5571, |
|
"nll_loss": 0.5666171312332153, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.01814192347228527, |
|
"rewards/margins": 0.06475789844989777, |
|
"rewards/rejected": -0.08289982378482819, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 2.0864381520119224, |
|
"grad_norm": 13.283677101135254, |
|
"learning_rate": 2.6726124191242444e-06, |
|
"log_odds_chosen": 2.592142343521118, |
|
"log_odds_ratio": -0.11488159000873566, |
|
"logits/chosen": 353.8732604980469, |
|
"logits/rejected": 388.585693359375, |
|
"logps/chosen": -0.3672012686729431, |
|
"logps/rejected": -1.8615690469741821, |
|
"loss": 0.5687, |
|
"nll_loss": 0.5486581921577454, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.018360063433647156, |
|
"rewards/margins": 0.07471838593482971, |
|
"rewards/rejected": -0.09307844936847687, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.1162444113263787, |
|
"grad_norm": 12.212410926818848, |
|
"learning_rate": 2.6537244621713765e-06, |
|
"log_odds_chosen": 2.209368944168091, |
|
"log_odds_ratio": -0.15512482821941376, |
|
"logits/chosen": 352.80633544921875, |
|
"logits/rejected": 371.6228942871094, |
|
"logps/chosen": -0.3736402690410614, |
|
"logps/rejected": -1.5454914569854736, |
|
"loss": 0.5485, |
|
"nll_loss": 0.609760582447052, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.01868201419711113, |
|
"rewards/margins": 0.058592550456523895, |
|
"rewards/rejected": -0.07727457582950592, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 2.1460506706408347, |
|
"grad_norm": 12.874505043029785, |
|
"learning_rate": 2.6352313834736496e-06, |
|
"log_odds_chosen": 2.694078207015991, |
|
"log_odds_ratio": -0.11345534026622772, |
|
"logits/chosen": 355.081787109375, |
|
"logits/rejected": 400.65533447265625, |
|
"logps/chosen": -0.3401663899421692, |
|
"logps/rejected": -1.6482181549072266, |
|
"loss": 0.5505, |
|
"nll_loss": 0.5371311902999878, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.01700832135975361, |
|
"rewards/margins": 0.06540258973836899, |
|
"rewards/rejected": -0.08241091668605804, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.1758569299552906, |
|
"grad_norm": 12.150455474853516, |
|
"learning_rate": 2.6171196129510684e-06, |
|
"log_odds_chosen": 2.1292691230773926, |
|
"log_odds_ratio": -0.15649950504302979, |
|
"logits/chosen": 340.80157470703125, |
|
"logits/rejected": 330.2677001953125, |
|
"logps/chosen": -0.3447723984718323, |
|
"logps/rejected": -1.3634696006774902, |
|
"loss": 0.5401, |
|
"nll_loss": 0.5159801840782166, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.017238620668649673, |
|
"rewards/margins": 0.05093486234545708, |
|
"rewards/rejected": -0.06817348301410675, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 2.2056631892697465, |
|
"grad_norm": 15.934440612792969, |
|
"learning_rate": 2.599376224550182e-06, |
|
"log_odds_chosen": 2.0337166786193848, |
|
"log_odds_ratio": -0.19345471262931824, |
|
"logits/chosen": 315.1424560546875, |
|
"logits/rejected": 338.2904968261719, |
|
"logps/chosen": -0.3659020662307739, |
|
"logps/rejected": -1.4170308113098145, |
|
"loss": 0.5707, |
|
"nll_loss": 0.5888785719871521, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.018295101821422577, |
|
"rewards/margins": 0.052556443959474564, |
|
"rewards/rejected": -0.07085154205560684, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.235469448584203, |
|
"grad_norm": 13.303545951843262, |
|
"learning_rate": 2.5819888974716113e-06, |
|
"log_odds_chosen": 1.9749561548233032, |
|
"log_odds_ratio": -0.1846763789653778, |
|
"logits/chosen": 365.7724304199219, |
|
"logits/rejected": 387.26141357421875, |
|
"logps/chosen": -0.42183151841163635, |
|
"logps/rejected": -1.4507567882537842, |
|
"loss": 0.6027, |
|
"nll_loss": 0.5997955203056335, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.021091576665639877, |
|
"rewards/margins": 0.05144626647233963, |
|
"rewards/rejected": -0.07253783941268921, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 2.2652757078986587, |
|
"grad_norm": 18.135498046875, |
|
"learning_rate": 2.564945880212886e-06, |
|
"log_odds_chosen": 2.311295509338379, |
|
"log_odds_ratio": -0.12876024842262268, |
|
"logits/chosen": 364.0061950683594, |
|
"logits/rejected": 350.2301330566406, |
|
"logps/chosen": -0.29145348072052, |
|
"logps/rejected": -1.3336101770401, |
|
"loss": 0.5545, |
|
"nll_loss": 0.5340723991394043, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.014572675339877605, |
|
"rewards/margins": 0.05210784077644348, |
|
"rewards/rejected": -0.06668051332235336, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.2950819672131146, |
|
"grad_norm": 10.94619369506836, |
|
"learning_rate": 2.5482359571881276e-06, |
|
"log_odds_chosen": 2.5354793071746826, |
|
"log_odds_ratio": -0.115506611764431, |
|
"logits/chosen": 353.3926696777344, |
|
"logits/rejected": 348.86944580078125, |
|
"logps/chosen": -0.2818690240383148, |
|
"logps/rejected": -1.487006425857544, |
|
"loss": 0.5179, |
|
"nll_loss": 0.476929247379303, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.014093451201915741, |
|
"rewards/margins": 0.06025686860084534, |
|
"rewards/rejected": -0.07435031235218048, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 2.3248882265275705, |
|
"grad_norm": 12.89717960357666, |
|
"learning_rate": 2.5318484177091667e-06, |
|
"log_odds_chosen": 2.246914863586426, |
|
"log_odds_ratio": -0.13051298260688782, |
|
"logits/chosen": 370.3692626953125, |
|
"logits/rejected": 393.1583557128906, |
|
"logps/chosen": -0.37999650835990906, |
|
"logps/rejected": -1.5727269649505615, |
|
"loss": 0.5955, |
|
"nll_loss": 0.6084927320480347, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.018999826163053513, |
|
"rewards/margins": 0.05963651463389397, |
|
"rewards/rejected": -0.07863634079694748, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.354694485842027, |
|
"grad_norm": 9.882362365722656, |
|
"learning_rate": 2.515773027133138e-06, |
|
"log_odds_chosen": 2.3919968605041504, |
|
"log_odds_ratio": -0.13801579177379608, |
|
"logits/chosen": 369.07232666015625, |
|
"logits/rejected": 362.56475830078125, |
|
"logps/chosen": -0.2836388051509857, |
|
"logps/rejected": -1.353062391281128, |
|
"loss": 0.5206, |
|
"nll_loss": 0.473809152841568, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.014181938953697681, |
|
"rewards/margins": 0.05347117781639099, |
|
"rewards/rejected": -0.0676531195640564, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 2.384500745156483, |
|
"grad_norm": 20.866735458374023, |
|
"learning_rate": 2.5e-06, |
|
"log_odds_chosen": 2.305642604827881, |
|
"log_odds_ratio": -0.17361058294773102, |
|
"logits/chosen": 367.1854553222656, |
|
"logits/rejected": 388.62860107421875, |
|
"logps/chosen": -0.37132248282432556, |
|
"logps/rejected": -1.6480903625488281, |
|
"loss": 0.5804, |
|
"nll_loss": 0.5412487387657166, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.018566126003861427, |
|
"rewards/margins": 0.0638383999466896, |
|
"rewards/rejected": -0.08240451663732529, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.4143070044709387, |
|
"grad_norm": 17.410255432128906, |
|
"learning_rate": 2.484519974999767e-06, |
|
"log_odds_chosen": 2.341656446456909, |
|
"log_odds_ratio": -0.18742091953754425, |
|
"logits/chosen": 417.4825744628906, |
|
"logits/rejected": 384.49346923828125, |
|
"logps/chosen": -0.38954219222068787, |
|
"logps/rejected": -1.552782416343689, |
|
"loss": 0.5795, |
|
"nll_loss": 0.5449979305267334, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.019477110356092453, |
|
"rewards/margins": 0.05816201493144035, |
|
"rewards/rejected": -0.07763911783695221, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 2.444113263785395, |
|
"grad_norm": 11.311455726623535, |
|
"learning_rate": 2.4693239916239746e-06, |
|
"log_odds_chosen": 2.352574586868286, |
|
"log_odds_ratio": -0.18433162569999695, |
|
"logits/chosen": 365.95965576171875, |
|
"logits/rejected": 380.1703186035156, |
|
"logps/chosen": -0.37695974111557007, |
|
"logps/rejected": -1.5367991924285889, |
|
"loss": 0.5696, |
|
"nll_loss": 0.5719352960586548, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.018847983330488205, |
|
"rewards/margins": 0.05799197405576706, |
|
"rewards/rejected": -0.07683996111154556, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.473919523099851, |
|
"grad_norm": 11.967494010925293, |
|
"learning_rate": 2.4544034683690802e-06, |
|
"log_odds_chosen": 2.2503182888031006, |
|
"log_odds_ratio": -0.15851208567619324, |
|
"logits/chosen": 364.34222412109375, |
|
"logits/rejected": 394.3598327636719, |
|
"logps/chosen": -0.3465135991573334, |
|
"logps/rejected": -1.4553066492080688, |
|
"loss": 0.5766, |
|
"nll_loss": 0.5365554690361023, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.01732568070292473, |
|
"rewards/margins": 0.05543965846300125, |
|
"rewards/rejected": -0.07276533544063568, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 2.503725782414307, |
|
"grad_norm": 11.675920486450195, |
|
"learning_rate": 2.4397501823713327e-06, |
|
"log_odds_chosen": 2.0490882396698, |
|
"log_odds_ratio": -0.1818782538175583, |
|
"logits/chosen": 367.0909423828125, |
|
"logits/rejected": 343.985107421875, |
|
"logps/chosen": -0.36017632484436035, |
|
"logps/rejected": -1.39711594581604, |
|
"loss": 0.554, |
|
"nll_loss": 0.6418091654777527, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.018008816987276077, |
|
"rewards/margins": 0.05184697359800339, |
|
"rewards/rejected": -0.06985578685998917, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.533532041728763, |
|
"grad_norm": 11.233902931213379, |
|
"learning_rate": 2.4253562503633297e-06, |
|
"log_odds_chosen": 2.5332672595977783, |
|
"log_odds_ratio": -0.10215308517217636, |
|
"logits/chosen": 365.8087463378906, |
|
"logits/rejected": 362.74371337890625, |
|
"logps/chosen": -0.3472338318824768, |
|
"logps/rejected": -1.7049144506454468, |
|
"loss": 0.5363, |
|
"nll_loss": 0.5403138399124146, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.01736168935894966, |
|
"rewards/margins": 0.06788404285907745, |
|
"rewards/rejected": -0.08524572849273682, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 2.563338301043219, |
|
"grad_norm": 16.26917266845703, |
|
"learning_rate": 2.411214110852061e-06, |
|
"log_odds_chosen": 2.512302875518799, |
|
"log_odds_ratio": -0.1274806559085846, |
|
"logits/chosen": 365.8606262207031, |
|
"logits/rejected": 377.60894775390625, |
|
"logps/chosen": -0.30852970480918884, |
|
"logps/rejected": -1.5747673511505127, |
|
"loss": 0.551, |
|
"nll_loss": 0.5144289135932922, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.015426484867930412, |
|
"rewards/margins": 0.06331188976764679, |
|
"rewards/rejected": -0.07873837649822235, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.593144560357675, |
|
"grad_norm": 13.473649024963379, |
|
"learning_rate": 2.3973165074269213e-06, |
|
"log_odds_chosen": 2.2823190689086914, |
|
"log_odds_ratio": -0.1513710767030716, |
|
"logits/chosen": 372.6357421875, |
|
"logits/rejected": 341.8959045410156, |
|
"logps/chosen": -0.3947034776210785, |
|
"logps/rejected": -1.5539586544036865, |
|
"loss": 0.5703, |
|
"nll_loss": 0.5524027943611145, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.019735176116228104, |
|
"rewards/margins": 0.05796275660395622, |
|
"rewards/rejected": -0.07769793272018433, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 2.6229508196721314, |
|
"grad_norm": 15.039813041687012, |
|
"learning_rate": 2.3836564731139807e-06, |
|
"log_odds_chosen": 2.4606306552886963, |
|
"log_odds_ratio": -0.11453738063573837, |
|
"logits/chosen": 356.7464599609375, |
|
"logits/rejected": 367.7265930175781, |
|
"logps/chosen": -0.27872234582901, |
|
"logps/rejected": -1.4566452503204346, |
|
"loss": 0.5883, |
|
"nll_loss": 0.5446338653564453, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0139361172914505, |
|
"rewards/margins": 0.05889614298939705, |
|
"rewards/rejected": -0.07283225655555725, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.6527570789865873, |
|
"grad_norm": 13.054855346679688, |
|
"learning_rate": 2.3702273156998867e-06, |
|
"log_odds_chosen": 2.510906219482422, |
|
"log_odds_ratio": -0.11371259391307831, |
|
"logits/chosen": 337.3484191894531, |
|
"logits/rejected": 373.2784423828125, |
|
"logps/chosen": -0.35476621985435486, |
|
"logps/rejected": -1.7162315845489502, |
|
"loss": 0.5632, |
|
"nll_loss": 0.5669391751289368, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.017738312482833862, |
|
"rewards/margins": 0.06807325780391693, |
|
"rewards/rejected": -0.08581157773733139, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 2.682563338301043, |
|
"grad_norm": 12.158041954040527, |
|
"learning_rate": 2.357022603955159e-06, |
|
"log_odds_chosen": 2.407587766647339, |
|
"log_odds_ratio": -0.11502983421087265, |
|
"logits/chosen": 363.87554931640625, |
|
"logits/rejected": 364.67071533203125, |
|
"logps/chosen": -0.4218372404575348, |
|
"logps/rejected": -1.7667124271392822, |
|
"loss": 0.5905, |
|
"nll_loss": 0.5684647560119629, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.02109185978770256, |
|
"rewards/margins": 0.06724376231431961, |
|
"rewards/rejected": -0.08833561837673187, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.712369597615499, |
|
"grad_norm": 14.808917045593262, |
|
"learning_rate": 2.3440361546924774e-06, |
|
"log_odds_chosen": 2.5720152854919434, |
|
"log_odds_ratio": -0.1182328313589096, |
|
"logits/chosen": 389.94683837890625, |
|
"logits/rejected": 369.15606689453125, |
|
"logps/chosen": -0.3745032250881195, |
|
"logps/rejected": -1.6708095073699951, |
|
"loss": 0.6099, |
|
"nll_loss": 0.5873192548751831, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.018725162371993065, |
|
"rewards/margins": 0.06481531262397766, |
|
"rewards/rejected": -0.08354047685861588, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 2.742175856929955, |
|
"grad_norm": 15.351086616516113, |
|
"learning_rate": 2.3312620206007847e-06, |
|
"log_odds_chosen": 2.6212141513824463, |
|
"log_odds_ratio": -0.12157906591892242, |
|
"logits/chosen": 381.9286193847656, |
|
"logits/rejected": 401.04998779296875, |
|
"logps/chosen": -0.336896151304245, |
|
"logps/rejected": -1.7683613300323486, |
|
"loss": 0.5888, |
|
"nll_loss": 0.6308404207229614, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.01684480905532837, |
|
"rewards/margins": 0.07157325744628906, |
|
"rewards/rejected": -0.08841806650161743, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.7719821162444114, |
|
"grad_norm": 13.619884490966797, |
|
"learning_rate": 2.3186944788008413e-06, |
|
"log_odds_chosen": 2.487888813018799, |
|
"log_odds_ratio": -0.1321084201335907, |
|
"logits/chosen": 378.2283630371094, |
|
"logits/rejected": 382.45391845703125, |
|
"logps/chosen": -0.3096372187137604, |
|
"logps/rejected": -1.5240898132324219, |
|
"loss": 0.5702, |
|
"nll_loss": 0.5487266778945923, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.015481861308217049, |
|
"rewards/margins": 0.060722626745700836, |
|
"rewards/rejected": -0.07620447874069214, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 2.8017883755588673, |
|
"grad_norm": 11.559633255004883, |
|
"learning_rate": 2.3063280200722128e-06, |
|
"log_odds_chosen": 2.1641154289245605, |
|
"log_odds_ratio": -0.19840756058692932, |
|
"logits/chosen": 382.61077880859375, |
|
"logits/rejected": 354.3682556152344, |
|
"logps/chosen": -0.408609539270401, |
|
"logps/rejected": -1.5154647827148438, |
|
"loss": 0.5838, |
|
"nll_loss": 0.5971536636352539, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.02043047733604908, |
|
"rewards/margins": 0.05534275621175766, |
|
"rewards/rejected": -0.07577323168516159, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.8315946348733236, |
|
"grad_norm": 12.24728012084961, |
|
"learning_rate": 2.2941573387056174e-06, |
|
"log_odds_chosen": 2.448145866394043, |
|
"log_odds_ratio": -0.14108145236968994, |
|
"logits/chosen": 352.58197021484375, |
|
"logits/rejected": 376.586181640625, |
|
"logps/chosen": -0.3962209224700928, |
|
"logps/rejected": -1.6542632579803467, |
|
"loss": 0.5532, |
|
"nll_loss": 0.5462762713432312, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.01981104537844658, |
|
"rewards/margins": 0.06290213763713837, |
|
"rewards/rejected": -0.08271317183971405, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 2.8614008941877795, |
|
"grad_norm": 11.175488471984863, |
|
"learning_rate": 2.2821773229381924e-06, |
|
"log_odds_chosen": 2.349735736846924, |
|
"log_odds_ratio": -0.11864028871059418, |
|
"logits/chosen": 361.24639892578125, |
|
"logits/rejected": 402.2587585449219, |
|
"logps/chosen": -0.33937591314315796, |
|
"logps/rejected": -1.5452320575714111, |
|
"loss": 0.5077, |
|
"nll_loss": 0.4929002821445465, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.01696879416704178, |
|
"rewards/margins": 0.060292817652225494, |
|
"rewards/rejected": -0.07726161181926727, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.8912071535022354, |
|
"grad_norm": 12.026611328125, |
|
"learning_rate": 2.270383045932499e-06, |
|
"log_odds_chosen": 2.4791646003723145, |
|
"log_odds_ratio": -0.12428289651870728, |
|
"logits/chosen": 358.9771728515625, |
|
"logits/rejected": 380.42901611328125, |
|
"logps/chosen": -0.3796696364879608, |
|
"logps/rejected": -1.7137501239776611, |
|
"loss": 0.5514, |
|
"nll_loss": 0.5423077344894409, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.01898348517715931, |
|
"rewards/margins": 0.06670401990413666, |
|
"rewards/rejected": -0.08568750321865082, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 2.9210134128166914, |
|
"grad_norm": 12.008419036865234, |
|
"learning_rate": 2.2587697572631284e-06, |
|
"log_odds_chosen": 2.308088541030884, |
|
"log_odds_ratio": -0.1636713743209839, |
|
"logits/chosen": 370.170654296875, |
|
"logits/rejected": 335.97857666015625, |
|
"logps/chosen": -0.41963282227516174, |
|
"logps/rejected": -1.61661696434021, |
|
"loss": 0.6122, |
|
"nll_loss": 0.5415998697280884, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.020981641486287117, |
|
"rewards/margins": 0.05984921008348465, |
|
"rewards/rejected": -0.08083084970712662, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.9508196721311473, |
|
"grad_norm": 11.31982135772705, |
|
"learning_rate": 2.2473328748774737e-06, |
|
"log_odds_chosen": 2.167809247970581, |
|
"log_odds_ratio": -0.174642875790596, |
|
"logits/chosen": 364.75048828125, |
|
"logits/rejected": 393.1929626464844, |
|
"logps/chosen": -0.4049316346645355, |
|
"logps/rejected": -1.4748752117156982, |
|
"loss": 0.5603, |
|
"nll_loss": 0.5929296612739563, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.020246583968400955, |
|
"rewards/margins": 0.05349717289209366, |
|
"rewards/rejected": -0.07374376058578491, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 2.9806259314456036, |
|
"grad_norm": 13.445329666137695, |
|
"learning_rate": 2.23606797749979e-06, |
|
"log_odds_chosen": 2.3928182125091553, |
|
"log_odds_ratio": -0.14649812877178192, |
|
"logits/chosen": 374.05535888671875, |
|
"logits/rejected": 372.2560119628906, |
|
"logps/chosen": -0.34778839349746704, |
|
"logps/rejected": -1.5306968688964844, |
|
"loss": 0.5921, |
|
"nll_loss": 0.5048509836196899, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.017389420419931412, |
|
"rewards/margins": 0.059145428240299225, |
|
"rewards/rejected": -0.07653484493494034, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.9865871833084947, |
|
"eval_log_odds_chosen": 0.28559842705726624, |
|
"eval_log_odds_ratio": -0.6970076560974121, |
|
"eval_logits/chosen": 297.1682434082031, |
|
"eval_logits/rejected": 268.0281982421875, |
|
"eval_logps/chosen": -1.1085351705551147, |
|
"eval_logps/rejected": -1.2919707298278809, |
|
"eval_loss": 1.5517091751098633, |
|
"eval_nll_loss": 1.4855411052703857, |
|
"eval_rewards/accuracies": 0.5611510872840881, |
|
"eval_rewards/chosen": -0.055426761507987976, |
|
"eval_rewards/margins": 0.009171773679554462, |
|
"eval_rewards/rejected": -0.06459853798151016, |
|
"eval_runtime": 112.1561, |
|
"eval_samples_per_second": 4.931, |
|
"eval_steps_per_second": 1.239, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 2.9865871833084947, |
|
"step": 501, |
|
"total_flos": 0.0, |
|
"train_loss": 1.4570662823027956, |
|
"train_runtime": 13599.7579, |
|
"train_samples_per_second": 1.183, |
|
"train_steps_per_second": 0.037 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 501, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|