gemma-7b-orpo-low-quality / trainer_state.json
silviasapora's picture
Model save
3f81b20 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9865871833084947,
"eval_steps": 500,
"global_step": 501,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.029806259314456036,
"grad_norm": 1762.8857421875,
"learning_rate": 2.5000000000000004e-07,
"log_odds_chosen": -0.22333388030529022,
"log_odds_ratio": -1.0081762075424194,
"logits/chosen": 204.30679321289062,
"logits/rejected": 202.9920654296875,
"logps/chosen": -14.826652526855469,
"logps/rejected": -14.603320121765137,
"loss": 14.961,
"nll_loss": 14.546102523803711,
"rewards/accuracies": 0.3499999940395355,
"rewards/chosen": -0.7413326501846313,
"rewards/margins": -0.011166660115122795,
"rewards/rejected": -0.7301660776138306,
"step": 5
},
{
"epoch": 0.05961251862891207,
"grad_norm": 1195.5567626953125,
"learning_rate": 5.000000000000001e-07,
"log_odds_chosen": 0.25514093041419983,
"log_odds_ratio": -0.770182192325592,
"logits/chosen": 219.4593505859375,
"logits/rejected": 223.51095581054688,
"logps/chosen": -12.235333442687988,
"logps/rejected": -12.489803314208984,
"loss": 12.6124,
"nll_loss": 12.337944984436035,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.6117666363716125,
"rewards/margins": 0.012723559513688087,
"rewards/rejected": -0.6244901418685913,
"step": 10
},
{
"epoch": 0.08941877794336811,
"grad_norm": 721.7440185546875,
"learning_rate": 7.5e-07,
"log_odds_chosen": 0.04993244633078575,
"log_odds_ratio": -0.7743036150932312,
"logits/chosen": 281.7969055175781,
"logits/rejected": 260.814453125,
"logps/chosen": -7.967254638671875,
"logps/rejected": -8.01715087890625,
"loss": 8.2807,
"nll_loss": 7.958427429199219,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.3983627259731293,
"rewards/margins": 0.0024948143400251865,
"rewards/rejected": -0.4008575975894928,
"step": 15
},
{
"epoch": 0.11922503725782414,
"grad_norm": 213.13336181640625,
"learning_rate": 1.0000000000000002e-06,
"log_odds_chosen": -0.1490481197834015,
"log_odds_ratio": -0.95225590467453,
"logits/chosen": 280.4493103027344,
"logits/rejected": 274.66717529296875,
"logps/chosen": -5.374236583709717,
"logps/rejected": -5.226569175720215,
"loss": 5.4432,
"nll_loss": 5.450861930847168,
"rewards/accuracies": 0.375,
"rewards/chosen": -0.2687118351459503,
"rewards/margins": -0.007383383810520172,
"rewards/rejected": -0.26132842898368835,
"step": 20
},
{
"epoch": 0.14903129657228018,
"grad_norm": 154.36373901367188,
"learning_rate": 1.25e-06,
"log_odds_chosen": -0.05349766090512276,
"log_odds_ratio": -0.8921065330505371,
"logits/chosen": 297.8148193359375,
"logits/rejected": 307.04766845703125,
"logps/chosen": -3.2826087474823,
"logps/rejected": -3.2111122608184814,
"loss": 3.5,
"nll_loss": 3.3887104988098145,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.16413041949272156,
"rewards/margins": -0.0035748339723795652,
"rewards/rejected": -0.16055560111999512,
"step": 25
},
{
"epoch": 0.17883755588673622,
"grad_norm": 80.20259094238281,
"learning_rate": 1.5e-06,
"log_odds_chosen": -0.07229617983102798,
"log_odds_ratio": -0.8916282653808594,
"logits/chosen": 345.52191162109375,
"logits/rejected": 374.13287353515625,
"logps/chosen": -2.6274566650390625,
"logps/rejected": -2.530172348022461,
"loss": 2.5601,
"nll_loss": 2.645339012145996,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.1313728392124176,
"rewards/margins": -0.004864226561039686,
"rewards/rejected": -0.12650862336158752,
"step": 30
},
{
"epoch": 0.20864381520119224,
"grad_norm": 41.495731353759766,
"learning_rate": 1.75e-06,
"log_odds_chosen": 0.1673038899898529,
"log_odds_ratio": -0.7395197153091431,
"logits/chosen": 379.2995300292969,
"logits/rejected": 367.61065673828125,
"logps/chosen": -1.7991399765014648,
"logps/rejected": -1.9078947305679321,
"loss": 2.1231,
"nll_loss": 1.9985812902450562,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.08995698392391205,
"rewards/margins": 0.0054377405904233456,
"rewards/rejected": -0.09539473056793213,
"step": 35
},
{
"epoch": 0.23845007451564829,
"grad_norm": 57.26367950439453,
"learning_rate": 2.0000000000000003e-06,
"log_odds_chosen": 0.02127310074865818,
"log_odds_ratio": -0.7780741453170776,
"logits/chosen": 371.747802734375,
"logits/rejected": 370.3223571777344,
"logps/chosen": -1.6784114837646484,
"logps/rejected": -1.6915397644042969,
"loss": 1.9474,
"nll_loss": 2.0377304553985596,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.08392057567834854,
"rewards/margins": 0.0006564242066815495,
"rewards/rejected": -0.08457700163125992,
"step": 40
},
{
"epoch": 0.26825633383010433,
"grad_norm": 48.953094482421875,
"learning_rate": 2.25e-06,
"log_odds_chosen": 0.06037778779864311,
"log_odds_ratio": -0.7294493317604065,
"logits/chosen": 385.0721740722656,
"logits/rejected": 395.3931884765625,
"logps/chosen": -1.5469728708267212,
"logps/rejected": -1.5890170335769653,
"loss": 1.8679,
"nll_loss": 1.742649793624878,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.07734864205121994,
"rewards/margins": 0.0021022059954702854,
"rewards/rejected": -0.07945084571838379,
"step": 45
},
{
"epoch": 0.29806259314456035,
"grad_norm": 85.16621398925781,
"learning_rate": 2.5e-06,
"log_odds_chosen": 0.22148697078227997,
"log_odds_ratio": -0.6563897728919983,
"logits/chosen": 395.87554931640625,
"logits/rejected": 417.33563232421875,
"logps/chosen": -1.4042726755142212,
"logps/rejected": -1.5677330493927002,
"loss": 1.8511,
"nll_loss": 1.8633716106414795,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.07021363079547882,
"rewards/margins": 0.008173028007149696,
"rewards/rejected": -0.07838664948940277,
"step": 50
},
{
"epoch": 0.32786885245901637,
"grad_norm": 36.78052520751953,
"learning_rate": 2.7500000000000004e-06,
"log_odds_chosen": 0.04750330001115799,
"log_odds_ratio": -0.7403008341789246,
"logits/chosen": 383.05865478515625,
"logits/rejected": 376.47137451171875,
"logps/chosen": -1.4311497211456299,
"logps/rejected": -1.4584500789642334,
"loss": 1.8524,
"nll_loss": 1.9031813144683838,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.07155750691890717,
"rewards/margins": 0.0013650130713358521,
"rewards/rejected": -0.07292251288890839,
"step": 55
},
{
"epoch": 0.35767511177347244,
"grad_norm": 43.11362838745117,
"learning_rate": 3e-06,
"log_odds_chosen": 0.15154634416103363,
"log_odds_ratio": -0.6628466844558716,
"logits/chosen": 388.72491455078125,
"logits/rejected": 380.75030517578125,
"logps/chosen": -1.324789047241211,
"logps/rejected": -1.4295395612716675,
"loss": 1.6907,
"nll_loss": 1.754913568496704,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.06623945385217667,
"rewards/margins": 0.005237526725977659,
"rewards/rejected": -0.07147698104381561,
"step": 60
},
{
"epoch": 0.38748137108792846,
"grad_norm": 29.449420928955078,
"learning_rate": 3.2500000000000002e-06,
"log_odds_chosen": 0.0873890295624733,
"log_odds_ratio": -0.710555911064148,
"logits/chosen": 387.2967834472656,
"logits/rejected": 388.5743103027344,
"logps/chosen": -1.249342679977417,
"logps/rejected": -1.2920448780059814,
"loss": 1.5953,
"nll_loss": 1.5086474418640137,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.06246713548898697,
"rewards/margins": 0.0021351135801523924,
"rewards/rejected": -0.06460224092006683,
"step": 65
},
{
"epoch": 0.4172876304023845,
"grad_norm": 66.8738784790039,
"learning_rate": 3.5e-06,
"log_odds_chosen": 0.049095284193754196,
"log_odds_ratio": -0.7218947410583496,
"logits/chosen": 375.4095153808594,
"logits/rejected": 383.84027099609375,
"logps/chosen": -1.3798081874847412,
"logps/rejected": -1.4165852069854736,
"loss": 1.632,
"nll_loss": 1.642600655555725,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.06899039447307587,
"rewards/margins": 0.001838861615397036,
"rewards/rejected": -0.07082925736904144,
"step": 70
},
{
"epoch": 0.44709388971684055,
"grad_norm": 24.510610580444336,
"learning_rate": 3.7500000000000005e-06,
"log_odds_chosen": 0.21395280957221985,
"log_odds_ratio": -0.6359378099441528,
"logits/chosen": 395.4688415527344,
"logits/rejected": 382.9261169433594,
"logps/chosen": -1.1935937404632568,
"logps/rejected": -1.337820291519165,
"loss": 1.5629,
"nll_loss": 1.5003348588943481,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.05967969447374344,
"rewards/margins": 0.00721132755279541,
"rewards/rejected": -0.06689102202653885,
"step": 75
},
{
"epoch": 0.47690014903129657,
"grad_norm": 30.089900970458984,
"learning_rate": 4.000000000000001e-06,
"log_odds_chosen": 0.20370396971702576,
"log_odds_ratio": -0.6502530574798584,
"logits/chosen": 382.20904541015625,
"logits/rejected": 403.7727355957031,
"logps/chosen": -1.17880117893219,
"logps/rejected": -1.3107407093048096,
"loss": 1.5995,
"nll_loss": 1.6122217178344727,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.058940064162015915,
"rewards/margins": 0.006596976425498724,
"rewards/rejected": -0.06553704291582108,
"step": 80
},
{
"epoch": 0.5067064083457526,
"grad_norm": 165.75381469726562,
"learning_rate": 4.25e-06,
"log_odds_chosen": 0.07357416301965714,
"log_odds_ratio": -0.8076593279838562,
"logits/chosen": 408.95843505859375,
"logits/rejected": 394.03826904296875,
"logps/chosen": -1.4526355266571045,
"logps/rejected": -1.4595062732696533,
"loss": 1.6746,
"nll_loss": 1.7690614461898804,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.07263178378343582,
"rewards/margins": 0.0003435421676840633,
"rewards/rejected": -0.07297532260417938,
"step": 85
},
{
"epoch": 0.5365126676602087,
"grad_norm": 45.735618591308594,
"learning_rate": 4.5e-06,
"log_odds_chosen": 0.5337249040603638,
"log_odds_ratio": -0.5693989396095276,
"logits/chosen": 402.0947570800781,
"logits/rejected": 416.75689697265625,
"logps/chosen": -1.3862842321395874,
"logps/rejected": -1.796555757522583,
"loss": 1.5211,
"nll_loss": 1.5622494220733643,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.06931421905755997,
"rewards/margins": 0.020513568073511124,
"rewards/rejected": -0.0898277759552002,
"step": 90
},
{
"epoch": 0.5663189269746647,
"grad_norm": 43.20003890991211,
"learning_rate": 4.75e-06,
"log_odds_chosen": 0.18776021897792816,
"log_odds_ratio": -0.6678361892700195,
"logits/chosen": 367.4861145019531,
"logits/rejected": 380.6282958984375,
"logps/chosen": -1.1577775478363037,
"logps/rejected": -1.240468978881836,
"loss": 1.5718,
"nll_loss": 1.4726136922836304,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.05788888409733772,
"rewards/margins": 0.004134564660489559,
"rewards/rejected": -0.06202344968914986,
"step": 95
},
{
"epoch": 0.5961251862891207,
"grad_norm": 48.09437561035156,
"learning_rate": 5e-06,
"log_odds_chosen": 0.23021917045116425,
"log_odds_ratio": -0.6669245958328247,
"logits/chosen": 398.15692138671875,
"logits/rejected": 436.06280517578125,
"logps/chosen": -1.3762584924697876,
"logps/rejected": -1.5756226778030396,
"loss": 1.6621,
"nll_loss": 1.676337480545044,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.06881292164325714,
"rewards/margins": 0.009968215599656105,
"rewards/rejected": -0.0787811428308487,
"step": 100
},
{
"epoch": 0.6259314456035767,
"grad_norm": 27.461023330688477,
"learning_rate": 4.8795003647426654e-06,
"log_odds_chosen": 0.25321143865585327,
"log_odds_ratio": -0.6335381269454956,
"logits/chosen": 394.9198303222656,
"logits/rejected": 407.670166015625,
"logps/chosen": -1.1359978914260864,
"logps/rejected": -1.282949686050415,
"loss": 1.5569,
"nll_loss": 1.5841158628463745,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.05679989606142044,
"rewards/margins": 0.007347588427364826,
"rewards/rejected": -0.0641474798321724,
"step": 105
},
{
"epoch": 0.6557377049180327,
"grad_norm": 58.5862922668457,
"learning_rate": 4.767312946227961e-06,
"log_odds_chosen": 0.17413778603076935,
"log_odds_ratio": -0.6657994985580444,
"logits/chosen": 372.2387390136719,
"logits/rejected": 370.97259521484375,
"logps/chosen": -1.1112958192825317,
"logps/rejected": -1.2337472438812256,
"loss": 1.5196,
"nll_loss": 1.5138860940933228,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.05556480213999748,
"rewards/margins": 0.00612256396561861,
"rewards/rejected": -0.06168735772371292,
"step": 110
},
{
"epoch": 0.6855439642324889,
"grad_norm": 25.225566864013672,
"learning_rate": 4.662524041201569e-06,
"log_odds_chosen": 0.2932291030883789,
"log_odds_ratio": -0.6261448264122009,
"logits/chosen": 398.36285400390625,
"logits/rejected": 405.1409912109375,
"logps/chosen": -0.9624778032302856,
"logps/rejected": -1.100894570350647,
"loss": 1.4976,
"nll_loss": 1.4066407680511475,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.04812389984726906,
"rewards/margins": 0.0069208345375955105,
"rewards/rejected": -0.05504472926259041,
"step": 115
},
{
"epoch": 0.7153502235469449,
"grad_norm": 25.138811111450195,
"learning_rate": 4.564354645876385e-06,
"log_odds_chosen": 0.30031102895736694,
"log_odds_ratio": -0.6141648292541504,
"logits/chosen": 381.42999267578125,
"logits/rejected": 381.4985656738281,
"logps/chosen": -1.05239999294281,
"logps/rejected": -1.2082456350326538,
"loss": 1.5521,
"nll_loss": 1.5355098247528076,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.05262000486254692,
"rewards/margins": 0.007792273070663214,
"rewards/rejected": -0.06041227653622627,
"step": 120
},
{
"epoch": 0.7451564828614009,
"grad_norm": 19.848705291748047,
"learning_rate": 4.47213595499958e-06,
"log_odds_chosen": 0.05417771264910698,
"log_odds_ratio": -0.7723890542984009,
"logits/chosen": 375.4615173339844,
"logits/rejected": 388.3155517578125,
"logps/chosen": -1.1864535808563232,
"logps/rejected": -1.1864855289459229,
"loss": 1.4682,
"nll_loss": 1.473937749862671,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.05932268500328064,
"rewards/margins": 1.5988014183676569e-06,
"rewards/rejected": -0.05932428315281868,
"step": 125
},
{
"epoch": 0.7749627421758569,
"grad_norm": 30.878917694091797,
"learning_rate": 4.385290096535147e-06,
"log_odds_chosen": 0.1284504234790802,
"log_odds_ratio": -0.6890888214111328,
"logits/chosen": 400.09014892578125,
"logits/rejected": 389.0010070800781,
"logps/chosen": -1.1370112895965576,
"logps/rejected": -1.1725587844848633,
"loss": 1.5141,
"nll_loss": 1.4747650623321533,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.05685057118535042,
"rewards/margins": 0.0017773698782548308,
"rewards/rejected": -0.05862794071435928,
"step": 130
},
{
"epoch": 0.8047690014903129,
"grad_norm": 34.69911575317383,
"learning_rate": 4.303314829119352e-06,
"log_odds_chosen": 0.07419878244400024,
"log_odds_ratio": -0.7176602482795715,
"logits/chosen": 412.095703125,
"logits/rejected": 414.66827392578125,
"logps/chosen": -1.1232882738113403,
"logps/rejected": -1.1864019632339478,
"loss": 1.5359,
"nll_loss": 1.5837700366973877,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.05616441369056702,
"rewards/margins": 0.0031556878238916397,
"rewards/rejected": -0.059320103377103806,
"step": 135
},
{
"epoch": 0.834575260804769,
"grad_norm": 33.93345642089844,
"learning_rate": 4.2257712736425835e-06,
"log_odds_chosen": -0.04845789074897766,
"log_odds_ratio": -0.7893471121788025,
"logits/chosen": 398.22607421875,
"logits/rejected": 404.393798828125,
"logps/chosen": -1.119332194328308,
"logps/rejected": -1.0812653303146362,
"loss": 1.5122,
"nll_loss": 1.6213722229003906,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -0.05596661567687988,
"rewards/margins": -0.0019033461576327682,
"rewards/rejected": -0.05406326800584793,
"step": 140
},
{
"epoch": 0.8643815201192251,
"grad_norm": 22.562604904174805,
"learning_rate": 4.1522739926869985e-06,
"log_odds_chosen": -0.06688841432332993,
"log_odds_ratio": -0.7556332349777222,
"logits/chosen": 395.27984619140625,
"logits/rejected": 398.4122009277344,
"logps/chosen": -1.2002326250076294,
"logps/rejected": -1.1435927152633667,
"loss": 1.5121,
"nll_loss": 1.514585256576538,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.06001163646578789,
"rewards/margins": -0.0028319929260760546,
"rewards/rejected": -0.05717964097857475,
"step": 145
},
{
"epoch": 0.8941877794336811,
"grad_norm": 38.268333435058594,
"learning_rate": 4.082482904638631e-06,
"log_odds_chosen": 0.3597918152809143,
"log_odds_ratio": -0.5650432705879211,
"logits/chosen": 401.6814270019531,
"logits/rejected": 418.9139709472656,
"logps/chosen": -1.0605757236480713,
"logps/rejected": -1.296025037765503,
"loss": 1.4755,
"nll_loss": 1.387669324874878,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.053028784692287445,
"rewards/margins": 0.011772466823458672,
"rewards/rejected": -0.06480124592781067,
"step": 150
},
{
"epoch": 0.9239940387481371,
"grad_norm": 35.649044036865234,
"learning_rate": 4.016096644512495e-06,
"log_odds_chosen": 0.11360454559326172,
"log_odds_ratio": -0.6917680501937866,
"logits/chosen": 380.48785400390625,
"logits/rejected": 395.10772705078125,
"logps/chosen": -1.1738497018814087,
"logps/rejected": -1.2541792392730713,
"loss": 1.4352,
"nll_loss": 1.3315799236297607,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.058692485094070435,
"rewards/margins": 0.004016467835754156,
"rewards/rejected": -0.06270895153284073,
"step": 155
},
{
"epoch": 0.9538002980625931,
"grad_norm": 37.8629035949707,
"learning_rate": 3.952847075210474e-06,
"log_odds_chosen": 0.04191911593079567,
"log_odds_ratio": -0.7673999071121216,
"logits/chosen": 384.6130065917969,
"logits/rejected": 430.66485595703125,
"logps/chosen": -1.0005159378051758,
"logps/rejected": -1.0551975965499878,
"loss": 1.408,
"nll_loss": 1.3416965007781982,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.05002579838037491,
"rewards/margins": 0.00273408112116158,
"rewards/rejected": -0.05275987833738327,
"step": 160
},
{
"epoch": 0.9836065573770492,
"grad_norm": 19.95792007446289,
"learning_rate": 3.892494720807615e-06,
"log_odds_chosen": 0.05066202953457832,
"log_odds_ratio": -0.7182776927947998,
"logits/chosen": 395.8006591796875,
"logits/rejected": 408.99554443359375,
"logps/chosen": -1.0879595279693604,
"logps/rejected": -1.125816822052002,
"loss": 1.436,
"nll_loss": 1.3948609828948975,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -0.05439797043800354,
"rewards/margins": 0.0018928736681118608,
"rewards/rejected": -0.056290846318006516,
"step": 165
},
{
"epoch": 0.9955290611028316,
"eval_log_odds_chosen": 0.1983117312192917,
"eval_log_odds_ratio": -0.6895310282707214,
"eval_logits/chosen": 318.3812255859375,
"eval_logits/rejected": 288.9291687011719,
"eval_logps/chosen": -1.0157941579818726,
"eval_logps/rejected": -1.1419692039489746,
"eval_loss": 1.467863917350769,
"eval_nll_loss": 1.4121437072753906,
"eval_rewards/accuracies": 0.5467625856399536,
"eval_rewards/chosen": -0.05078971013426781,
"eval_rewards/margins": 0.006308753043413162,
"eval_rewards/rejected": -0.05709846317768097,
"eval_runtime": 112.1639,
"eval_samples_per_second": 4.93,
"eval_steps_per_second": 1.239,
"step": 167
},
{
"epoch": 1.0134128166915053,
"grad_norm": 16.564281463623047,
"learning_rate": 3.834824944236852e-06,
"log_odds_chosen": 0.39181432127952576,
"log_odds_ratio": -0.5932676196098328,
"logits/chosen": 378.3958435058594,
"logits/rejected": 403.1106262207031,
"logps/chosen": -0.9357401132583618,
"logps/rejected": -1.1598111391067505,
"loss": 1.2992,
"nll_loss": 1.1567914485931396,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.04678700119256973,
"rewards/margins": 0.011203557252883911,
"rewards/rejected": -0.05799056217074394,
"step": 170
},
{
"epoch": 1.0432190760059612,
"grad_norm": 24.491374969482422,
"learning_rate": 3.7796447300922724e-06,
"log_odds_chosen": 0.8750826120376587,
"log_odds_ratio": -0.42914777994155884,
"logits/chosen": 358.5318603515625,
"logits/rejected": 399.3114929199219,
"logps/chosen": -0.6476485133171082,
"logps/rejected": -1.1458537578582764,
"loss": 1.0769,
"nll_loss": 1.1138975620269775,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.03238242492079735,
"rewards/margins": 0.02491025999188423,
"rewards/rejected": -0.05729268863797188,
"step": 175
},
{
"epoch": 1.0730253353204173,
"grad_norm": 22.750883102416992,
"learning_rate": 3.72677996249965e-06,
"log_odds_chosen": 0.8333228826522827,
"log_odds_ratio": -0.43526044487953186,
"logits/chosen": 354.4723205566406,
"logits/rejected": 329.74591064453125,
"logps/chosen": -0.789750874042511,
"logps/rejected": -1.287760853767395,
"loss": 1.132,
"nll_loss": 1.2151093482971191,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.03948754072189331,
"rewards/margins": 0.02490049973130226,
"rewards/rejected": -0.06438804417848587,
"step": 180
},
{
"epoch": 1.1028315946348732,
"grad_norm": 20.229358673095703,
"learning_rate": 3.6760731104690393e-06,
"log_odds_chosen": 1.0057324171066284,
"log_odds_ratio": -0.3837296664714813,
"logits/chosen": 384.34808349609375,
"logits/rejected": 376.38800048828125,
"logps/chosen": -0.6548343896865845,
"logps/rejected": -1.1811447143554688,
"loss": 1.0221,
"nll_loss": 0.9857061505317688,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.032741717994213104,
"rewards/margins": 0.026315515860915184,
"rewards/rejected": -0.05905723571777344,
"step": 185
},
{
"epoch": 1.1326378539493294,
"grad_norm": 18.751834869384766,
"learning_rate": 3.6273812505500587e-06,
"log_odds_chosen": 0.6209810972213745,
"log_odds_ratio": -0.5106909275054932,
"logits/chosen": 358.50823974609375,
"logits/rejected": 404.4180603027344,
"logps/chosen": -0.7595417499542236,
"logps/rejected": -1.1261508464813232,
"loss": 1.0914,
"nll_loss": 1.0129649639129639,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.03797708824276924,
"rewards/margins": 0.018330451101064682,
"rewards/rejected": -0.05630754306912422,
"step": 190
},
{
"epoch": 1.1624441132637853,
"grad_norm": 20.339866638183594,
"learning_rate": 3.5805743701971648e-06,
"log_odds_chosen": 0.8648549914360046,
"log_odds_ratio": -0.40149006247520447,
"logits/chosen": 381.13031005859375,
"logits/rejected": 395.5570983886719,
"logps/chosen": -0.8033710718154907,
"logps/rejected": -1.2736122608184814,
"loss": 1.1227,
"nll_loss": 1.1343204975128174,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.04016854614019394,
"rewards/margins": 0.02351205423474312,
"rewards/rejected": -0.06368060410022736,
"step": 195
},
{
"epoch": 1.1922503725782414,
"grad_norm": 20.895063400268555,
"learning_rate": 3.5355339059327378e-06,
"log_odds_chosen": 0.9302545785903931,
"log_odds_ratio": -0.4023068845272064,
"logits/chosen": 408.6002197265625,
"logits/rejected": 393.536865234375,
"logps/chosen": -0.7376815676689148,
"logps/rejected": -1.2836555242538452,
"loss": 1.0834,
"nll_loss": 1.019555687904358,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.03688408061861992,
"rewards/margins": 0.02729869820177555,
"rewards/rejected": -0.06418277323246002,
"step": 200
},
{
"epoch": 1.2220566318926975,
"grad_norm": 21.968069076538086,
"learning_rate": 3.4921514788478916e-06,
"log_odds_chosen": 1.1145693063735962,
"log_odds_ratio": -0.38622182607650757,
"logits/chosen": 364.79913330078125,
"logits/rejected": 359.30718994140625,
"logps/chosen": -0.6945966482162476,
"logps/rejected": -1.2616204023361206,
"loss": 1.0621,
"nll_loss": 1.079245686531067,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.03472983092069626,
"rewards/margins": 0.028351187705993652,
"rewards/rejected": -0.06308101862668991,
"step": 205
},
{
"epoch": 1.2518628912071534,
"grad_norm": 19.83363914489746,
"learning_rate": 3.450327796711771e-06,
"log_odds_chosen": 1.1763904094696045,
"log_odds_ratio": -0.34168320894241333,
"logits/chosen": 371.95068359375,
"logits/rejected": 400.94305419921875,
"logps/chosen": -0.6090874075889587,
"logps/rejected": -1.2537710666656494,
"loss": 1.0413,
"nll_loss": 0.9631906747817993,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.030454367399215698,
"rewards/margins": 0.03223418444395065,
"rewards/rejected": -0.06268856674432755,
"step": 210
},
{
"epoch": 1.2816691505216096,
"grad_norm": 22.73797035217285,
"learning_rate": 3.409971697352368e-06,
"log_odds_chosen": 1.0536540746688843,
"log_odds_ratio": -0.3665863871574402,
"logits/chosen": 392.6047058105469,
"logits/rejected": 377.4068603515625,
"logps/chosen": -0.7370086908340454,
"logps/rejected": -1.3404157161712646,
"loss": 1.0487,
"nll_loss": 1.0565564632415771,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.03685043752193451,
"rewards/margins": 0.030170351266860962,
"rewards/rejected": -0.06702078878879547,
"step": 215
},
{
"epoch": 1.3114754098360657,
"grad_norm": 13.09876537322998,
"learning_rate": 3.3709993123162106e-06,
"log_odds_chosen": 0.7300616502761841,
"log_odds_ratio": -0.4766615033149719,
"logits/chosen": 384.1726989746094,
"logits/rejected": 378.66851806640625,
"logps/chosen": -0.7808234691619873,
"logps/rejected": -1.1460365056991577,
"loss": 1.0819,
"nll_loss": 1.038731336593628,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.0390411801636219,
"rewards/margins": 0.0182606503367424,
"rewards/rejected": -0.057301826775074005,
"step": 220
},
{
"epoch": 1.3412816691505216,
"grad_norm": 29.453706741333008,
"learning_rate": 3.3333333333333333e-06,
"log_odds_chosen": 0.4621034562587738,
"log_odds_ratio": -0.5440367460250854,
"logits/chosen": 385.5031433105469,
"logits/rejected": 378.17987060546875,
"logps/chosen": -0.8730419278144836,
"logps/rejected": -1.139762043952942,
"loss": 1.0496,
"nll_loss": 1.1089845895767212,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.04365209490060806,
"rewards/margins": 0.01333601027727127,
"rewards/rejected": -0.056988101452589035,
"step": 225
},
{
"epoch": 1.3710879284649775,
"grad_norm": 24.137882232666016,
"learning_rate": 3.296902366978936e-06,
"log_odds_chosen": 1.0880992412567139,
"log_odds_ratio": -0.37469881772994995,
"logits/chosen": 356.7733459472656,
"logits/rejected": 376.2106628417969,
"logps/chosen": -0.6375613808631897,
"logps/rejected": -1.2090116739273071,
"loss": 1.0368,
"nll_loss": 0.927442729473114,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.031878065317869186,
"rewards/margins": 0.02857252024114132,
"rewards/rejected": -0.06045059114694595,
"step": 230
},
{
"epoch": 1.4008941877794336,
"grad_norm": 19.043012619018555,
"learning_rate": 3.2616403652672114e-06,
"log_odds_chosen": 1.1069047451019287,
"log_odds_ratio": -0.39715421199798584,
"logits/chosen": 377.45684814453125,
"logits/rejected": 391.23175048828125,
"logps/chosen": -0.6500628590583801,
"logps/rejected": -1.3308535814285278,
"loss": 1.0109,
"nll_loss": 0.9406328201293945,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.032503142952919006,
"rewards/margins": 0.034039538353681564,
"rewards/rejected": -0.06654268503189087,
"step": 235
},
{
"epoch": 1.4307004470938898,
"grad_norm": 18.564252853393555,
"learning_rate": 3.2274861218395142e-06,
"log_odds_chosen": 0.8188554048538208,
"log_odds_ratio": -0.4366012513637543,
"logits/chosen": 400.0711364746094,
"logits/rejected": 406.6979675292969,
"logps/chosen": -0.7228484153747559,
"logps/rejected": -1.1837232112884521,
"loss": 1.0716,
"nll_loss": 1.032801866531372,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.03614242747426033,
"rewards/margins": 0.023043744266033173,
"rewards/rejected": -0.05918616056442261,
"step": 240
},
{
"epoch": 1.4605067064083457,
"grad_norm": 13.215555191040039,
"learning_rate": 3.1943828249997e-06,
"log_odds_chosen": 0.9353200793266296,
"log_odds_ratio": -0.4173661172389984,
"logits/chosen": 397.68170166015625,
"logits/rejected": 386.11883544921875,
"logps/chosen": -0.6454007029533386,
"logps/rejected": -1.1329607963562012,
"loss": 1.0931,
"nll_loss": 1.0978925228118896,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.03227003663778305,
"rewards/margins": 0.024378007277846336,
"rewards/rejected": -0.056648045778274536,
"step": 245
},
{
"epoch": 1.4903129657228018,
"grad_norm": 15.847436904907227,
"learning_rate": 3.1622776601683796e-06,
"log_odds_chosen": 1.0629552602767944,
"log_odds_ratio": -0.4346255660057068,
"logits/chosen": 370.0399475097656,
"logits/rejected": 377.7971496582031,
"logps/chosen": -0.6677332520484924,
"logps/rejected": -1.2528654336929321,
"loss": 0.9948,
"nll_loss": 0.9116696119308472,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.03338665887713432,
"rewards/margins": 0.029256608337163925,
"rewards/rejected": -0.06264327466487885,
"step": 250
},
{
"epoch": 1.520119225037258,
"grad_norm": 20.606616973876953,
"learning_rate": 3.131121455425748e-06,
"log_odds_chosen": 1.0881011486053467,
"log_odds_ratio": -0.33976244926452637,
"logits/chosen": 390.563720703125,
"logits/rejected": 393.47064208984375,
"logps/chosen": -0.6047049760818481,
"logps/rejected": -1.1917129755020142,
"loss": 1.0504,
"nll_loss": 0.9429427981376648,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.030235249549150467,
"rewards/margins": 0.0293504036962986,
"rewards/rejected": -0.05958564952015877,
"step": 255
},
{
"epoch": 1.5499254843517138,
"grad_norm": 35.40441131591797,
"learning_rate": 3.1008683647302113e-06,
"log_odds_chosen": 0.8506741523742676,
"log_odds_ratio": -0.4449694752693176,
"logits/chosen": 372.16888427734375,
"logits/rejected": 413.76153564453125,
"logps/chosen": -0.8014513254165649,
"logps/rejected": -1.3543529510498047,
"loss": 1.0248,
"nll_loss": 1.0251777172088623,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.040072567760944366,
"rewards/margins": 0.027645081281661987,
"rewards/rejected": -0.06771764904260635,
"step": 260
},
{
"epoch": 1.5797317436661698,
"grad_norm": 13.316988945007324,
"learning_rate": 3.0714755841697565e-06,
"log_odds_chosen": 1.0472757816314697,
"log_odds_ratio": -0.4307102560997009,
"logits/chosen": 383.9051513671875,
"logits/rejected": 406.1117248535156,
"logps/chosen": -0.6818675398826599,
"logps/rejected": -1.2686574459075928,
"loss": 1.1204,
"nll_loss": 1.0089762210845947,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.034093379974365234,
"rewards/margins": 0.029339497908949852,
"rewards/rejected": -0.06343287974596024,
"step": 265
},
{
"epoch": 1.6095380029806259,
"grad_norm": 17.495107650756836,
"learning_rate": 3.0429030972509227e-06,
"log_odds_chosen": 0.9306485056877136,
"log_odds_ratio": -0.4013773798942566,
"logits/chosen": 370.3818054199219,
"logits/rejected": 381.3802490234375,
"logps/chosen": -0.7612948417663574,
"logps/rejected": -1.283376932144165,
"loss": 1.0864,
"nll_loss": 1.1147105693817139,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.03806474059820175,
"rewards/margins": 0.026104098185896873,
"rewards/rejected": -0.06416884064674377,
"step": 270
},
{
"epoch": 1.639344262295082,
"grad_norm": 14.062923431396484,
"learning_rate": 3.0151134457776365e-06,
"log_odds_chosen": 0.8347261548042297,
"log_odds_ratio": -0.4390513002872467,
"logits/chosen": 361.4908752441406,
"logits/rejected": 350.319091796875,
"logps/chosen": -0.6371272802352905,
"logps/rejected": -1.0568915605545044,
"loss": 1.0712,
"nll_loss": 0.9875114560127258,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.031856365501880646,
"rewards/margins": 0.020988214761018753,
"rewards/rejected": -0.0528445765376091,
"step": 275
},
{
"epoch": 1.669150521609538,
"grad_norm": 14.235246658325195,
"learning_rate": 2.988071523335984e-06,
"log_odds_chosen": 0.8683498501777649,
"log_odds_ratio": -0.5000298619270325,
"logits/chosen": 403.158935546875,
"logits/rejected": 391.2458190917969,
"logps/chosen": -0.6794577240943909,
"logps/rejected": -1.190443754196167,
"loss": 1.0475,
"nll_loss": 1.049759864807129,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.033972885459661484,
"rewards/margins": 0.025549303740262985,
"rewards/rejected": -0.05952219292521477,
"step": 280
},
{
"epoch": 1.698956780923994,
"grad_norm": 14.518519401550293,
"learning_rate": 2.961744388795462e-06,
"log_odds_chosen": 0.9579475522041321,
"log_odds_ratio": -0.3945266008377075,
"logits/chosen": 368.3428649902344,
"logits/rejected": 374.80645751953125,
"logps/chosen": -0.6118819117546082,
"logps/rejected": -1.1229194402694702,
"loss": 0.9917,
"nll_loss": 0.9298090934753418,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.030594095587730408,
"rewards/margins": 0.025551876053214073,
"rewards/rejected": -0.05614597350358963,
"step": 285
},
{
"epoch": 1.7287630402384502,
"grad_norm": 16.039731979370117,
"learning_rate": 2.9361010975735177e-06,
"log_odds_chosen": 0.8852699398994446,
"log_odds_ratio": -0.41907158493995667,
"logits/chosen": 385.8910217285156,
"logits/rejected": 422.97454833984375,
"logps/chosen": -0.8015801310539246,
"logps/rejected": -1.3009235858917236,
"loss": 1.0643,
"nll_loss": 1.0100016593933105,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.04007900878787041,
"rewards/margins": 0.02496717870235443,
"rewards/rejected": -0.06504618376493454,
"step": 290
},
{
"epoch": 1.758569299552906,
"grad_norm": 17.417320251464844,
"learning_rate": 2.9111125486979104e-06,
"log_odds_chosen": 0.8097723722457886,
"log_odds_ratio": -0.4489704966545105,
"logits/chosen": 363.5550231933594,
"logits/rejected": 407.45367431640625,
"logps/chosen": -0.7277875542640686,
"logps/rejected": -1.1767876148223877,
"loss": 1.0644,
"nll_loss": 1.0175808668136597,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.03638937696814537,
"rewards/margins": 0.022450000047683716,
"rewards/rejected": -0.058839380741119385,
"step": 295
},
{
"epoch": 1.788375558867362,
"grad_norm": 22.727943420410156,
"learning_rate": 2.8867513459481293e-06,
"log_odds_chosen": 1.2782224416732788,
"log_odds_ratio": -0.3165340721607208,
"logits/chosen": 403.18780517578125,
"logits/rejected": 379.86224365234375,
"logps/chosen": -0.6022372245788574,
"logps/rejected": -1.2621891498565674,
"loss": 1.0012,
"nll_loss": 0.9228881597518921,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.03011186420917511,
"rewards/margins": 0.032997600734233856,
"rewards/rejected": -0.06310946494340897,
"step": 300
},
{
"epoch": 1.8181818181818183,
"grad_norm": 13.393155097961426,
"learning_rate": 2.862991671569341e-06,
"log_odds_chosen": 0.5560621619224548,
"log_odds_ratio": -0.5250486135482788,
"logits/chosen": 394.03631591796875,
"logits/rejected": 403.3617858886719,
"logps/chosen": -0.9106165170669556,
"logps/rejected": -1.2179043292999268,
"loss": 1.0386,
"nll_loss": 1.1626732349395752,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.045530833303928375,
"rewards/margins": 0.015364391729235649,
"rewards/rejected": -0.060895223170518875,
"step": 305
},
{
"epoch": 1.8479880774962743,
"grad_norm": 14.096085548400879,
"learning_rate": 2.839809171235324e-06,
"log_odds_chosen": 1.0126060247421265,
"log_odds_ratio": -0.4341171383857727,
"logits/chosen": 378.22705078125,
"logits/rejected": 388.7279357910156,
"logps/chosen": -0.6974117159843445,
"logps/rejected": -1.3275178670883179,
"loss": 1.0991,
"nll_loss": 1.0783545970916748,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.034870583564043045,
"rewards/margins": 0.03150530904531479,
"rewards/rejected": -0.06637589633464813,
"step": 310
},
{
"epoch": 1.8777943368107302,
"grad_norm": 15.438323974609375,
"learning_rate": 2.817180849095055e-06,
"log_odds_chosen": 0.4888283610343933,
"log_odds_ratio": -0.5892666578292847,
"logits/chosen": 354.91192626953125,
"logits/rejected": 373.19049072265625,
"logps/chosen": -1.0054099559783936,
"logps/rejected": -1.3448001146316528,
"loss": 1.0997,
"nll_loss": 1.2546958923339844,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.05027049034833908,
"rewards/margins": 0.01696951314806938,
"rewards/rejected": -0.06724000722169876,
"step": 315
},
{
"epoch": 1.9076005961251863,
"grad_norm": 15.382440567016602,
"learning_rate": 2.7950849718747376e-06,
"log_odds_chosen": 1.0956491231918335,
"log_odds_ratio": -0.3748942017555237,
"logits/chosen": 376.21466064453125,
"logits/rejected": 396.38897705078125,
"logps/chosen": -0.6471365690231323,
"logps/rejected": -1.257728934288025,
"loss": 0.986,
"nll_loss": 0.9363555908203125,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.032356828451156616,
"rewards/margins": 0.03052961453795433,
"rewards/rejected": -0.06288645416498184,
"step": 320
},
{
"epoch": 1.9374068554396424,
"grad_norm": 36.433021545410156,
"learning_rate": 2.773500981126146e-06,
"log_odds_chosen": 1.154837965965271,
"log_odds_ratio": -0.362586110830307,
"logits/chosen": 373.2748107910156,
"logits/rejected": 404.8694152832031,
"logps/chosen": -0.705539882183075,
"logps/rejected": -1.3716325759887695,
"loss": 1.0139,
"nll_loss": 0.9342381358146667,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.03527699410915375,
"rewards/margins": 0.03330463916063309,
"rewards/rejected": -0.06858162581920624,
"step": 325
},
{
"epoch": 1.9672131147540983,
"grad_norm": 20.0263671875,
"learning_rate": 2.752409412815902e-06,
"log_odds_chosen": 0.8623636960983276,
"log_odds_ratio": -0.414236456155777,
"logits/chosen": 370.912841796875,
"logits/rejected": 377.9576721191406,
"logps/chosen": -0.7194432020187378,
"logps/rejected": -1.2195098400115967,
"loss": 1.0256,
"nll_loss": 0.8793627023696899,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.03597215935587883,
"rewards/margins": 0.025003332644701004,
"rewards/rejected": -0.060975492000579834,
"step": 330
},
{
"epoch": 1.9970193740685542,
"grad_norm": 24.618507385253906,
"learning_rate": 2.7317918235407652e-06,
"log_odds_chosen": 0.5057398080825806,
"log_odds_ratio": -0.5592184662818909,
"logits/chosen": 395.17340087890625,
"logits/rejected": 387.1885986328125,
"logps/chosen": -0.9086158871650696,
"logps/rejected": -1.1841217279434204,
"loss": 1.1098,
"nll_loss": 1.2389247417449951,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.04543079435825348,
"rewards/margins": 0.013775287196040154,
"rewards/rejected": -0.05920607969164848,
"step": 335
},
{
"epoch": 1.9970193740685542,
"eval_log_odds_chosen": 0.21048486232757568,
"eval_log_odds_ratio": -0.7227855920791626,
"eval_logits/chosen": 315.02960205078125,
"eval_logits/rejected": 286.43115234375,
"eval_logps/chosen": -1.0353137254714966,
"eval_logps/rejected": -1.1580623388290405,
"eval_loss": 1.4451346397399902,
"eval_nll_loss": 1.3838590383529663,
"eval_rewards/accuracies": 0.5467625856399536,
"eval_rewards/chosen": -0.05176568776369095,
"eval_rewards/margins": 0.006137436721473932,
"eval_rewards/rejected": -0.05790312588214874,
"eval_runtime": 112.1251,
"eval_samples_per_second": 4.932,
"eval_steps_per_second": 1.24,
"step": 335
},
{
"epoch": 2.0268256333830106,
"grad_norm": 17.44247817993164,
"learning_rate": 2.711630722733202e-06,
"log_odds_chosen": 1.980719804763794,
"log_odds_ratio": -0.21638807654380798,
"logits/chosen": 392.9175109863281,
"logits/rejected": 369.302490234375,
"logps/chosen": -0.39937111735343933,
"logps/rejected": -1.395355224609375,
"loss": 0.6343,
"nll_loss": 0.7234522700309753,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.019968556240200996,
"rewards/margins": 0.04979920759797096,
"rewards/rejected": -0.06976776570081711,
"step": 340
},
{
"epoch": 2.0566318926974665,
"grad_norm": 11.428772926330566,
"learning_rate": 2.691909510290828e-06,
"log_odds_chosen": 2.5441951751708984,
"log_odds_ratio": -0.12063421308994293,
"logits/chosen": 354.2935485839844,
"logits/rejected": 359.0185852050781,
"logps/chosen": -0.3628384470939636,
"logps/rejected": -1.6579961776733398,
"loss": 0.5571,
"nll_loss": 0.5666171312332153,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.01814192347228527,
"rewards/margins": 0.06475789844989777,
"rewards/rejected": -0.08289982378482819,
"step": 345
},
{
"epoch": 2.0864381520119224,
"grad_norm": 13.283677101135254,
"learning_rate": 2.6726124191242444e-06,
"log_odds_chosen": 2.592142343521118,
"log_odds_ratio": -0.11488159000873566,
"logits/chosen": 353.8732604980469,
"logits/rejected": 388.585693359375,
"logps/chosen": -0.3672012686729431,
"logps/rejected": -1.8615690469741821,
"loss": 0.5687,
"nll_loss": 0.5486581921577454,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.018360063433647156,
"rewards/margins": 0.07471838593482971,
"rewards/rejected": -0.09307844936847687,
"step": 350
},
{
"epoch": 2.1162444113263787,
"grad_norm": 12.212410926818848,
"learning_rate": 2.6537244621713765e-06,
"log_odds_chosen": 2.209368944168091,
"log_odds_ratio": -0.15512482821941376,
"logits/chosen": 352.80633544921875,
"logits/rejected": 371.6228942871094,
"logps/chosen": -0.3736402690410614,
"logps/rejected": -1.5454914569854736,
"loss": 0.5485,
"nll_loss": 0.609760582447052,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.01868201419711113,
"rewards/margins": 0.058592550456523895,
"rewards/rejected": -0.07727457582950592,
"step": 355
},
{
"epoch": 2.1460506706408347,
"grad_norm": 12.874505043029785,
"learning_rate": 2.6352313834736496e-06,
"log_odds_chosen": 2.694078207015991,
"log_odds_ratio": -0.11345534026622772,
"logits/chosen": 355.081787109375,
"logits/rejected": 400.65533447265625,
"logps/chosen": -0.3401663899421692,
"logps/rejected": -1.6482181549072266,
"loss": 0.5505,
"nll_loss": 0.5371311902999878,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.01700832135975361,
"rewards/margins": 0.06540258973836899,
"rewards/rejected": -0.08241091668605804,
"step": 360
},
{
"epoch": 2.1758569299552906,
"grad_norm": 12.150455474853516,
"learning_rate": 2.6171196129510684e-06,
"log_odds_chosen": 2.1292691230773926,
"log_odds_ratio": -0.15649950504302979,
"logits/chosen": 340.80157470703125,
"logits/rejected": 330.2677001953125,
"logps/chosen": -0.3447723984718323,
"logps/rejected": -1.3634696006774902,
"loss": 0.5401,
"nll_loss": 0.5159801840782166,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.017238620668649673,
"rewards/margins": 0.05093486234545708,
"rewards/rejected": -0.06817348301410675,
"step": 365
},
{
"epoch": 2.2056631892697465,
"grad_norm": 15.934440612792969,
"learning_rate": 2.599376224550182e-06,
"log_odds_chosen": 2.0337166786193848,
"log_odds_ratio": -0.19345471262931824,
"logits/chosen": 315.1424560546875,
"logits/rejected": 338.2904968261719,
"logps/chosen": -0.3659020662307739,
"logps/rejected": -1.4170308113098145,
"loss": 0.5707,
"nll_loss": 0.5888785719871521,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.018295101821422577,
"rewards/margins": 0.052556443959474564,
"rewards/rejected": -0.07085154205560684,
"step": 370
},
{
"epoch": 2.235469448584203,
"grad_norm": 13.303545951843262,
"learning_rate": 2.5819888974716113e-06,
"log_odds_chosen": 1.9749561548233032,
"log_odds_ratio": -0.1846763789653778,
"logits/chosen": 365.7724304199219,
"logits/rejected": 387.26141357421875,
"logps/chosen": -0.42183151841163635,
"logps/rejected": -1.4507567882537842,
"loss": 0.6027,
"nll_loss": 0.5997955203056335,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.021091576665639877,
"rewards/margins": 0.05144626647233963,
"rewards/rejected": -0.07253783941268921,
"step": 375
},
{
"epoch": 2.2652757078986587,
"grad_norm": 18.135498046875,
"learning_rate": 2.564945880212886e-06,
"log_odds_chosen": 2.311295509338379,
"log_odds_ratio": -0.12876024842262268,
"logits/chosen": 364.0061950683594,
"logits/rejected": 350.2301330566406,
"logps/chosen": -0.29145348072052,
"logps/rejected": -1.3336101770401,
"loss": 0.5545,
"nll_loss": 0.5340723991394043,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.014572675339877605,
"rewards/margins": 0.05210784077644348,
"rewards/rejected": -0.06668051332235336,
"step": 380
},
{
"epoch": 2.2950819672131146,
"grad_norm": 10.94619369506836,
"learning_rate": 2.5482359571881276e-06,
"log_odds_chosen": 2.5354793071746826,
"log_odds_ratio": -0.115506611764431,
"logits/chosen": 353.3926696777344,
"logits/rejected": 348.86944580078125,
"logps/chosen": -0.2818690240383148,
"logps/rejected": -1.487006425857544,
"loss": 0.5179,
"nll_loss": 0.476929247379303,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.014093451201915741,
"rewards/margins": 0.06025686860084534,
"rewards/rejected": -0.07435031235218048,
"step": 385
},
{
"epoch": 2.3248882265275705,
"grad_norm": 12.89717960357666,
"learning_rate": 2.5318484177091667e-06,
"log_odds_chosen": 2.246914863586426,
"log_odds_ratio": -0.13051298260688782,
"logits/chosen": 370.3692626953125,
"logits/rejected": 393.1583557128906,
"logps/chosen": -0.37999650835990906,
"logps/rejected": -1.5727269649505615,
"loss": 0.5955,
"nll_loss": 0.6084927320480347,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.018999826163053513,
"rewards/margins": 0.05963651463389397,
"rewards/rejected": -0.07863634079694748,
"step": 390
},
{
"epoch": 2.354694485842027,
"grad_norm": 9.882362365722656,
"learning_rate": 2.515773027133138e-06,
"log_odds_chosen": 2.3919968605041504,
"log_odds_ratio": -0.13801579177379608,
"logits/chosen": 369.07232666015625,
"logits/rejected": 362.56475830078125,
"logps/chosen": -0.2836388051509857,
"logps/rejected": -1.353062391281128,
"loss": 0.5206,
"nll_loss": 0.473809152841568,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.014181938953697681,
"rewards/margins": 0.05347117781639099,
"rewards/rejected": -0.0676531195640564,
"step": 395
},
{
"epoch": 2.384500745156483,
"grad_norm": 20.866735458374023,
"learning_rate": 2.5e-06,
"log_odds_chosen": 2.305642604827881,
"log_odds_ratio": -0.17361058294773102,
"logits/chosen": 367.1854553222656,
"logits/rejected": 388.62860107421875,
"logps/chosen": -0.37132248282432556,
"logps/rejected": -1.6480903625488281,
"loss": 0.5804,
"nll_loss": 0.5412487387657166,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.018566126003861427,
"rewards/margins": 0.0638383999466896,
"rewards/rejected": -0.08240451663732529,
"step": 400
},
{
"epoch": 2.4143070044709387,
"grad_norm": 17.410255432128906,
"learning_rate": 2.484519974999767e-06,
"log_odds_chosen": 2.341656446456909,
"log_odds_ratio": -0.18742091953754425,
"logits/chosen": 417.4825744628906,
"logits/rejected": 384.49346923828125,
"logps/chosen": -0.38954219222068787,
"logps/rejected": -1.552782416343689,
"loss": 0.5795,
"nll_loss": 0.5449979305267334,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.019477110356092453,
"rewards/margins": 0.05816201493144035,
"rewards/rejected": -0.07763911783695221,
"step": 405
},
{
"epoch": 2.444113263785395,
"grad_norm": 11.311455726623535,
"learning_rate": 2.4693239916239746e-06,
"log_odds_chosen": 2.352574586868286,
"log_odds_ratio": -0.18433162569999695,
"logits/chosen": 365.95965576171875,
"logits/rejected": 380.1703186035156,
"logps/chosen": -0.37695974111557007,
"logps/rejected": -1.5367991924285889,
"loss": 0.5696,
"nll_loss": 0.5719352960586548,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.018847983330488205,
"rewards/margins": 0.05799197405576706,
"rewards/rejected": -0.07683996111154556,
"step": 410
},
{
"epoch": 2.473919523099851,
"grad_norm": 11.967494010925293,
"learning_rate": 2.4544034683690802e-06,
"log_odds_chosen": 2.2503182888031006,
"log_odds_ratio": -0.15851208567619324,
"logits/chosen": 364.34222412109375,
"logits/rejected": 394.3598327636719,
"logps/chosen": -0.3465135991573334,
"logps/rejected": -1.4553066492080688,
"loss": 0.5766,
"nll_loss": 0.5365554690361023,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.01732568070292473,
"rewards/margins": 0.05543965846300125,
"rewards/rejected": -0.07276533544063568,
"step": 415
},
{
"epoch": 2.503725782414307,
"grad_norm": 11.675920486450195,
"learning_rate": 2.4397501823713327e-06,
"log_odds_chosen": 2.0490882396698,
"log_odds_ratio": -0.1818782538175583,
"logits/chosen": 367.0909423828125,
"logits/rejected": 343.985107421875,
"logps/chosen": -0.36017632484436035,
"logps/rejected": -1.39711594581604,
"loss": 0.554,
"nll_loss": 0.6418091654777527,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.018008816987276077,
"rewards/margins": 0.05184697359800339,
"rewards/rejected": -0.06985578685998917,
"step": 420
},
{
"epoch": 2.533532041728763,
"grad_norm": 11.233902931213379,
"learning_rate": 2.4253562503633297e-06,
"log_odds_chosen": 2.5332672595977783,
"log_odds_ratio": -0.10215308517217636,
"logits/chosen": 365.8087463378906,
"logits/rejected": 362.74371337890625,
"logps/chosen": -0.3472338318824768,
"logps/rejected": -1.7049144506454468,
"loss": 0.5363,
"nll_loss": 0.5403138399124146,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.01736168935894966,
"rewards/margins": 0.06788404285907745,
"rewards/rejected": -0.08524572849273682,
"step": 425
},
{
"epoch": 2.563338301043219,
"grad_norm": 16.26917266845703,
"learning_rate": 2.411214110852061e-06,
"log_odds_chosen": 2.512302875518799,
"log_odds_ratio": -0.1274806559085846,
"logits/chosen": 365.8606262207031,
"logits/rejected": 377.60894775390625,
"logps/chosen": -0.30852970480918884,
"logps/rejected": -1.5747673511505127,
"loss": 0.551,
"nll_loss": 0.5144289135932922,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.015426484867930412,
"rewards/margins": 0.06331188976764679,
"rewards/rejected": -0.07873837649822235,
"step": 430
},
{
"epoch": 2.593144560357675,
"grad_norm": 13.473649024963379,
"learning_rate": 2.3973165074269213e-06,
"log_odds_chosen": 2.2823190689086914,
"log_odds_ratio": -0.1513710767030716,
"logits/chosen": 372.6357421875,
"logits/rejected": 341.8959045410156,
"logps/chosen": -0.3947034776210785,
"logps/rejected": -1.5539586544036865,
"loss": 0.5703,
"nll_loss": 0.5524027943611145,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.019735176116228104,
"rewards/margins": 0.05796275660395622,
"rewards/rejected": -0.07769793272018433,
"step": 435
},
{
"epoch": 2.6229508196721314,
"grad_norm": 15.039813041687012,
"learning_rate": 2.3836564731139807e-06,
"log_odds_chosen": 2.4606306552886963,
"log_odds_ratio": -0.11453738063573837,
"logits/chosen": 356.7464599609375,
"logits/rejected": 367.7265930175781,
"logps/chosen": -0.27872234582901,
"logps/rejected": -1.4566452503204346,
"loss": 0.5883,
"nll_loss": 0.5446338653564453,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.0139361172914505,
"rewards/margins": 0.05889614298939705,
"rewards/rejected": -0.07283225655555725,
"step": 440
},
{
"epoch": 2.6527570789865873,
"grad_norm": 13.054855346679688,
"learning_rate": 2.3702273156998867e-06,
"log_odds_chosen": 2.510906219482422,
"log_odds_ratio": -0.11371259391307831,
"logits/chosen": 337.3484191894531,
"logits/rejected": 373.2784423828125,
"logps/chosen": -0.35476621985435486,
"logps/rejected": -1.7162315845489502,
"loss": 0.5632,
"nll_loss": 0.5669391751289368,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.017738312482833862,
"rewards/margins": 0.06807325780391693,
"rewards/rejected": -0.08581157773733139,
"step": 445
},
{
"epoch": 2.682563338301043,
"grad_norm": 12.158041954040527,
"learning_rate": 2.357022603955159e-06,
"log_odds_chosen": 2.407587766647339,
"log_odds_ratio": -0.11502983421087265,
"logits/chosen": 363.87554931640625,
"logits/rejected": 364.67071533203125,
"logps/chosen": -0.4218372404575348,
"logps/rejected": -1.7667124271392822,
"loss": 0.5905,
"nll_loss": 0.5684647560119629,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.02109185978770256,
"rewards/margins": 0.06724376231431961,
"rewards/rejected": -0.08833561837673187,
"step": 450
},
{
"epoch": 2.712369597615499,
"grad_norm": 14.808917045593262,
"learning_rate": 2.3440361546924774e-06,
"log_odds_chosen": 2.5720152854919434,
"log_odds_ratio": -0.1182328313589096,
"logits/chosen": 389.94683837890625,
"logits/rejected": 369.15606689453125,
"logps/chosen": -0.3745032250881195,
"logps/rejected": -1.6708095073699951,
"loss": 0.6099,
"nll_loss": 0.5873192548751831,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.018725162371993065,
"rewards/margins": 0.06481531262397766,
"rewards/rejected": -0.08354047685861588,
"step": 455
},
{
"epoch": 2.742175856929955,
"grad_norm": 15.351086616516113,
"learning_rate": 2.3312620206007847e-06,
"log_odds_chosen": 2.6212141513824463,
"log_odds_ratio": -0.12157906591892242,
"logits/chosen": 381.9286193847656,
"logits/rejected": 401.04998779296875,
"logps/chosen": -0.336896151304245,
"logps/rejected": -1.7683613300323486,
"loss": 0.5888,
"nll_loss": 0.6308404207229614,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.01684480905532837,
"rewards/margins": 0.07157325744628906,
"rewards/rejected": -0.08841806650161743,
"step": 460
},
{
"epoch": 2.7719821162444114,
"grad_norm": 13.619884490966797,
"learning_rate": 2.3186944788008413e-06,
"log_odds_chosen": 2.487888813018799,
"log_odds_ratio": -0.1321084201335907,
"logits/chosen": 378.2283630371094,
"logits/rejected": 382.45391845703125,
"logps/chosen": -0.3096372187137604,
"logps/rejected": -1.5240898132324219,
"loss": 0.5702,
"nll_loss": 0.5487266778945923,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.015481861308217049,
"rewards/margins": 0.060722626745700836,
"rewards/rejected": -0.07620447874069214,
"step": 465
},
{
"epoch": 2.8017883755588673,
"grad_norm": 11.559633255004883,
"learning_rate": 2.3063280200722128e-06,
"log_odds_chosen": 2.1641154289245605,
"log_odds_ratio": -0.19840756058692932,
"logits/chosen": 382.61077880859375,
"logits/rejected": 354.3682556152344,
"logps/chosen": -0.408609539270401,
"logps/rejected": -1.5154647827148438,
"loss": 0.5838,
"nll_loss": 0.5971536636352539,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.02043047733604908,
"rewards/margins": 0.05534275621175766,
"rewards/rejected": -0.07577323168516159,
"step": 470
},
{
"epoch": 2.8315946348733236,
"grad_norm": 12.24728012084961,
"learning_rate": 2.2941573387056174e-06,
"log_odds_chosen": 2.448145866394043,
"log_odds_ratio": -0.14108145236968994,
"logits/chosen": 352.58197021484375,
"logits/rejected": 376.586181640625,
"logps/chosen": -0.3962209224700928,
"logps/rejected": -1.6542632579803467,
"loss": 0.5532,
"nll_loss": 0.5462762713432312,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.01981104537844658,
"rewards/margins": 0.06290213763713837,
"rewards/rejected": -0.08271317183971405,
"step": 475
},
{
"epoch": 2.8614008941877795,
"grad_norm": 11.175488471984863,
"learning_rate": 2.2821773229381924e-06,
"log_odds_chosen": 2.349735736846924,
"log_odds_ratio": -0.11864028871059418,
"logits/chosen": 361.24639892578125,
"logits/rejected": 402.2587585449219,
"logps/chosen": -0.33937591314315796,
"logps/rejected": -1.5452320575714111,
"loss": 0.5077,
"nll_loss": 0.4929002821445465,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.01696879416704178,
"rewards/margins": 0.060292817652225494,
"rewards/rejected": -0.07726161181926727,
"step": 480
},
{
"epoch": 2.8912071535022354,
"grad_norm": 12.026611328125,
"learning_rate": 2.270383045932499e-06,
"log_odds_chosen": 2.4791646003723145,
"log_odds_ratio": -0.12428289651870728,
"logits/chosen": 358.9771728515625,
"logits/rejected": 380.42901611328125,
"logps/chosen": -0.3796696364879608,
"logps/rejected": -1.7137501239776611,
"loss": 0.5514,
"nll_loss": 0.5423077344894409,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.01898348517715931,
"rewards/margins": 0.06670401990413666,
"rewards/rejected": -0.08568750321865082,
"step": 485
},
{
"epoch": 2.9210134128166914,
"grad_norm": 12.008419036865234,
"learning_rate": 2.2587697572631284e-06,
"log_odds_chosen": 2.308088541030884,
"log_odds_ratio": -0.1636713743209839,
"logits/chosen": 370.170654296875,
"logits/rejected": 335.97857666015625,
"logps/chosen": -0.41963282227516174,
"logps/rejected": -1.61661696434021,
"loss": 0.6122,
"nll_loss": 0.5415998697280884,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.020981641486287117,
"rewards/margins": 0.05984921008348465,
"rewards/rejected": -0.08083084970712662,
"step": 490
},
{
"epoch": 2.9508196721311473,
"grad_norm": 11.31982135772705,
"learning_rate": 2.2473328748774737e-06,
"log_odds_chosen": 2.167809247970581,
"log_odds_ratio": -0.174642875790596,
"logits/chosen": 364.75048828125,
"logits/rejected": 393.1929626464844,
"logps/chosen": -0.4049316346645355,
"logps/rejected": -1.4748752117156982,
"loss": 0.5603,
"nll_loss": 0.5929296612739563,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.020246583968400955,
"rewards/margins": 0.05349717289209366,
"rewards/rejected": -0.07374376058578491,
"step": 495
},
{
"epoch": 2.9806259314456036,
"grad_norm": 13.445329666137695,
"learning_rate": 2.23606797749979e-06,
"log_odds_chosen": 2.3928182125091553,
"log_odds_ratio": -0.14649812877178192,
"logits/chosen": 374.05535888671875,
"logits/rejected": 372.2560119628906,
"logps/chosen": -0.34778839349746704,
"logps/rejected": -1.5306968688964844,
"loss": 0.5921,
"nll_loss": 0.5048509836196899,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.017389420419931412,
"rewards/margins": 0.059145428240299225,
"rewards/rejected": -0.07653484493494034,
"step": 500
},
{
"epoch": 2.9865871833084947,
"eval_log_odds_chosen": 0.28559842705726624,
"eval_log_odds_ratio": -0.6970076560974121,
"eval_logits/chosen": 297.1682434082031,
"eval_logits/rejected": 268.0281982421875,
"eval_logps/chosen": -1.1085351705551147,
"eval_logps/rejected": -1.2919707298278809,
"eval_loss": 1.5517091751098633,
"eval_nll_loss": 1.4855411052703857,
"eval_rewards/accuracies": 0.5611510872840881,
"eval_rewards/chosen": -0.055426761507987976,
"eval_rewards/margins": 0.009171773679554462,
"eval_rewards/rejected": -0.06459853798151016,
"eval_runtime": 112.1561,
"eval_samples_per_second": 4.931,
"eval_steps_per_second": 1.239,
"step": 501
},
{
"epoch": 2.9865871833084947,
"step": 501,
"total_flos": 0.0,
"train_loss": 1.4570662823027956,
"train_runtime": 13599.7579,
"train_samples_per_second": 1.183,
"train_steps_per_second": 0.037
}
],
"logging_steps": 5,
"max_steps": 501,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}