diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -10,1869 +10,1869 @@ "log_history": [ { "epoch": 0.029806259314456036, - "grad_norm": 1765.45556640625, + "grad_norm": 1762.8857421875, "learning_rate": 2.5000000000000004e-07, - "log_odds_chosen": -0.21943321824073792, - "log_odds_ratio": -1.0067085027694702, - "logits/chosen": 204.28456115722656, - "logits/rejected": 202.977294921875, - "logps/chosen": -14.824699401855469, - "logps/rejected": -14.605265617370605, - "loss": 14.9632, - "nll_loss": 14.546000480651855, + "log_odds_chosen": -0.22333388030529022, + "log_odds_ratio": -1.0081762075424194, + "logits/chosen": 204.30679321289062, + "logits/rejected": 202.9920654296875, + "logps/chosen": -14.826652526855469, + "logps/rejected": -14.603320121765137, + "loss": 14.961, + "nll_loss": 14.546102523803711, "rewards/accuracies": 0.3499999940395355, - "rewards/chosen": -0.7412349581718445, - "rewards/margins": -0.01097165048122406, - "rewards/rejected": -0.7302632927894592, + "rewards/chosen": -0.7413326501846313, + "rewards/margins": -0.011166660115122795, + "rewards/rejected": -0.7301660776138306, "step": 5 }, { "epoch": 0.05961251862891207, - "grad_norm": 1195.8741455078125, + "grad_norm": 1195.5567626953125, "learning_rate": 5.000000000000001e-07, - "log_odds_chosen": 0.24401184916496277, - "log_odds_ratio": -0.7723467946052551, - "logits/chosen": 219.5009307861328, - "logits/rejected": 223.572021484375, - "logps/chosen": -12.244219779968262, - "logps/rejected": -12.487574577331543, - "loss": 12.6127, - "nll_loss": 12.338577270507812, + "log_odds_chosen": 0.25514093041419983, + "log_odds_ratio": -0.770182192325592, + "logits/chosen": 219.4593505859375, + "logits/rejected": 223.51095581054688, + "logps/chosen": -12.235333442687988, + "logps/rejected": -12.489803314208984, + "loss": 12.6124, + "nll_loss": 12.337944984436035, "rewards/accuracies": 0.5249999761581421, - "rewards/chosen": -0.6122109293937683, - "rewards/margins": 0.012167713604867458, - "rewards/rejected": -0.6243786215782166, + "rewards/chosen": -0.6117666363716125, + "rewards/margins": 0.012723559513688087, + "rewards/rejected": -0.6244901418685913, "step": 10 }, { "epoch": 0.08941877794336811, - "grad_norm": 722.9285278320312, + "grad_norm": 721.7440185546875, "learning_rate": 7.5e-07, - "log_odds_chosen": 0.0473303496837616, - "log_odds_ratio": -0.7741748690605164, - "logits/chosen": 282.27947998046875, - "logits/rejected": 261.2786865234375, - "logps/chosen": -7.970606803894043, - "logps/rejected": -8.0178804397583, - "loss": 8.2789, - "nll_loss": 7.956001281738281, - "rewards/accuracies": 0.574999988079071, - "rewards/chosen": -0.39853033423423767, - "rewards/margins": 0.002363653387874365, - "rewards/rejected": -0.40089401602745056, + "log_odds_chosen": 0.04993244633078575, + "log_odds_ratio": -0.7743036150932312, + "logits/chosen": 281.7969055175781, + "logits/rejected": 260.814453125, + "logps/chosen": -7.967254638671875, + "logps/rejected": -8.01715087890625, + "loss": 8.2807, + "nll_loss": 7.958427429199219, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.3983627259731293, + "rewards/margins": 0.0024948143400251865, + "rewards/rejected": -0.4008575975894928, "step": 15 }, { "epoch": 0.11922503725782414, - "grad_norm": 212.62242126464844, + "grad_norm": 213.13336181640625, "learning_rate": 1.0000000000000002e-06, - "log_odds_chosen": -0.15251407027244568, - "log_odds_ratio": -0.9524042010307312, - "logits/chosen": 281.0796813964844, - "logits/rejected": 275.33013916015625, - "logps/chosen": -5.375563621520996, - "logps/rejected": -5.224381446838379, - "loss": 5.4453, - "nll_loss": 5.453672885894775, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.2687782049179077, - "rewards/margins": -0.007559105753898621, - "rewards/rejected": -0.2612191140651703, + "log_odds_chosen": -0.1490481197834015, + "log_odds_ratio": -0.95225590467453, + "logits/chosen": 280.4493103027344, + "logits/rejected": 274.66717529296875, + "logps/chosen": -5.374236583709717, + "logps/rejected": -5.226569175720215, + "loss": 5.4432, + "nll_loss": 5.450861930847168, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.2687118351459503, + "rewards/margins": -0.007383383810520172, + "rewards/rejected": -0.26132842898368835, "step": 20 }, { "epoch": 0.14903129657228018, - "grad_norm": 166.2330322265625, + "grad_norm": 154.36373901367188, "learning_rate": 1.25e-06, - "log_odds_chosen": -0.04391743987798691, - "log_odds_ratio": -0.8879317045211792, - "logits/chosen": 299.25030517578125, - "logits/rejected": 308.5736389160156, - "logps/chosen": -3.281724452972412, - "logps/rejected": -3.2199606895446777, - "loss": 3.5013, - "nll_loss": 3.3902111053466797, + "log_odds_chosen": -0.05349766090512276, + "log_odds_ratio": -0.8921065330505371, + "logits/chosen": 297.8148193359375, + "logits/rejected": 307.04766845703125, + "logps/chosen": -3.2826087474823, + "logps/rejected": -3.2111122608184814, + "loss": 3.5, + "nll_loss": 3.3887104988098145, "rewards/accuracies": 0.5, - "rewards/chosen": -0.1640862375497818, - "rewards/margins": -0.0030881778802722692, - "rewards/rejected": -0.16099804639816284, + "rewards/chosen": -0.16413041949272156, + "rewards/margins": -0.0035748339723795652, + "rewards/rejected": -0.16055560111999512, "step": 25 }, { "epoch": 0.17883755588673622, - "grad_norm": 83.01959228515625, + "grad_norm": 80.20259094238281, "learning_rate": 1.5e-06, - "log_odds_chosen": -0.07733707875013351, - "log_odds_ratio": -0.8942793607711792, - "logits/chosen": 347.654052734375, - "logits/rejected": 376.1275329589844, - "logps/chosen": -2.622657537460327, - "logps/rejected": -2.5195186138153076, - "loss": 2.5561, - "nll_loss": 2.6379752159118652, - "rewards/accuracies": 0.550000011920929, - "rewards/chosen": -0.13113287091255188, - "rewards/margins": -0.005156923085451126, - "rewards/rejected": -0.12597593665122986, + "log_odds_chosen": -0.07229617983102798, + "log_odds_ratio": -0.8916282653808594, + "logits/chosen": 345.52191162109375, + "logits/rejected": 374.13287353515625, + "logps/chosen": -2.6274566650390625, + "logps/rejected": -2.530172348022461, + "loss": 2.5601, + "nll_loss": 2.645339012145996, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.1313728392124176, + "rewards/margins": -0.004864226561039686, + "rewards/rejected": -0.12650862336158752, "step": 30 }, { "epoch": 0.20864381520119224, - "grad_norm": 40.82948684692383, + "grad_norm": 41.495731353759766, "learning_rate": 1.75e-06, - "log_odds_chosen": 0.16575101017951965, - "log_odds_ratio": -0.7404494285583496, - "logits/chosen": 382.174072265625, - "logits/rejected": 370.3721008300781, - "logps/chosen": -1.8132009506225586, - "logps/rejected": -1.9216792583465576, - "loss": 2.1303, - "nll_loss": 2.0061001777648926, - "rewards/accuracies": 0.550000011920929, - "rewards/chosen": -0.09066005051136017, - "rewards/margins": 0.005423928610980511, - "rewards/rejected": -0.09608397632837296, + "log_odds_chosen": 0.1673038899898529, + "log_odds_ratio": -0.7395197153091431, + "logits/chosen": 379.2995300292969, + "logits/rejected": 367.61065673828125, + "logps/chosen": -1.7991399765014648, + "logps/rejected": -1.9078947305679321, + "loss": 2.1231, + "nll_loss": 1.9985812902450562, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.08995698392391205, + "rewards/margins": 0.0054377405904233456, + "rewards/rejected": -0.09539473056793213, "step": 35 }, { "epoch": 0.23845007451564829, - "grad_norm": 373.0379333496094, + "grad_norm": 57.26367950439453, "learning_rate": 2.0000000000000003e-06, - "log_odds_chosen": 0.017796561121940613, - "log_odds_ratio": -0.7689038515090942, - "logits/chosen": 372.22100830078125, - "logits/rejected": 370.50439453125, - "logps/chosen": -1.6518943309783936, - "logps/rejected": -1.6649363040924072, - "loss": 1.9486, - "nll_loss": 2.0397918224334717, - "rewards/accuracies": 0.4749999940395355, - "rewards/chosen": -0.08259471505880356, - "rewards/margins": 0.0006520989118143916, - "rewards/rejected": -0.08324681222438812, + "log_odds_chosen": 0.02127310074865818, + "log_odds_ratio": -0.7780741453170776, + "logits/chosen": 371.747802734375, + "logits/rejected": 370.3223571777344, + "logps/chosen": -1.6784114837646484, + "logps/rejected": -1.6915397644042969, + "loss": 1.9474, + "nll_loss": 2.0377304553985596, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.08392057567834854, + "rewards/margins": 0.0006564242066815495, + "rewards/rejected": -0.08457700163125992, "step": 40 }, { "epoch": 0.26825633383010433, - "grad_norm": 45.907875061035156, + "grad_norm": 48.953094482421875, "learning_rate": 2.25e-06, - "log_odds_chosen": 0.027211258187890053, - "log_odds_ratio": -0.7474765777587891, - "logits/chosen": 388.0882873535156, - "logits/rejected": 397.65460205078125, - "logps/chosen": -1.570575475692749, - "logps/rejected": -1.5880815982818604, - "loss": 1.8867, - "nll_loss": 1.7669483423233032, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.07852877676486969, - "rewards/margins": 0.0008753080619499087, - "rewards/rejected": -0.0794040784239769, + "log_odds_chosen": 0.06037778779864311, + "log_odds_ratio": -0.7294493317604065, + "logits/chosen": 385.0721740722656, + "logits/rejected": 395.3931884765625, + "logps/chosen": -1.5469728708267212, + "logps/rejected": -1.5890170335769653, + "loss": 1.8679, + "nll_loss": 1.742649793624878, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.07734864205121994, + "rewards/margins": 0.0021022059954702854, + "rewards/rejected": -0.07945084571838379, "step": 45 }, { "epoch": 0.29806259314456035, - "grad_norm": 45.138648986816406, + "grad_norm": 85.16621398925781, "learning_rate": 2.5e-06, - "log_odds_chosen": 0.19176200032234192, - "log_odds_ratio": -0.6679073572158813, - "logits/chosen": 396.5473327636719, - "logits/rejected": 418.2545471191406, - "logps/chosen": -1.404476523399353, - "logps/rejected": -1.5453894138336182, - "loss": 1.8521, - "nll_loss": 1.8635737895965576, - "rewards/accuracies": 0.5249999761581421, - "rewards/chosen": -0.07022383064031601, - "rewards/margins": 0.00704564293846488, - "rewards/rejected": -0.07726947963237762, + "log_odds_chosen": 0.22148697078227997, + "log_odds_ratio": -0.6563897728919983, + "logits/chosen": 395.87554931640625, + "logits/rejected": 417.33563232421875, + "logps/chosen": -1.4042726755142212, + "logps/rejected": -1.5677330493927002, + "loss": 1.8511, + "nll_loss": 1.8633716106414795, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.07021363079547882, + "rewards/margins": 0.008173028007149696, + "rewards/rejected": -0.07838664948940277, "step": 50 }, { "epoch": 0.32786885245901637, - "grad_norm": 146.7917938232422, + "grad_norm": 36.78052520751953, "learning_rate": 2.7500000000000004e-06, - "log_odds_chosen": 0.06669901311397552, - "log_odds_ratio": -0.7251878380775452, - "logits/chosen": 385.10101318359375, - "logits/rejected": 378.09368896484375, - "logps/chosen": -1.4211018085479736, - "logps/rejected": -1.4656105041503906, - "loss": 1.8795, - "nll_loss": 1.921286940574646, + "log_odds_chosen": 0.04750330001115799, + "log_odds_ratio": -0.7403008341789246, + "logits/chosen": 383.05865478515625, + "logits/rejected": 376.47137451171875, + "logps/chosen": -1.4311497211456299, + "logps/rejected": -1.4584500789642334, + "loss": 1.8524, + "nll_loss": 1.9031813144683838, "rewards/accuracies": 0.550000011920929, - "rewards/chosen": -0.0710550919175148, - "rewards/margins": 0.0022254353389143944, - "rewards/rejected": -0.07328052818775177, + "rewards/chosen": -0.07155750691890717, + "rewards/margins": 0.0013650130713358521, + "rewards/rejected": -0.07292251288890839, "step": 55 }, { "epoch": 0.35767511177347244, - "grad_norm": 36.712623596191406, + "grad_norm": 43.11362838745117, "learning_rate": 3e-06, - "log_odds_chosen": 0.1147073283791542, - "log_odds_ratio": -0.6886881589889526, - "logits/chosen": 391.64190673828125, - "logits/rejected": 383.321044921875, - "logps/chosen": -1.381176471710205, - "logps/rejected": -1.4568852186203003, - "loss": 1.7236, - "nll_loss": 1.7853686809539795, - "rewards/accuracies": 0.550000011920929, - "rewards/chosen": -0.06905882805585861, - "rewards/margins": 0.0037854425609111786, - "rewards/rejected": -0.07284426689147949, + "log_odds_chosen": 0.15154634416103363, + "log_odds_ratio": -0.6628466844558716, + "logits/chosen": 388.72491455078125, + "logits/rejected": 380.75030517578125, + "logps/chosen": -1.324789047241211, + "logps/rejected": -1.4295395612716675, + "loss": 1.6907, + "nll_loss": 1.754913568496704, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.06623945385217667, + "rewards/margins": 0.005237526725977659, + "rewards/rejected": -0.07147698104381561, "step": 60 }, { "epoch": 0.38748137108792846, - "grad_norm": 27.392560958862305, + "grad_norm": 29.449420928955078, "learning_rate": 3.2500000000000002e-06, - "log_odds_chosen": 0.0811905488371849, - "log_odds_ratio": -0.705346941947937, - "logits/chosen": 390.33514404296875, - "logits/rejected": 391.02215576171875, - "logps/chosen": -1.2655917406082153, - "logps/rejected": -1.3007347583770752, - "loss": 1.6207, - "nll_loss": 1.5275566577911377, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.06327958405017853, - "rewards/margins": 0.0017571467906236649, - "rewards/rejected": -0.06503672897815704, + "log_odds_chosen": 0.0873890295624733, + "log_odds_ratio": -0.710555911064148, + "logits/chosen": 387.2967834472656, + "logits/rejected": 388.5743103027344, + "logps/chosen": -1.249342679977417, + "logps/rejected": -1.2920448780059814, + "loss": 1.5953, + "nll_loss": 1.5086474418640137, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.06246713548898697, + "rewards/margins": 0.0021351135801523924, + "rewards/rejected": -0.06460224092006683, "step": 65 }, { "epoch": 0.4172876304023845, - "grad_norm": 108.5710678100586, + "grad_norm": 66.8738784790039, "learning_rate": 3.5e-06, - "log_odds_chosen": 0.030709872022271156, - "log_odds_ratio": -0.7311884760856628, - "logits/chosen": 374.34515380859375, - "logits/rejected": 382.85137939453125, - "logps/chosen": -1.3965779542922974, - "logps/rejected": -1.4153110980987549, - "loss": 1.6444, - "nll_loss": 1.6620601415634155, + "log_odds_chosen": 0.049095284193754196, + "log_odds_ratio": -0.7218947410583496, + "logits/chosen": 375.4095153808594, + "logits/rejected": 383.84027099609375, + "logps/chosen": -1.3798081874847412, + "logps/rejected": -1.4165852069854736, + "loss": 1.632, + "nll_loss": 1.642600655555725, "rewards/accuracies": 0.5249999761581421, - "rewards/chosen": -0.06982889771461487, - "rewards/margins": 0.0009366500889882445, - "rewards/rejected": -0.07076555490493774, + "rewards/chosen": -0.06899039447307587, + "rewards/margins": 0.001838861615397036, + "rewards/rejected": -0.07082925736904144, "step": 70 }, { "epoch": 0.44709388971684055, - "grad_norm": 42.35745620727539, + "grad_norm": 24.510610580444336, "learning_rate": 3.7500000000000005e-06, - "log_odds_chosen": 0.17296305298805237, - "log_odds_ratio": -0.6624878644943237, - "logits/chosen": 394.97998046875, - "logits/rejected": 382.9609069824219, - "logps/chosen": -1.2325050830841064, - "logps/rejected": -1.3494950532913208, - "loss": 1.593, - "nll_loss": 1.53190016746521, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.06162526085972786, - "rewards/margins": 0.005849492736160755, - "rewards/rejected": -0.06747475266456604, + "log_odds_chosen": 0.21395280957221985, + "log_odds_ratio": -0.6359378099441528, + "logits/chosen": 395.4688415527344, + "logits/rejected": 382.9261169433594, + "logps/chosen": -1.1935937404632568, + "logps/rejected": -1.337820291519165, + "loss": 1.5629, + "nll_loss": 1.5003348588943481, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.05967969447374344, + "rewards/margins": 0.00721132755279541, + "rewards/rejected": -0.06689102202653885, "step": 75 }, { "epoch": 0.47690014903129657, - "grad_norm": 36.82132339477539, + "grad_norm": 30.089900970458984, "learning_rate": 4.000000000000001e-06, - "log_odds_chosen": 0.17391765117645264, - "log_odds_ratio": -0.6597349643707275, - "logits/chosen": 381.92547607421875, - "logits/rejected": 404.1871643066406, - "logps/chosen": -1.219416856765747, - "logps/rejected": -1.3357045650482178, - "loss": 1.6131, - "nll_loss": 1.6360372304916382, - "rewards/accuracies": 0.574999988079071, - "rewards/chosen": -0.06097083538770676, - "rewards/margins": 0.005814389791339636, - "rewards/rejected": -0.06678523123264313, + "log_odds_chosen": 0.20370396971702576, + "log_odds_ratio": -0.6502530574798584, + "logits/chosen": 382.20904541015625, + "logits/rejected": 403.7727355957031, + "logps/chosen": -1.17880117893219, + "logps/rejected": -1.3107407093048096, + "loss": 1.5995, + "nll_loss": 1.6122217178344727, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.058940064162015915, + "rewards/margins": 0.006596976425498724, + "rewards/rejected": -0.06553704291582108, "step": 80 }, { "epoch": 0.5067064083457526, - "grad_norm": 29.89981460571289, + "grad_norm": 165.75381469726562, "learning_rate": 4.25e-06, - "log_odds_chosen": 0.18893679976463318, - "log_odds_ratio": -0.6906715631484985, - "logits/chosen": 408.48101806640625, - "logits/rejected": 392.5835266113281, - "logps/chosen": -1.2594187259674072, - "logps/rejected": -1.3885504007339478, - "loss": 1.5956, - "nll_loss": 1.603137731552124, - "rewards/accuracies": 0.574999988079071, - "rewards/chosen": -0.06297092139720917, - "rewards/margins": 0.006456589791923761, - "rewards/rejected": -0.06942752748727798, + "log_odds_chosen": 0.07357416301965714, + "log_odds_ratio": -0.8076593279838562, + "logits/chosen": 408.95843505859375, + "logits/rejected": 394.03826904296875, + "logps/chosen": -1.4526355266571045, + "logps/rejected": -1.4595062732696533, + "loss": 1.6746, + "nll_loss": 1.7690614461898804, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.07263178378343582, + "rewards/margins": 0.0003435421676840633, + "rewards/rejected": -0.07297532260417938, "step": 85 }, { "epoch": 0.5365126676602087, - "grad_norm": 32.92945861816406, + "grad_norm": 45.735618591308594, "learning_rate": 4.5e-06, - "log_odds_chosen": 0.555855393409729, - "log_odds_ratio": -0.5900682806968689, - "logits/chosen": 401.2886657714844, - "logits/rejected": 416.2565002441406, - "logps/chosen": -1.208212971687317, - "logps/rejected": -1.6518011093139648, - "loss": 1.4631, - "nll_loss": 1.474485158920288, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.06041065603494644, - "rewards/margins": 0.022179413586854935, - "rewards/rejected": -0.08259007334709167, + "log_odds_chosen": 0.5337249040603638, + "log_odds_ratio": -0.5693989396095276, + "logits/chosen": 402.0947570800781, + "logits/rejected": 416.75689697265625, + "logps/chosen": -1.3862842321395874, + "logps/rejected": -1.796555757522583, + "loss": 1.5211, + "nll_loss": 1.5622494220733643, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.06931421905755997, + "rewards/margins": 0.020513568073511124, + "rewards/rejected": -0.0898277759552002, "step": 90 }, { "epoch": 0.5663189269746647, - "grad_norm": 34.76884460449219, + "grad_norm": 43.20003890991211, "learning_rate": 4.75e-06, - "log_odds_chosen": 0.19581779837608337, - "log_odds_ratio": -0.6574069261550903, - "logits/chosen": 371.4412536621094, - "logits/rejected": 383.65155029296875, - "logps/chosen": -1.1392086744308472, - "logps/rejected": -1.2306644916534424, - "loss": 1.5584, - "nll_loss": 1.438720941543579, + "log_odds_chosen": 0.18776021897792816, + "log_odds_ratio": -0.6678361892700195, + "logits/chosen": 367.4861145019531, + "logits/rejected": 380.6282958984375, + "logps/chosen": -1.1577775478363037, + "logps/rejected": -1.240468978881836, + "loss": 1.5718, + "nll_loss": 1.4726136922836304, "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05696043372154236, - "rewards/margins": 0.0045727952383458614, - "rewards/rejected": -0.06153322383761406, + "rewards/chosen": -0.05788888409733772, + "rewards/margins": 0.004134564660489559, + "rewards/rejected": -0.06202344968914986, "step": 95 }, { "epoch": 0.5961251862891207, - "grad_norm": 677.6953125, + "grad_norm": 48.09437561035156, "learning_rate": 5e-06, - "log_odds_chosen": 0.08993122726678848, - "log_odds_ratio": -0.7155017256736755, - "logits/chosen": 406.64996337890625, - "logits/rejected": 442.7906188964844, - "logps/chosen": -1.3057138919830322, - "logps/rejected": -1.3538284301757812, - "loss": 1.6646, - "nll_loss": 1.6470537185668945, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.06528569757938385, - "rewards/margins": 0.0024057202972471714, - "rewards/rejected": -0.06769142299890518, + "log_odds_chosen": 0.23021917045116425, + "log_odds_ratio": -0.6669245958328247, + "logits/chosen": 398.15692138671875, + "logits/rejected": 436.06280517578125, + "logps/chosen": -1.3762584924697876, + "logps/rejected": -1.5756226778030396, + "loss": 1.6621, + "nll_loss": 1.676337480545044, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.06881292164325714, + "rewards/margins": 0.009968215599656105, + "rewards/rejected": -0.0787811428308487, "step": 100 }, { "epoch": 0.6259314456035767, - "grad_norm": 81.34811401367188, + "grad_norm": 27.461023330688477, "learning_rate": 4.8795003647426654e-06, - "log_odds_chosen": 0.3003528416156769, - "log_odds_ratio": -0.6239514946937561, - "logits/chosen": 391.1552734375, - "logits/rejected": 403.55609130859375, - "logps/chosen": -1.2369372844696045, - "logps/rejected": -1.4132254123687744, - "loss": 1.6764, - "nll_loss": 1.68059504032135, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.061846863478422165, - "rewards/margins": 0.008814404718577862, - "rewards/rejected": -0.0706612691283226, + "log_odds_chosen": 0.25321143865585327, + "log_odds_ratio": -0.6335381269454956, + "logits/chosen": 394.9198303222656, + "logits/rejected": 407.670166015625, + "logps/chosen": -1.1359978914260864, + "logps/rejected": -1.282949686050415, + "loss": 1.5569, + "nll_loss": 1.5841158628463745, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.05679989606142044, + "rewards/margins": 0.007347588427364826, + "rewards/rejected": -0.0641474798321724, "step": 105 }, { "epoch": 0.6557377049180327, - "grad_norm": 98.60116577148438, + "grad_norm": 58.5862922668457, "learning_rate": 4.767312946227961e-06, - "log_odds_chosen": 0.21533890068531036, - "log_odds_ratio": -0.6346350312232971, - "logits/chosen": 378.2474670410156, - "logits/rejected": 376.3426818847656, - "logps/chosen": -1.1033828258514404, - "logps/rejected": -1.25198233127594, - "loss": 1.5681, - "nll_loss": 1.557680368423462, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.055169135332107544, - "rewards/margins": 0.007429977413266897, - "rewards/rejected": -0.06259911507368088, + "log_odds_chosen": 0.17413778603076935, + "log_odds_ratio": -0.6657994985580444, + "logits/chosen": 372.2387390136719, + "logits/rejected": 370.97259521484375, + "logps/chosen": -1.1112958192825317, + "logps/rejected": -1.2337472438812256, + "loss": 1.5196, + "nll_loss": 1.5138860940933228, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.05556480213999748, + "rewards/margins": 0.00612256396561861, + "rewards/rejected": -0.06168735772371292, "step": 110 }, { "epoch": 0.6855439642324889, - "grad_norm": 22.872821807861328, + "grad_norm": 25.225566864013672, "learning_rate": 4.662524041201569e-06, - "log_odds_chosen": 0.2698196470737457, - "log_odds_ratio": -0.6147719621658325, - "logits/chosen": 400.8478698730469, - "logits/rejected": 407.18634033203125, - "logps/chosen": -0.9827004671096802, - "logps/rejected": -1.127701997756958, - "loss": 1.5249, - "nll_loss": 1.42640221118927, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": -0.04913502186536789, - "rewards/margins": 0.007250082679092884, - "rewards/rejected": -0.0563850998878479, + "log_odds_chosen": 0.2932291030883789, + "log_odds_ratio": -0.6261448264122009, + "logits/chosen": 398.36285400390625, + "logits/rejected": 405.1409912109375, + "logps/chosen": -0.9624778032302856, + "logps/rejected": -1.100894570350647, + "loss": 1.4976, + "nll_loss": 1.4066407680511475, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.04812389984726906, + "rewards/margins": 0.0069208345375955105, + "rewards/rejected": -0.05504472926259041, "step": 115 }, { "epoch": 0.7153502235469449, - "grad_norm": 27.9619083404541, + "grad_norm": 25.138811111450195, "learning_rate": 4.564354645876385e-06, - "log_odds_chosen": 0.3403358459472656, - "log_odds_ratio": -0.6000555753707886, - "logits/chosen": 381.107421875, - "logits/rejected": 381.04864501953125, - "logps/chosen": -1.048896074295044, - "logps/rejected": -1.2232722043991089, - "loss": 1.5554, - "nll_loss": 1.5394407510757446, + "log_odds_chosen": 0.30031102895736694, + "log_odds_ratio": -0.6141648292541504, + "logits/chosen": 381.42999267578125, + "logits/rejected": 381.4985656738281, + "logps/chosen": -1.05239999294281, + "logps/rejected": -1.2082456350326538, + "loss": 1.5521, + "nll_loss": 1.5355098247528076, "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.05244480445981026, - "rewards/margins": 0.00871881190687418, - "rewards/rejected": -0.06116361543536186, + "rewards/chosen": -0.05262000486254692, + "rewards/margins": 0.007792273070663214, + "rewards/rejected": -0.06041227653622627, "step": 120 }, { "epoch": 0.7451564828614009, - "grad_norm": 23.146177291870117, + "grad_norm": 19.848705291748047, "learning_rate": 4.47213595499958e-06, - "log_odds_chosen": 0.08713512122631073, - "log_odds_ratio": -0.7354093790054321, - "logits/chosen": 378.9410400390625, - "logits/rejected": 391.9457702636719, - "logps/chosen": -1.1668498516082764, - "logps/rejected": -1.1973512172698975, - "loss": 1.4862, - "nll_loss": 1.4849971532821655, + "log_odds_chosen": 0.05417771264910698, + "log_odds_ratio": -0.7723890542984009, + "logits/chosen": 375.4615173339844, + "logits/rejected": 388.3155517578125, + "logps/chosen": -1.1864535808563232, + "logps/rejected": -1.1864855289459229, + "loss": 1.4682, + "nll_loss": 1.473937749862671, "rewards/accuracies": 0.550000011920929, - "rewards/chosen": -0.05834249407052994, - "rewards/margins": 0.0015250641154125333, - "rewards/rejected": -0.05986756086349487, + "rewards/chosen": -0.05932268500328064, + "rewards/margins": 1.5988014183676569e-06, + "rewards/rejected": -0.05932428315281868, "step": 125 }, { "epoch": 0.7749627421758569, - "grad_norm": 37.57433319091797, + "grad_norm": 30.878917694091797, "learning_rate": 4.385290096535147e-06, - "log_odds_chosen": 0.13211670517921448, - "log_odds_ratio": -0.7139819860458374, - "logits/chosen": 401.40985107421875, - "logits/rejected": 389.37921142578125, - "logps/chosen": -1.1555781364440918, - "logps/rejected": -1.2059427499771118, - "loss": 1.5256, - "nll_loss": 1.4828169345855713, + "log_odds_chosen": 0.1284504234790802, + "log_odds_ratio": -0.6890888214111328, + "logits/chosen": 400.09014892578125, + "logits/rejected": 389.0010070800781, + "logps/chosen": -1.1370112895965576, + "logps/rejected": -1.1725587844848633, + "loss": 1.5141, + "nll_loss": 1.4747650623321533, "rewards/accuracies": 0.5, - "rewards/chosen": -0.05777891352772713, - "rewards/margins": 0.002518222201615572, - "rewards/rejected": -0.06029713153839111, + "rewards/chosen": -0.05685057118535042, + "rewards/margins": 0.0017773698782548308, + "rewards/rejected": -0.05862794071435928, "step": 130 }, { "epoch": 0.8047690014903129, - "grad_norm": 37.914310455322266, + "grad_norm": 34.69911575317383, "learning_rate": 4.303314829119352e-06, - "log_odds_chosen": 0.10099569708108902, - "log_odds_ratio": -0.7038587331771851, - "logits/chosen": 414.90655517578125, - "logits/rejected": 416.6064453125, - "logps/chosen": -1.1292693614959717, - "logps/rejected": -1.2150599956512451, - "loss": 1.5378, - "nll_loss": 1.5873870849609375, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.056463468819856644, - "rewards/margins": 0.004289527423679829, - "rewards/rejected": -0.060752999037504196, + "log_odds_chosen": 0.07419878244400024, + "log_odds_ratio": -0.7176602482795715, + "logits/chosen": 412.095703125, + "logits/rejected": 414.66827392578125, + "logps/chosen": -1.1232882738113403, + "logps/rejected": -1.1864019632339478, + "loss": 1.5359, + "nll_loss": 1.5837700366973877, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.05616441369056702, + "rewards/margins": 0.0031556878238916397, + "rewards/rejected": -0.059320103377103806, "step": 135 }, { "epoch": 0.834575260804769, - "grad_norm": 21.97135353088379, + "grad_norm": 33.93345642089844, "learning_rate": 4.2257712736425835e-06, - "log_odds_chosen": -0.07928862422704697, - "log_odds_ratio": -0.8006687164306641, - "logits/chosen": 397.2544250488281, - "logits/rejected": 403.22857666015625, - "logps/chosen": -1.11940598487854, - "logps/rejected": -1.0619796514511108, - "loss": 1.5228, - "nll_loss": 1.6315351724624634, - "rewards/accuracies": 0.4749999940395355, - "rewards/chosen": -0.05597030371427536, - "rewards/margins": -0.002871322212740779, - "rewards/rejected": -0.05309898406267166, + "log_odds_chosen": -0.04845789074897766, + "log_odds_ratio": -0.7893471121788025, + "logits/chosen": 398.22607421875, + "logits/rejected": 404.393798828125, + "logps/chosen": -1.119332194328308, + "logps/rejected": -1.0812653303146362, + "loss": 1.5122, + "nll_loss": 1.6213722229003906, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.05596661567687988, + "rewards/margins": -0.0019033461576327682, + "rewards/rejected": -0.05406326800584793, "step": 140 }, { "epoch": 0.8643815201192251, - "grad_norm": 34.809200286865234, + "grad_norm": 22.562604904174805, "learning_rate": 4.1522739926869985e-06, - "log_odds_chosen": -0.004179268144071102, - "log_odds_ratio": -0.7272334694862366, - "logits/chosen": 394.76995849609375, - "logits/rejected": 397.96514892578125, - "logps/chosen": -1.2000293731689453, - "logps/rejected": -1.1946508884429932, - "loss": 1.5155, - "nll_loss": 1.5167872905731201, - "rewards/accuracies": 0.574999988079071, - "rewards/chosen": -0.060001470148563385, - "rewards/margins": -0.0002689236425794661, - "rewards/rejected": -0.05973255634307861, + "log_odds_chosen": -0.06688841432332993, + "log_odds_ratio": -0.7556332349777222, + "logits/chosen": 395.27984619140625, + "logits/rejected": 398.4122009277344, + "logps/chosen": -1.2002326250076294, + "logps/rejected": -1.1435927152633667, + "loss": 1.5121, + "nll_loss": 1.514585256576538, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.06001163646578789, + "rewards/margins": -0.0028319929260760546, + "rewards/rejected": -0.05717964097857475, "step": 145 }, { "epoch": 0.8941877794336811, - "grad_norm": 30.051952362060547, + "grad_norm": 38.268333435058594, "learning_rate": 4.082482904638631e-06, - "log_odds_chosen": 0.36712345480918884, - "log_odds_ratio": -0.5663259625434875, - "logits/chosen": 400.39495849609375, - "logits/rejected": 418.39678955078125, - "logps/chosen": -1.0868648290634155, - "logps/rejected": -1.3322699069976807, - "loss": 1.477, - "nll_loss": 1.3918894529342651, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.054343242198228836, - "rewards/margins": 0.01227025780826807, - "rewards/rejected": -0.06661349534988403, + "log_odds_chosen": 0.3597918152809143, + "log_odds_ratio": -0.5650432705879211, + "logits/chosen": 401.6814270019531, + "logits/rejected": 418.9139709472656, + "logps/chosen": -1.0605757236480713, + "logps/rejected": -1.296025037765503, + "loss": 1.4755, + "nll_loss": 1.387669324874878, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.053028784692287445, + "rewards/margins": 0.011772466823458672, + "rewards/rejected": -0.06480124592781067, "step": 150 }, { "epoch": 0.9239940387481371, - "grad_norm": 16.999670028686523, + "grad_norm": 35.649044036865234, "learning_rate": 4.016096644512495e-06, - "log_odds_chosen": 0.13306589424610138, - "log_odds_ratio": -0.6789790391921997, - "logits/chosen": 380.4939880371094, - "logits/rejected": 395.53143310546875, - "logps/chosen": -1.1204369068145752, - "logps/rejected": -1.2021987438201904, - "loss": 1.436, - "nll_loss": 1.3288953304290771, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05602184683084488, - "rewards/margins": 0.004088088870048523, - "rewards/rejected": -0.060109943151474, + "log_odds_chosen": 0.11360454559326172, + "log_odds_ratio": -0.6917680501937866, + "logits/chosen": 380.48785400390625, + "logits/rejected": 395.10772705078125, + "logps/chosen": -1.1738497018814087, + "logps/rejected": -1.2541792392730713, + "loss": 1.4352, + "nll_loss": 1.3315799236297607, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.058692485094070435, + "rewards/margins": 0.004016467835754156, + "rewards/rejected": -0.06270895153284073, "step": 155 }, { "epoch": 0.9538002980625931, - "grad_norm": 34.52124786376953, + "grad_norm": 37.8629035949707, "learning_rate": 3.952847075210474e-06, - "log_odds_chosen": 0.08932497352361679, - "log_odds_ratio": -0.7400273084640503, - "logits/chosen": 386.62786865234375, - "logits/rejected": 432.2003479003906, - "logps/chosen": -1.0199127197265625, - "logps/rejected": -1.1069036722183228, - "loss": 1.425, - "nll_loss": 1.3653684854507446, - "rewards/accuracies": 0.5, - "rewards/chosen": -0.05099564045667648, - "rewards/margins": 0.00434954185038805, - "rewards/rejected": -0.05534517765045166, + "log_odds_chosen": 0.04191911593079567, + "log_odds_ratio": -0.7673999071121216, + "logits/chosen": 384.6130065917969, + "logits/rejected": 430.66485595703125, + "logps/chosen": -1.0005159378051758, + "logps/rejected": -1.0551975965499878, + "loss": 1.408, + "nll_loss": 1.3416965007781982, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.05002579838037491, + "rewards/margins": 0.00273408112116158, + "rewards/rejected": -0.05275987833738327, "step": 160 }, { "epoch": 0.9836065573770492, - "grad_norm": 17.771682739257812, + "grad_norm": 19.95792007446289, "learning_rate": 3.892494720807615e-06, - "log_odds_chosen": 0.02889970876276493, - "log_odds_ratio": -0.7212048768997192, - "logits/chosen": 396.8811950683594, - "logits/rejected": 409.22821044921875, - "logps/chosen": -1.091715693473816, - "logps/rejected": -1.1267164945602417, - "loss": 1.441, - "nll_loss": 1.3998154401779175, - "rewards/accuracies": 0.5249999761581421, - "rewards/chosen": -0.05458579212427139, - "rewards/margins": 0.0017500361427664757, - "rewards/rejected": -0.056335825473070145, + "log_odds_chosen": 0.05066202953457832, + "log_odds_ratio": -0.7182776927947998, + "logits/chosen": 395.8006591796875, + "logits/rejected": 408.99554443359375, + "logps/chosen": -1.0879595279693604, + "logps/rejected": -1.125816822052002, + "loss": 1.436, + "nll_loss": 1.3948609828948975, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.05439797043800354, + "rewards/margins": 0.0018928736681118608, + "rewards/rejected": -0.056290846318006516, "step": 165 }, { "epoch": 0.9955290611028316, - "eval_log_odds_chosen": 0.19335530698299408, - "eval_log_odds_ratio": -0.6989776492118835, - "eval_logits/chosen": 318.99652099609375, - "eval_logits/rejected": 290.1581115722656, - "eval_logps/chosen": -1.0203651189804077, - "eval_logps/rejected": -1.1485036611557007, - "eval_loss": 1.4761662483215332, - "eval_nll_loss": 1.4310433864593506, - "eval_rewards/accuracies": 0.5323740839958191, - "eval_rewards/chosen": -0.051018260419368744, - "eval_rewards/margins": 0.006406927481293678, - "eval_rewards/rejected": -0.057425182312726974, - "eval_runtime": 112.3238, - "eval_samples_per_second": 4.923, - "eval_steps_per_second": 1.237, + "eval_log_odds_chosen": 0.1983117312192917, + "eval_log_odds_ratio": -0.6895310282707214, + "eval_logits/chosen": 318.3812255859375, + "eval_logits/rejected": 288.9291687011719, + "eval_logps/chosen": -1.0157941579818726, + "eval_logps/rejected": -1.1419692039489746, + "eval_loss": 1.467863917350769, + "eval_nll_loss": 1.4121437072753906, + "eval_rewards/accuracies": 0.5467625856399536, + "eval_rewards/chosen": -0.05078971013426781, + "eval_rewards/margins": 0.006308753043413162, + "eval_rewards/rejected": -0.05709846317768097, + "eval_runtime": 112.1639, + "eval_samples_per_second": 4.93, + "eval_steps_per_second": 1.239, "step": 167 }, { "epoch": 1.0134128166915053, - "grad_norm": 17.029600143432617, + "grad_norm": 16.564281463623047, "learning_rate": 3.834824944236852e-06, - "log_odds_chosen": 0.46681445837020874, - "log_odds_ratio": -0.5670086741447449, - "logits/chosen": 377.62884521484375, - "logits/rejected": 402.346435546875, - "logps/chosen": -0.9154840707778931, - "logps/rejected": -1.1631513833999634, - "loss": 1.3055, - "nll_loss": 1.1554943323135376, - "rewards/accuracies": 0.625, - "rewards/chosen": -0.045774202793836594, - "rewards/margins": 0.012383360415697098, - "rewards/rejected": -0.05815757066011429, + "log_odds_chosen": 0.39181432127952576, + "log_odds_ratio": -0.5932676196098328, + "logits/chosen": 378.3958435058594, + "logits/rejected": 403.1106262207031, + "logps/chosen": -0.9357401132583618, + "logps/rejected": -1.1598111391067505, + "loss": 1.2992, + "nll_loss": 1.1567914485931396, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.04678700119256973, + "rewards/margins": 0.011203557252883911, + "rewards/rejected": -0.05799056217074394, "step": 170 }, { "epoch": 1.0432190760059612, - "grad_norm": 20.676471710205078, + "grad_norm": 24.491374969482422, "learning_rate": 3.7796447300922724e-06, - "log_odds_chosen": 0.8411234021186829, - "log_odds_ratio": -0.4436827600002289, - "logits/chosen": 360.05242919921875, - "logits/rejected": 400.02374267578125, - "logps/chosen": -0.6783148646354675, - "logps/rejected": -1.1674000024795532, - "loss": 1.0898, - "nll_loss": 1.1356347799301147, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": -0.033915746957063675, - "rewards/margins": 0.024454256519675255, - "rewards/rejected": -0.058369994163513184, + "log_odds_chosen": 0.8750826120376587, + "log_odds_ratio": -0.42914777994155884, + "logits/chosen": 358.5318603515625, + "logits/rejected": 399.3114929199219, + "logps/chosen": -0.6476485133171082, + "logps/rejected": -1.1458537578582764, + "loss": 1.0769, + "nll_loss": 1.1138975620269775, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.03238242492079735, + "rewards/margins": 0.02491025999188423, + "rewards/rejected": -0.05729268863797188, "step": 175 }, { "epoch": 1.0730253353204173, - "grad_norm": 18.295047760009766, + "grad_norm": 22.750883102416992, "learning_rate": 3.72677996249965e-06, - "log_odds_chosen": 0.8419575691223145, - "log_odds_ratio": -0.43040966987609863, - "logits/chosen": 360.3951110839844, - "logits/rejected": 335.01239013671875, - "logps/chosen": -0.7921234965324402, - "logps/rejected": -1.2925331592559814, - "loss": 1.1448, - "nll_loss": 1.2160179615020752, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": -0.03960617631673813, - "rewards/margins": 0.025020483881235123, - "rewards/rejected": -0.06462665647268295, + "log_odds_chosen": 0.8333228826522827, + "log_odds_ratio": -0.43526044487953186, + "logits/chosen": 354.4723205566406, + "logits/rejected": 329.74591064453125, + "logps/chosen": -0.789750874042511, + "logps/rejected": -1.287760853767395, + "loss": 1.132, + "nll_loss": 1.2151093482971191, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.03948754072189331, + "rewards/margins": 0.02490049973130226, + "rewards/rejected": -0.06438804417848587, "step": 180 }, { "epoch": 1.1028315946348732, - "grad_norm": 20.26190757751465, + "grad_norm": 20.229358673095703, "learning_rate": 3.6760731104690393e-06, - "log_odds_chosen": 1.0061752796173096, - "log_odds_ratio": -0.3863833546638489, - "logits/chosen": 388.26934814453125, - "logits/rejected": 379.1220703125, - "logps/chosen": -0.6712931990623474, - "logps/rejected": -1.1969959735870361, - "loss": 1.0422, - "nll_loss": 0.9980667233467102, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": -0.03356466069817543, - "rewards/margins": 0.026285137981176376, - "rewards/rejected": -0.05984979867935181, + "log_odds_chosen": 1.0057324171066284, + "log_odds_ratio": -0.3837296664714813, + "logits/chosen": 384.34808349609375, + "logits/rejected": 376.38800048828125, + "logps/chosen": -0.6548343896865845, + "logps/rejected": -1.1811447143554688, + "loss": 1.0221, + "nll_loss": 0.9857061505317688, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.032741717994213104, + "rewards/margins": 0.026315515860915184, + "rewards/rejected": -0.05905723571777344, "step": 185 }, { "epoch": 1.1326378539493294, - "grad_norm": 16.21082305908203, + "grad_norm": 18.751834869384766, "learning_rate": 3.6273812505500587e-06, - "log_odds_chosen": 0.6967722177505493, - "log_odds_ratio": -0.49137839674949646, - "logits/chosen": 353.41705322265625, - "logits/rejected": 400.4765930175781, - "logps/chosen": -0.75420081615448, - "logps/rejected": -1.1797516345977783, - "loss": 1.1136, - "nll_loss": 1.0225417613983154, + "log_odds_chosen": 0.6209810972213745, + "log_odds_ratio": -0.5106909275054932, + "logits/chosen": 358.50823974609375, + "logits/rejected": 404.4180603027344, + "logps/chosen": -0.7595417499542236, + "logps/rejected": -1.1261508464813232, + "loss": 1.0914, + "nll_loss": 1.0129649639129639, "rewards/accuracies": 0.75, - "rewards/chosen": -0.0377100370824337, - "rewards/margins": 0.021277543157339096, - "rewards/rejected": -0.0589875802397728, + "rewards/chosen": -0.03797708824276924, + "rewards/margins": 0.018330451101064682, + "rewards/rejected": -0.05630754306912422, "step": 190 }, { "epoch": 1.1624441132637853, - "grad_norm": 18.45132827758789, + "grad_norm": 20.339866638183594, "learning_rate": 3.5805743701971648e-06, - "log_odds_chosen": 0.8713854551315308, - "log_odds_ratio": -0.4125959873199463, - "logits/chosen": 383.83868408203125, - "logits/rejected": 397.7996520996094, - "logps/chosen": -0.7979816198348999, - "logps/rejected": -1.2784751653671265, - "loss": 1.1249, - "nll_loss": 1.1307828426361084, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.039899080991744995, - "rewards/margins": 0.02402467653155327, - "rewards/rejected": -0.06392376124858856, + "log_odds_chosen": 0.8648549914360046, + "log_odds_ratio": -0.40149006247520447, + "logits/chosen": 381.13031005859375, + "logits/rejected": 395.5570983886719, + "logps/chosen": -0.8033710718154907, + "logps/rejected": -1.2736122608184814, + "loss": 1.1227, + "nll_loss": 1.1343204975128174, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.04016854614019394, + "rewards/margins": 0.02351205423474312, + "rewards/rejected": -0.06368060410022736, "step": 195 }, { "epoch": 1.1922503725782414, - "grad_norm": 29.213319778442383, + "grad_norm": 20.895063400268555, "learning_rate": 3.5355339059327378e-06, - "log_odds_chosen": 0.9310399889945984, - "log_odds_ratio": -0.43441715836524963, - "logits/chosen": 408.2393798828125, - "logits/rejected": 392.23309326171875, - "logps/chosen": -0.65810626745224, - "logps/rejected": -1.2119154930114746, - "loss": 1.0925, - "nll_loss": 1.0188348293304443, - "rewards/accuracies": 0.7749999761581421, - "rewards/chosen": -0.03290531411767006, - "rewards/margins": 0.0276904609054327, - "rewards/rejected": -0.06059577316045761, + "log_odds_chosen": 0.9302545785903931, + "log_odds_ratio": -0.4023068845272064, + "logits/chosen": 408.6002197265625, + "logits/rejected": 393.536865234375, + "logps/chosen": -0.7376815676689148, + "logps/rejected": -1.2836555242538452, + "loss": 1.0834, + "nll_loss": 1.019555687904358, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.03688408061861992, + "rewards/margins": 0.02729869820177555, + "rewards/rejected": -0.06418277323246002, "step": 200 }, { "epoch": 1.2220566318926975, - "grad_norm": 20.968154907226562, + "grad_norm": 21.968069076538086, "learning_rate": 3.4921514788478916e-06, - "log_odds_chosen": 1.0998015403747559, - "log_odds_ratio": -0.39691638946533203, - "logits/chosen": 365.73724365234375, - "logits/rejected": 359.8885803222656, - "logps/chosen": -0.6815972924232483, - "logps/rejected": -1.2400376796722412, - "loss": 1.0466, - "nll_loss": 1.0264532566070557, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": -0.03407986834645271, - "rewards/margins": 0.027922023087739944, - "rewards/rejected": -0.06200189143419266, + "log_odds_chosen": 1.1145693063735962, + "log_odds_ratio": -0.38622182607650757, + "logits/chosen": 364.79913330078125, + "logits/rejected": 359.30718994140625, + "logps/chosen": -0.6945966482162476, + "logps/rejected": -1.2616204023361206, + "loss": 1.0621, + "nll_loss": 1.079245686531067, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.03472983092069626, + "rewards/margins": 0.028351187705993652, + "rewards/rejected": -0.06308101862668991, "step": 205 }, { "epoch": 1.2518628912071534, - "grad_norm": 17.646827697753906, + "grad_norm": 19.83363914489746, "learning_rate": 3.450327796711771e-06, - "log_odds_chosen": 1.2030134201049805, - "log_odds_ratio": -0.3409472107887268, - "logits/chosen": 371.56903076171875, - "logits/rejected": 400.691162109375, - "logps/chosen": -0.6153351664543152, - "logps/rejected": -1.2756757736206055, - "loss": 1.0517, - "nll_loss": 0.9517441987991333, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": -0.03076675906777382, - "rewards/margins": 0.03301702067255974, - "rewards/rejected": -0.06378378719091415, + "log_odds_chosen": 1.1763904094696045, + "log_odds_ratio": -0.34168320894241333, + "logits/chosen": 371.95068359375, + "logits/rejected": 400.94305419921875, + "logps/chosen": -0.6090874075889587, + "logps/rejected": -1.2537710666656494, + "loss": 1.0413, + "nll_loss": 0.9631906747817993, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.030454367399215698, + "rewards/margins": 0.03223418444395065, + "rewards/rejected": -0.06268856674432755, "step": 210 }, { "epoch": 1.2816691505216096, - "grad_norm": 25.96933364868164, + "grad_norm": 22.73797035217285, "learning_rate": 3.409971697352368e-06, - "log_odds_chosen": 1.0242887735366821, - "log_odds_ratio": -0.3722797930240631, - "logits/chosen": 393.1634826660156, - "logits/rejected": 376.97198486328125, - "logps/chosen": -0.7517871856689453, - "logps/rejected": -1.3418635129928589, - "loss": 1.0677, - "nll_loss": 1.063118577003479, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.037589360028505325, - "rewards/margins": 0.02950381301343441, - "rewards/rejected": -0.06709317117929459, + "log_odds_chosen": 1.0536540746688843, + "log_odds_ratio": -0.3665863871574402, + "logits/chosen": 392.6047058105469, + "logits/rejected": 377.4068603515625, + "logps/chosen": -0.7370086908340454, + "logps/rejected": -1.3404157161712646, + "loss": 1.0487, + "nll_loss": 1.0565564632415771, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.03685043752193451, + "rewards/margins": 0.030170351266860962, + "rewards/rejected": -0.06702078878879547, "step": 215 }, { "epoch": 1.3114754098360657, - "grad_norm": 14.424154281616211, + "grad_norm": 13.09876537322998, "learning_rate": 3.3709993123162106e-06, - "log_odds_chosen": 0.6680114269256592, - "log_odds_ratio": -0.5037292242050171, - "logits/chosen": 385.2915344238281, - "logits/rejected": 379.8268127441406, - "logps/chosen": -0.8324653506278992, - "logps/rejected": -1.1821435689926147, - "loss": 1.071, - "nll_loss": 1.0840386152267456, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.04162326827645302, - "rewards/margins": 0.0174839086830616, - "rewards/rejected": -0.05910717695951462, + "log_odds_chosen": 0.7300616502761841, + "log_odds_ratio": -0.4766615033149719, + "logits/chosen": 384.1726989746094, + "logits/rejected": 378.66851806640625, + "logps/chosen": -0.7808234691619873, + "logps/rejected": -1.1460365056991577, + "loss": 1.0819, + "nll_loss": 1.038731336593628, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.0390411801636219, + "rewards/margins": 0.0182606503367424, + "rewards/rejected": -0.057301826775074005, "step": 220 }, { "epoch": 1.3412816691505216, - "grad_norm": 24.496023178100586, + "grad_norm": 29.453706741333008, "learning_rate": 3.3333333333333333e-06, - "log_odds_chosen": 0.5463358759880066, - "log_odds_ratio": -0.5178000926971436, - "logits/chosen": 381.60198974609375, - "logits/rejected": 374.200439453125, - "logps/chosen": -0.8569077253341675, - "logps/rejected": -1.1593918800354004, - "loss": 1.0304, - "nll_loss": 1.1032346487045288, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.042845387011766434, - "rewards/margins": 0.015124207362532616, - "rewards/rejected": -0.0579695925116539, + "log_odds_chosen": 0.4621034562587738, + "log_odds_ratio": -0.5440367460250854, + "logits/chosen": 385.5031433105469, + "logits/rejected": 378.17987060546875, + "logps/chosen": -0.8730419278144836, + "logps/rejected": -1.139762043952942, + "loss": 1.0496, + "nll_loss": 1.1089845895767212, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.04365209490060806, + "rewards/margins": 0.01333601027727127, + "rewards/rejected": -0.056988101452589035, "step": 225 }, { "epoch": 1.3710879284649775, - "grad_norm": 31.743003845214844, + "grad_norm": 24.137882232666016, "learning_rate": 3.296902366978936e-06, - "log_odds_chosen": 1.1322697401046753, - "log_odds_ratio": -0.3533535599708557, - "logits/chosen": 353.97186279296875, - "logits/rejected": 374.7437438964844, - "logps/chosen": -0.5964599251747131, - "logps/rejected": -1.2119852304458618, - "loss": 1.0402, - "nll_loss": 0.9073736071586609, + "log_odds_chosen": 1.0880992412567139, + "log_odds_ratio": -0.37469881772994995, + "logits/chosen": 356.7733459472656, + "logits/rejected": 376.2106628417969, + "logps/chosen": -0.6375613808631897, + "logps/rejected": -1.2090116739273071, + "loss": 1.0368, + "nll_loss": 0.927442729473114, "rewards/accuracies": 0.875, - "rewards/chosen": -0.029822995886206627, - "rewards/margins": 0.030776266008615494, - "rewards/rejected": -0.06059925630688667, + "rewards/chosen": -0.031878065317869186, + "rewards/margins": 0.02857252024114132, + "rewards/rejected": -0.06045059114694595, "step": 230 }, { "epoch": 1.4008941877794336, - "grad_norm": 23.891324996948242, + "grad_norm": 19.043012619018555, "learning_rate": 3.2616403652672114e-06, - "log_odds_chosen": 1.1859080791473389, - "log_odds_ratio": -0.37409111857414246, - "logits/chosen": 381.7622985839844, - "logits/rejected": 395.0599365234375, - "logps/chosen": -0.6458351016044617, - "logps/rejected": -1.346355676651001, - "loss": 1.0587, - "nll_loss": 0.9488533735275269, - "rewards/accuracies": 0.875, - "rewards/chosen": -0.032291755080223083, - "rewards/margins": 0.035026032477617264, - "rewards/rejected": -0.06731779128313065, + "log_odds_chosen": 1.1069047451019287, + "log_odds_ratio": -0.39715421199798584, + "logits/chosen": 377.45684814453125, + "logits/rejected": 391.23175048828125, + "logps/chosen": -0.6500628590583801, + "logps/rejected": -1.3308535814285278, + "loss": 1.0109, + "nll_loss": 0.9406328201293945, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.032503142952919006, + "rewards/margins": 0.034039538353681564, + "rewards/rejected": -0.06654268503189087, "step": 235 }, { "epoch": 1.4307004470938898, - "grad_norm": 16.38582992553711, + "grad_norm": 18.564252853393555, "learning_rate": 3.2274861218395142e-06, - "log_odds_chosen": 0.7762764692306519, - "log_odds_ratio": -0.43844375014305115, - "logits/chosen": 407.67388916015625, - "logits/rejected": 413.35260009765625, - "logps/chosen": -0.7236464619636536, - "logps/rejected": -1.1575326919555664, - "loss": 1.0752, - "nll_loss": 1.0268566608428955, - "rewards/accuracies": 0.824999988079071, - "rewards/chosen": -0.03618232533335686, - "rewards/margins": 0.02169431373476982, - "rewards/rejected": -0.05787663906812668, + "log_odds_chosen": 0.8188554048538208, + "log_odds_ratio": -0.4366012513637543, + "logits/chosen": 400.0711364746094, + "logits/rejected": 406.6979675292969, + "logps/chosen": -0.7228484153747559, + "logps/rejected": -1.1837232112884521, + "loss": 1.0716, + "nll_loss": 1.032801866531372, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.03614242747426033, + "rewards/margins": 0.023043744266033173, + "rewards/rejected": -0.05918616056442261, "step": 240 }, { "epoch": 1.4605067064083457, - "grad_norm": 16.295490264892578, + "grad_norm": 13.215555191040039, "learning_rate": 3.1943828249997e-06, - "log_odds_chosen": 0.9785711169242859, - "log_odds_ratio": -0.4029998779296875, - "logits/chosen": 400.16632080078125, - "logits/rejected": 388.1484069824219, - "logps/chosen": -0.6374613642692566, - "logps/rejected": -1.146707534790039, - "loss": 1.0837, - "nll_loss": 1.123439073562622, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": -0.03187306597828865, - "rewards/margins": 0.025462310761213303, - "rewards/rejected": -0.05733537673950195, + "log_odds_chosen": 0.9353200793266296, + "log_odds_ratio": -0.4173661172389984, + "logits/chosen": 397.68170166015625, + "logits/rejected": 386.11883544921875, + "logps/chosen": -0.6454007029533386, + "logps/rejected": -1.1329607963562012, + "loss": 1.0931, + "nll_loss": 1.0978925228118896, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.03227003663778305, + "rewards/margins": 0.024378007277846336, + "rewards/rejected": -0.056648045778274536, "step": 245 }, { "epoch": 1.4903129657228018, - "grad_norm": 22.652774810791016, + "grad_norm": 15.847436904907227, "learning_rate": 3.1622776601683796e-06, - "log_odds_chosen": 1.0432734489440918, - "log_odds_ratio": -0.4298950135707855, - "logits/chosen": 374.0715637207031, - "logits/rejected": 381.5113830566406, - "logps/chosen": -0.6628987193107605, - "logps/rejected": -1.2346137762069702, - "loss": 0.9864, - "nll_loss": 0.9000906944274902, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": -0.03314493969082832, - "rewards/margins": 0.028585752472281456, - "rewards/rejected": -0.06173068284988403, + "log_odds_chosen": 1.0629552602767944, + "log_odds_ratio": -0.4346255660057068, + "logits/chosen": 370.0399475097656, + "logits/rejected": 377.7971496582031, + "logps/chosen": -0.6677332520484924, + "logps/rejected": -1.2528654336929321, + "loss": 0.9948, + "nll_loss": 0.9116696119308472, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.03338665887713432, + "rewards/margins": 0.029256608337163925, + "rewards/rejected": -0.06264327466487885, "step": 250 }, { "epoch": 1.520119225037258, - "grad_norm": 15.01534652709961, + "grad_norm": 20.606616973876953, "learning_rate": 3.131121455425748e-06, - "log_odds_chosen": 1.0844942331314087, - "log_odds_ratio": -0.34810084104537964, - "logits/chosen": 391.9859924316406, - "logits/rejected": 394.78021240234375, - "logps/chosen": -0.5884779095649719, - "logps/rejected": -1.1623605489730835, - "loss": 1.0497, - "nll_loss": 0.9377425312995911, + "log_odds_chosen": 1.0881011486053467, + "log_odds_ratio": -0.33976244926452637, + "logits/chosen": 390.563720703125, + "logits/rejected": 393.47064208984375, + "logps/chosen": -0.6047049760818481, + "logps/rejected": -1.1917129755020142, + "loss": 1.0504, + "nll_loss": 0.9429427981376648, "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.029423898085951805, - "rewards/margins": 0.028694134205579758, - "rewards/rejected": -0.05811803415417671, + "rewards/chosen": -0.030235249549150467, + "rewards/margins": 0.0293504036962986, + "rewards/rejected": -0.05958564952015877, "step": 255 }, { "epoch": 1.5499254843517138, - "grad_norm": 22.26698112487793, + "grad_norm": 35.40441131591797, "learning_rate": 3.1008683647302113e-06, - "log_odds_chosen": 0.9070035815238953, - "log_odds_ratio": -0.43072786927223206, - "logits/chosen": 372.50006103515625, - "logits/rejected": 414.58331298828125, - "logps/chosen": -0.763823926448822, - "logps/rejected": -1.341328740119934, - "loss": 1.043, - "nll_loss": 1.0060240030288696, + "log_odds_chosen": 0.8506741523742676, + "log_odds_ratio": -0.4449694752693176, + "logits/chosen": 372.16888427734375, + "logits/rejected": 413.76153564453125, + "logps/chosen": -0.8014513254165649, + "logps/rejected": -1.3543529510498047, + "loss": 1.0248, + "nll_loss": 1.0251777172088623, "rewards/accuracies": 0.824999988079071, - "rewards/chosen": -0.03819119185209274, - "rewards/margins": 0.028875242918729782, - "rewards/rejected": -0.06706643104553223, + "rewards/chosen": -0.040072567760944366, + "rewards/margins": 0.027645081281661987, + "rewards/rejected": -0.06771764904260635, "step": 260 }, { "epoch": 1.5797317436661698, - "grad_norm": 14.599881172180176, + "grad_norm": 13.316988945007324, "learning_rate": 3.0714755841697565e-06, - "log_odds_chosen": 1.0877039432525635, - "log_odds_ratio": -0.43615055084228516, - "logits/chosen": 384.4775390625, - "logits/rejected": 406.6970520019531, - "logps/chosen": -0.6974985003471375, - "logps/rejected": -1.3204139471054077, - "loss": 1.098, - "nll_loss": 1.024665117263794, + "log_odds_chosen": 1.0472757816314697, + "log_odds_ratio": -0.4307102560997009, + "logits/chosen": 383.9051513671875, + "logits/rejected": 406.1117248535156, + "logps/chosen": -0.6818675398826599, + "logps/rejected": -1.2686574459075928, + "loss": 1.1204, + "nll_loss": 1.0089762210845947, "rewards/accuracies": 0.7749999761581421, - "rewards/chosen": -0.03487492725253105, - "rewards/margins": 0.031145762652158737, - "rewards/rejected": -0.06602068990468979, + "rewards/chosen": -0.034093379974365234, + "rewards/margins": 0.029339497908949852, + "rewards/rejected": -0.06343287974596024, "step": 265 }, { "epoch": 1.6095380029806259, - "grad_norm": 17.716583251953125, + "grad_norm": 17.495107650756836, "learning_rate": 3.0429030972509227e-06, - "log_odds_chosen": 0.9025327563285828, - "log_odds_ratio": -0.4233691692352295, - "logits/chosen": 367.71807861328125, - "logits/rejected": 379.2008361816406, - "logps/chosen": -0.777201771736145, - "logps/rejected": -1.2777531147003174, - "loss": 1.0837, - "nll_loss": 1.1377698183059692, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": -0.03886008635163307, - "rewards/margins": 0.025027573108673096, - "rewards/rejected": -0.06388765573501587, + "log_odds_chosen": 0.9306485056877136, + "log_odds_ratio": -0.4013773798942566, + "logits/chosen": 370.3818054199219, + "logits/rejected": 381.3802490234375, + "logps/chosen": -0.7612948417663574, + "logps/rejected": -1.283376932144165, + "loss": 1.0864, + "nll_loss": 1.1147105693817139, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.03806474059820175, + "rewards/margins": 0.026104098185896873, + "rewards/rejected": -0.06416884064674377, "step": 270 }, { "epoch": 1.639344262295082, - "grad_norm": 14.134200096130371, + "grad_norm": 14.062923431396484, "learning_rate": 3.0151134457776365e-06, - "log_odds_chosen": 0.8205320239067078, - "log_odds_ratio": -0.44056087732315063, - "logits/chosen": 360.33575439453125, - "logits/rejected": 350.024169921875, - "logps/chosen": -0.6577683687210083, - "logps/rejected": -1.066030740737915, - "loss": 1.0837, - "nll_loss": 1.0101639032363892, - "rewards/accuracies": 0.824999988079071, - "rewards/chosen": -0.032888419926166534, - "rewards/margins": 0.020413123071193695, - "rewards/rejected": -0.05330154299736023, + "log_odds_chosen": 0.8347261548042297, + "log_odds_ratio": -0.4390513002872467, + "logits/chosen": 361.4908752441406, + "logits/rejected": 350.319091796875, + "logps/chosen": -0.6371272802352905, + "logps/rejected": -1.0568915605545044, + "loss": 1.0712, + "nll_loss": 0.9875114560127258, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.031856365501880646, + "rewards/margins": 0.020988214761018753, + "rewards/rejected": -0.0528445765376091, "step": 275 }, { "epoch": 1.669150521609538, - "grad_norm": 15.517395973205566, + "grad_norm": 14.235246658325195, "learning_rate": 2.988071523335984e-06, - "log_odds_chosen": 0.7949902415275574, - "log_odds_ratio": -0.5562250018119812, - "logits/chosen": 404.2984313964844, - "logits/rejected": 391.6941833496094, - "logps/chosen": -0.7360959649085999, - "logps/rejected": -1.1831490993499756, - "loss": 1.0486, - "nll_loss": 1.0734833478927612, - "rewards/accuracies": 0.7749999761581421, - "rewards/chosen": -0.03680479899048805, - "rewards/margins": 0.022352661937475204, - "rewards/rejected": -0.05915746092796326, + "log_odds_chosen": 0.8683498501777649, + "log_odds_ratio": -0.5000298619270325, + "logits/chosen": 403.158935546875, + "logits/rejected": 391.2458190917969, + "logps/chosen": -0.6794577240943909, + "logps/rejected": -1.190443754196167, + "loss": 1.0475, + "nll_loss": 1.049759864807129, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.033972885459661484, + "rewards/margins": 0.025549303740262985, + "rewards/rejected": -0.05952219292521477, "step": 280 }, { "epoch": 1.698956780923994, - "grad_norm": 14.143935203552246, + "grad_norm": 14.518519401550293, "learning_rate": 2.961744388795462e-06, - "log_odds_chosen": 0.9420916438102722, - "log_odds_ratio": -0.42187291383743286, - "logits/chosen": 367.45843505859375, - "logits/rejected": 374.1835632324219, - "logps/chosen": -0.6173609495162964, - "logps/rejected": -1.1151915788650513, - "loss": 0.996, - "nll_loss": 0.9254717826843262, - "rewards/accuracies": 0.7749999761581421, - "rewards/chosen": -0.03086804784834385, - "rewards/margins": 0.024891531094908714, - "rewards/rejected": -0.05575958639383316, + "log_odds_chosen": 0.9579475522041321, + "log_odds_ratio": -0.3945266008377075, + "logits/chosen": 368.3428649902344, + "logits/rejected": 374.80645751953125, + "logps/chosen": -0.6118819117546082, + "logps/rejected": -1.1229194402694702, + "loss": 0.9917, + "nll_loss": 0.9298090934753418, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.030594095587730408, + "rewards/margins": 0.025551876053214073, + "rewards/rejected": -0.05614597350358963, "step": 285 }, { "epoch": 1.7287630402384502, - "grad_norm": 13.992819786071777, + "grad_norm": 16.039731979370117, "learning_rate": 2.9361010975735177e-06, - "log_odds_chosen": 0.9273719787597656, - "log_odds_ratio": -0.39941272139549255, - "logits/chosen": 386.17742919921875, - "logits/rejected": 424.8526306152344, - "logps/chosen": -0.7709314227104187, - "logps/rejected": -1.294065237045288, - "loss": 1.0527, - "nll_loss": 0.9949714541435242, + "log_odds_chosen": 0.8852699398994446, + "log_odds_ratio": -0.41907158493995667, + "logits/chosen": 385.8910217285156, + "logits/rejected": 422.97454833984375, + "logps/chosen": -0.8015801310539246, + "logps/rejected": -1.3009235858917236, + "loss": 1.0643, + "nll_loss": 1.0100016593933105, "rewards/accuracies": 0.875, - "rewards/chosen": -0.038546573370695114, - "rewards/margins": 0.026156682521104813, - "rewards/rejected": -0.06470325589179993, + "rewards/chosen": -0.04007900878787041, + "rewards/margins": 0.02496717870235443, + "rewards/rejected": -0.06504618376493454, "step": 290 }, { "epoch": 1.758569299552906, - "grad_norm": 15.243948936462402, + "grad_norm": 17.417320251464844, "learning_rate": 2.9111125486979104e-06, - "log_odds_chosen": 0.7636137008666992, - "log_odds_ratio": -0.4647112786769867, - "logits/chosen": 361.9948425292969, - "logits/rejected": 406.70654296875, - "logps/chosen": -0.7253848314285278, - "logps/rejected": -1.145918607711792, - "loss": 1.0847, - "nll_loss": 1.016174554824829, - "rewards/accuracies": 0.7749999761581421, - "rewards/chosen": -0.03626924008131027, - "rewards/margins": 0.021026695147156715, - "rewards/rejected": -0.05729593709111214, + "log_odds_chosen": 0.8097723722457886, + "log_odds_ratio": -0.4489704966545105, + "logits/chosen": 363.5550231933594, + "logits/rejected": 407.45367431640625, + "logps/chosen": -0.7277875542640686, + "logps/rejected": -1.1767876148223877, + "loss": 1.0644, + "nll_loss": 1.0175808668136597, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.03638937696814537, + "rewards/margins": 0.022450000047683716, + "rewards/rejected": -0.058839380741119385, "step": 295 }, { "epoch": 1.788375558867362, - "grad_norm": 23.890466690063477, + "grad_norm": 22.727943420410156, "learning_rate": 2.8867513459481293e-06, - "log_odds_chosen": 1.2909433841705322, - "log_odds_ratio": -0.3190842270851135, - "logits/chosen": 403.19427490234375, - "logits/rejected": 380.4273986816406, - "logps/chosen": -0.6161251068115234, - "logps/rejected": -1.2782180309295654, - "loss": 0.9952, - "nll_loss": 0.9254310727119446, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": -0.030806254595518112, - "rewards/margins": 0.03310465067625046, - "rewards/rejected": -0.06391090154647827, + "log_odds_chosen": 1.2782224416732788, + "log_odds_ratio": -0.3165340721607208, + "logits/chosen": 403.18780517578125, + "logits/rejected": 379.86224365234375, + "logps/chosen": -0.6022372245788574, + "logps/rejected": -1.2621891498565674, + "loss": 1.0012, + "nll_loss": 0.9228881597518921, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.03011186420917511, + "rewards/margins": 0.032997600734233856, + "rewards/rejected": -0.06310946494340897, "step": 300 }, { "epoch": 1.8181818181818183, - "grad_norm": 16.844104766845703, + "grad_norm": 13.393155097961426, "learning_rate": 2.862991671569341e-06, - "log_odds_chosen": 0.5357767939567566, - "log_odds_ratio": -0.5353686213493347, - "logits/chosen": 395.70831298828125, - "logits/rejected": 405.61749267578125, - "logps/chosen": -0.9245316386222839, - "logps/rejected": -1.2031428813934326, - "loss": 1.0432, - "nll_loss": 1.1699957847595215, - "rewards/accuracies": 0.824999988079071, - "rewards/chosen": -0.046226583421230316, - "rewards/margins": 0.013930551707744598, - "rewards/rejected": -0.060157131403684616, + "log_odds_chosen": 0.5560621619224548, + "log_odds_ratio": -0.5250486135482788, + "logits/chosen": 394.03631591796875, + "logits/rejected": 403.3617858886719, + "logps/chosen": -0.9106165170669556, + "logps/rejected": -1.2179043292999268, + "loss": 1.0386, + "nll_loss": 1.1626732349395752, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.045530833303928375, + "rewards/margins": 0.015364391729235649, + "rewards/rejected": -0.060895223170518875, "step": 305 }, { "epoch": 1.8479880774962743, - "grad_norm": 14.692316055297852, + "grad_norm": 14.096085548400879, "learning_rate": 2.839809171235324e-06, - "log_odds_chosen": 1.0770504474639893, - "log_odds_ratio": -0.42079129815101624, - "logits/chosen": 377.4819030761719, - "logits/rejected": 387.6199645996094, - "logps/chosen": -0.7239227294921875, - "logps/rejected": -1.3824554681777954, - "loss": 1.0884, - "nll_loss": 1.0769283771514893, - "rewards/accuracies": 0.7749999761581421, - "rewards/chosen": -0.036196134984493256, - "rewards/margins": 0.032926641404628754, - "rewards/rejected": -0.06912277638912201, + "log_odds_chosen": 1.0126060247421265, + "log_odds_ratio": -0.4341171383857727, + "logits/chosen": 378.22705078125, + "logits/rejected": 388.7279357910156, + "logps/chosen": -0.6974117159843445, + "logps/rejected": -1.3275178670883179, + "loss": 1.0991, + "nll_loss": 1.0783545970916748, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.034870583564043045, + "rewards/margins": 0.03150530904531479, + "rewards/rejected": -0.06637589633464813, "step": 310 }, { "epoch": 1.8777943368107302, - "grad_norm": 15.1817045211792, + "grad_norm": 15.438323974609375, "learning_rate": 2.817180849095055e-06, - "log_odds_chosen": 0.5459250807762146, - "log_odds_ratio": -0.5598369240760803, - "logits/chosen": 352.6174621582031, - "logits/rejected": 371.89764404296875, - "logps/chosen": -0.9762029647827148, - "logps/rejected": -1.3525390625, - "loss": 1.0938, - "nll_loss": 1.238140344619751, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.04881014674901962, - "rewards/margins": 0.018816810101270676, - "rewards/rejected": -0.0676269605755806, + "log_odds_chosen": 0.4888283610343933, + "log_odds_ratio": -0.5892666578292847, + "logits/chosen": 354.91192626953125, + "logits/rejected": 373.19049072265625, + "logps/chosen": -1.0054099559783936, + "logps/rejected": -1.3448001146316528, + "loss": 1.0997, + "nll_loss": 1.2546958923339844, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.05027049034833908, + "rewards/margins": 0.01696951314806938, + "rewards/rejected": -0.06724000722169876, "step": 315 }, { "epoch": 1.9076005961251863, - "grad_norm": 17.332054138183594, + "grad_norm": 15.382440567016602, "learning_rate": 2.7950849718747376e-06, - "log_odds_chosen": 1.1397926807403564, - "log_odds_ratio": -0.36622655391693115, - "logits/chosen": 373.9564514160156, - "logits/rejected": 395.34271240234375, - "logps/chosen": -0.6329408884048462, - "logps/rejected": -1.2445515394210815, - "loss": 0.9928, - "nll_loss": 0.9283340573310852, - "rewards/accuracies": 0.875, - "rewards/chosen": -0.03164704144001007, - "rewards/margins": 0.030580539256334305, - "rewards/rejected": -0.062227584421634674, + "log_odds_chosen": 1.0956491231918335, + "log_odds_ratio": -0.3748942017555237, + "logits/chosen": 376.21466064453125, + "logits/rejected": 396.38897705078125, + "logps/chosen": -0.6471365690231323, + "logps/rejected": -1.257728934288025, + "loss": 0.986, + "nll_loss": 0.9363555908203125, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.032356828451156616, + "rewards/margins": 0.03052961453795433, + "rewards/rejected": -0.06288645416498184, "step": 320 }, { "epoch": 1.9374068554396424, - "grad_norm": 20.475017547607422, + "grad_norm": 36.433021545410156, "learning_rate": 2.773500981126146e-06, - "log_odds_chosen": 1.1559429168701172, - "log_odds_ratio": -0.3606329560279846, - "logits/chosen": 372.6563720703125, - "logits/rejected": 405.1517333984375, - "logps/chosen": -0.6990076303482056, - "logps/rejected": -1.3749182224273682, - "loss": 1.0121, - "nll_loss": 0.9322077631950378, + "log_odds_chosen": 1.154837965965271, + "log_odds_ratio": -0.362586110830307, + "logits/chosen": 373.2748107910156, + "logits/rejected": 404.8694152832031, + "logps/chosen": -0.705539882183075, + "logps/rejected": -1.3716325759887695, + "loss": 1.0139, + "nll_loss": 0.9342381358146667, "rewards/accuracies": 0.925000011920929, - "rewards/chosen": -0.0349503830075264, - "rewards/margins": 0.03379552438855171, - "rewards/rejected": -0.06874591112136841, + "rewards/chosen": -0.03527699410915375, + "rewards/margins": 0.03330463916063309, + "rewards/rejected": -0.06858162581920624, "step": 325 }, { "epoch": 1.9672131147540983, - "grad_norm": 20.384191513061523, + "grad_norm": 20.0263671875, "learning_rate": 2.752409412815902e-06, - "log_odds_chosen": 0.8144651651382446, - "log_odds_ratio": -0.4188029170036316, - "logits/chosen": 367.2298889160156, - "logits/rejected": 376.0736083984375, - "logps/chosen": -0.7355102896690369, - "logps/rejected": -1.211102843284607, - "loss": 1.0378, - "nll_loss": 0.8931636810302734, - "rewards/accuracies": 0.875, - "rewards/chosen": -0.036775510758161545, - "rewards/margins": 0.023779626935720444, - "rewards/rejected": -0.06055514141917229, + "log_odds_chosen": 0.8623636960983276, + "log_odds_ratio": -0.414236456155777, + "logits/chosen": 370.912841796875, + "logits/rejected": 377.9576721191406, + "logps/chosen": -0.7194432020187378, + "logps/rejected": -1.2195098400115967, + "loss": 1.0256, + "nll_loss": 0.8793627023696899, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.03597215935587883, + "rewards/margins": 0.025003332644701004, + "rewards/rejected": -0.060975492000579834, "step": 330 }, { "epoch": 1.9970193740685542, - "grad_norm": 15.695927619934082, + "grad_norm": 24.618507385253906, "learning_rate": 2.7317918235407652e-06, - "log_odds_chosen": 0.5675193071365356, - "log_odds_ratio": -0.5574907660484314, - "logits/chosen": 395.9285888671875, - "logits/rejected": 387.2447204589844, - "logps/chosen": -0.9066513776779175, - "logps/rejected": -1.2281653881072998, - "loss": 1.0908, - "nll_loss": 1.2198901176452637, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.045332565903663635, - "rewards/margins": 0.016075702384114265, - "rewards/rejected": -0.06140827015042305, + "log_odds_chosen": 0.5057398080825806, + "log_odds_ratio": -0.5592184662818909, + "logits/chosen": 395.17340087890625, + "logits/rejected": 387.1885986328125, + "logps/chosen": -0.9086158871650696, + "logps/rejected": -1.1841217279434204, + "loss": 1.1098, + "nll_loss": 1.2389247417449951, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.04543079435825348, + "rewards/margins": 0.013775287196040154, + "rewards/rejected": -0.05920607969164848, "step": 335 }, { "epoch": 1.9970193740685542, - "eval_log_odds_chosen": 0.2359991818666458, - "eval_log_odds_ratio": -0.6970126628875732, - "eval_logits/chosen": 314.6778564453125, - "eval_logits/rejected": 285.82061767578125, - "eval_logps/chosen": -0.9949654936790466, - "eval_logps/rejected": -1.1527600288391113, - "eval_loss": 1.4250013828277588, - "eval_nll_loss": 1.3697166442871094, - "eval_rewards/accuracies": 0.5323740839958191, - "eval_rewards/chosen": -0.04974827170372009, - "eval_rewards/margins": 0.00788972433656454, - "eval_rewards/rejected": -0.05763799697160721, - "eval_runtime": 112.2726, - "eval_samples_per_second": 4.926, - "eval_steps_per_second": 1.238, + "eval_log_odds_chosen": 0.21048486232757568, + "eval_log_odds_ratio": -0.7227855920791626, + "eval_logits/chosen": 315.02960205078125, + "eval_logits/rejected": 286.43115234375, + "eval_logps/chosen": -1.0353137254714966, + "eval_logps/rejected": -1.1580623388290405, + "eval_loss": 1.4451346397399902, + "eval_nll_loss": 1.3838590383529663, + "eval_rewards/accuracies": 0.5467625856399536, + "eval_rewards/chosen": -0.05176568776369095, + "eval_rewards/margins": 0.006137436721473932, + "eval_rewards/rejected": -0.05790312588214874, + "eval_runtime": 112.1251, + "eval_samples_per_second": 4.932, + "eval_steps_per_second": 1.24, "step": 335 }, { "epoch": 2.0268256333830106, - "grad_norm": 21.729570388793945, + "grad_norm": 17.44247817993164, "learning_rate": 2.711630722733202e-06, - "log_odds_chosen": 2.0113790035247803, - "log_odds_ratio": -0.19709806144237518, - "logits/chosen": 389.3846435546875, - "logits/rejected": 366.3945617675781, - "logps/chosen": -0.38005977869033813, - "logps/rejected": -1.4012727737426758, - "loss": 0.6433, - "nll_loss": 0.6980705261230469, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.019002988934516907, - "rewards/margins": 0.051060646772384644, - "rewards/rejected": -0.07006363570690155, + "log_odds_chosen": 1.980719804763794, + "log_odds_ratio": -0.21638807654380798, + "logits/chosen": 392.9175109863281, + "logits/rejected": 369.302490234375, + "logps/chosen": -0.39937111735343933, + "logps/rejected": -1.395355224609375, + "loss": 0.6343, + "nll_loss": 0.7234522700309753, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.019968556240200996, + "rewards/margins": 0.04979920759797096, + "rewards/rejected": -0.06976776570081711, "step": 340 }, { "epoch": 2.0566318926974665, - "grad_norm": 11.89656925201416, + "grad_norm": 11.428772926330566, "learning_rate": 2.691909510290828e-06, - "log_odds_chosen": 2.5525763034820557, - "log_odds_ratio": -0.12284793704748154, - "logits/chosen": 351.57080078125, - "logits/rejected": 357.44329833984375, - "logps/chosen": -0.3399081528186798, - "logps/rejected": -1.6293659210205078, - "loss": 0.5495, - "nll_loss": 0.5662155151367188, + "log_odds_chosen": 2.5441951751708984, + "log_odds_ratio": -0.12063421308994293, + "logits/chosen": 354.2935485839844, + "logits/rejected": 359.0185852050781, + "logps/chosen": -0.3628384470939636, + "logps/rejected": -1.6579961776733398, + "loss": 0.5571, + "nll_loss": 0.5666171312332153, "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.01699540950357914, - "rewards/margins": 0.06447288393974304, - "rewards/rejected": -0.08146829158067703, + "rewards/chosen": -0.01814192347228527, + "rewards/margins": 0.06475789844989777, + "rewards/rejected": -0.08289982378482819, "step": 345 }, { "epoch": 2.0864381520119224, - "grad_norm": 13.419454574584961, + "grad_norm": 13.283677101135254, "learning_rate": 2.6726124191242444e-06, - "log_odds_chosen": 2.548877716064453, - "log_odds_ratio": -0.11839280277490616, - "logits/chosen": 350.29986572265625, - "logits/rejected": 386.45709228515625, - "logps/chosen": -0.382639080286026, - "logps/rejected": -1.8921934366226196, - "loss": 0.5743, - "nll_loss": 0.5715562105178833, + "log_odds_chosen": 2.592142343521118, + "log_odds_ratio": -0.11488159000873566, + "logits/chosen": 353.8732604980469, + "logits/rejected": 388.585693359375, + "logps/chosen": -0.3672012686729431, + "logps/rejected": -1.8615690469741821, + "loss": 0.5687, + "nll_loss": 0.5486581921577454, "rewards/accuracies": 1.0, - "rewards/chosen": -0.01913195475935936, - "rewards/margins": 0.0754777267575264, - "rewards/rejected": -0.09460968524217606, + "rewards/chosen": -0.018360063433647156, + "rewards/margins": 0.07471838593482971, + "rewards/rejected": -0.09307844936847687, "step": 350 }, { "epoch": 2.1162444113263787, - "grad_norm": 13.355463027954102, + "grad_norm": 12.212410926818848, "learning_rate": 2.6537244621713765e-06, - "log_odds_chosen": 2.2259714603424072, - "log_odds_ratio": -0.15891632437705994, - "logits/chosen": 352.84619140625, - "logits/rejected": 371.22576904296875, - "logps/chosen": -0.37806540727615356, - "logps/rejected": -1.5315955877304077, - "loss": 0.5507, - "nll_loss": 0.6317521333694458, + "log_odds_chosen": 2.209368944168091, + "log_odds_ratio": -0.15512482821941376, + "logits/chosen": 352.80633544921875, + "logits/rejected": 371.6228942871094, + "logps/chosen": -0.3736402690410614, + "logps/rejected": -1.5454914569854736, + "loss": 0.5485, + "nll_loss": 0.609760582447052, "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.018903274089097977, - "rewards/margins": 0.05767650529742241, - "rewards/rejected": -0.07657978683710098, + "rewards/chosen": -0.01868201419711113, + "rewards/margins": 0.058592550456523895, + "rewards/rejected": -0.07727457582950592, "step": 355 }, { "epoch": 2.1460506706408347, - "grad_norm": 10.8477201461792, + "grad_norm": 12.874505043029785, "learning_rate": 2.6352313834736496e-06, - "log_odds_chosen": 2.581636428833008, - "log_odds_ratio": -0.1250651776790619, - "logits/chosen": 353.0003356933594, - "logits/rejected": 398.9602355957031, - "logps/chosen": -0.3573206067085266, - "logps/rejected": -1.6087188720703125, - "loss": 0.5407, - "nll_loss": 0.5504949688911438, + "log_odds_chosen": 2.694078207015991, + "log_odds_ratio": -0.11345534026622772, + "logits/chosen": 355.081787109375, + "logits/rejected": 400.65533447265625, + "logps/chosen": -0.3401663899421692, + "logps/rejected": -1.6482181549072266, + "loss": 0.5505, + "nll_loss": 0.5371311902999878, "rewards/accuracies": 1.0, - "rewards/chosen": -0.01786603033542633, - "rewards/margins": 0.06256992369890213, - "rewards/rejected": -0.08043594658374786, + "rewards/chosen": -0.01700832135975361, + "rewards/margins": 0.06540258973836899, + "rewards/rejected": -0.08241091668605804, "step": 360 }, { "epoch": 2.1758569299552906, - "grad_norm": 12.167034149169922, + "grad_norm": 12.150455474853516, "learning_rate": 2.6171196129510684e-06, - "log_odds_chosen": 1.9800822734832764, - "log_odds_ratio": -0.16938333213329315, - "logits/chosen": 341.21527099609375, - "logits/rejected": 329.54119873046875, - "logps/chosen": -0.348991334438324, - "logps/rejected": -1.3196141719818115, - "loss": 0.5516, - "nll_loss": 0.5312565565109253, + "log_odds_chosen": 2.1292691230773926, + "log_odds_ratio": -0.15649950504302979, + "logits/chosen": 340.80157470703125, + "logits/rejected": 330.2677001953125, + "logps/chosen": -0.3447723984718323, + "logps/rejected": -1.3634696006774902, + "loss": 0.5401, + "nll_loss": 0.5159801840782166, "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.017449568957090378, - "rewards/margins": 0.04853113740682602, - "rewards/rejected": -0.0659807100892067, + "rewards/chosen": -0.017238620668649673, + "rewards/margins": 0.05093486234545708, + "rewards/rejected": -0.06817348301410675, "step": 365 }, { "epoch": 2.2056631892697465, - "grad_norm": 15.195405960083008, + "grad_norm": 15.934440612792969, "learning_rate": 2.599376224550182e-06, - "log_odds_chosen": 2.0713467597961426, - "log_odds_ratio": -0.19306516647338867, - "logits/chosen": 316.6725769042969, - "logits/rejected": 339.6087646484375, - "logps/chosen": -0.36510804295539856, - "logps/rejected": -1.4302679300308228, - "loss": 0.5732, - "nll_loss": 0.5869459509849548, + "log_odds_chosen": 2.0337166786193848, + "log_odds_ratio": -0.19345471262931824, + "logits/chosen": 315.1424560546875, + "logits/rejected": 338.2904968261719, + "logps/chosen": -0.3659020662307739, + "logps/rejected": -1.4170308113098145, + "loss": 0.5707, + "nll_loss": 0.5888785719871521, "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.01825539954006672, - "rewards/margins": 0.05325800180435181, - "rewards/rejected": -0.07151339948177338, + "rewards/chosen": -0.018295101821422577, + "rewards/margins": 0.052556443959474564, + "rewards/rejected": -0.07085154205560684, "step": 370 }, { "epoch": 2.235469448584203, - "grad_norm": 12.842897415161133, + "grad_norm": 13.303545951843262, "learning_rate": 2.5819888974716113e-06, - "log_odds_chosen": 1.9603370428085327, - "log_odds_ratio": -0.18798741698265076, - "logits/chosen": 368.00836181640625, - "logits/rejected": 389.7608337402344, - "logps/chosen": -0.4214121699333191, - "logps/rejected": -1.4475972652435303, - "loss": 0.5831, - "nll_loss": 0.6068717241287231, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.021070610731840134, - "rewards/margins": 0.051309265196323395, - "rewards/rejected": -0.07237987220287323, + "log_odds_chosen": 1.9749561548233032, + "log_odds_ratio": -0.1846763789653778, + "logits/chosen": 365.7724304199219, + "logits/rejected": 387.26141357421875, + "logps/chosen": -0.42183151841163635, + "logps/rejected": -1.4507567882537842, + "loss": 0.6027, + "nll_loss": 0.5997955203056335, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.021091576665639877, + "rewards/margins": 0.05144626647233963, + "rewards/rejected": -0.07253783941268921, "step": 375 }, { "epoch": 2.2652757078986587, - "grad_norm": 15.618853569030762, + "grad_norm": 18.135498046875, "learning_rate": 2.564945880212886e-06, - "log_odds_chosen": 2.2622876167297363, - "log_odds_ratio": -0.1320658028125763, - "logits/chosen": 366.3780517578125, - "logits/rejected": 351.96820068359375, - "logps/chosen": -0.3000200688838959, - "logps/rejected": -1.3632047176361084, - "loss": 0.5571, - "nll_loss": 0.525825560092926, + "log_odds_chosen": 2.311295509338379, + "log_odds_ratio": -0.12876024842262268, + "logits/chosen": 364.0061950683594, + "logits/rejected": 350.2301330566406, + "logps/chosen": -0.29145348072052, + "logps/rejected": -1.3336101770401, + "loss": 0.5545, + "nll_loss": 0.5340723991394043, "rewards/accuracies": 1.0, - "rewards/chosen": -0.015001003630459309, - "rewards/margins": 0.05315924435853958, - "rewards/rejected": -0.06816024333238602, + "rewards/chosen": -0.014572675339877605, + "rewards/margins": 0.05210784077644348, + "rewards/rejected": -0.06668051332235336, "step": 380 }, { "epoch": 2.2950819672131146, - "grad_norm": 13.373687744140625, + "grad_norm": 10.94619369506836, "learning_rate": 2.5482359571881276e-06, - "log_odds_chosen": 2.5866951942443848, - "log_odds_ratio": -0.11987988650798798, - "logits/chosen": 358.47344970703125, - "logits/rejected": 352.4609375, - "logps/chosen": -0.283217191696167, - "logps/rejected": -1.4752601385116577, - "loss": 0.5301, - "nll_loss": 0.49565237760543823, + "log_odds_chosen": 2.5354793071746826, + "log_odds_ratio": -0.115506611764431, + "logits/chosen": 353.3926696777344, + "logits/rejected": 348.86944580078125, + "logps/chosen": -0.2818690240383148, + "logps/rejected": -1.487006425857544, + "loss": 0.5179, + "nll_loss": 0.476929247379303, "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.014160861261188984, - "rewards/margins": 0.059602152556180954, - "rewards/rejected": -0.07376301288604736, + "rewards/chosen": -0.014093451201915741, + "rewards/margins": 0.06025686860084534, + "rewards/rejected": -0.07435031235218048, "step": 385 }, { "epoch": 2.3248882265275705, - "grad_norm": 11.959152221679688, + "grad_norm": 12.89717960357666, "learning_rate": 2.5318484177091667e-06, - "log_odds_chosen": 2.3983092308044434, - "log_odds_ratio": -0.11385631561279297, - "logits/chosen": 370.3407287597656, - "logits/rejected": 393.58978271484375, - "logps/chosen": -0.36266201734542847, - "logps/rejected": -1.6288502216339111, - "loss": 0.578, - "nll_loss": 0.5790597200393677, + "log_odds_chosen": 2.246914863586426, + "log_odds_ratio": -0.13051298260688782, + "logits/chosen": 370.3692626953125, + "logits/rejected": 393.1583557128906, + "logps/chosen": -0.37999650835990906, + "logps/rejected": -1.5727269649505615, + "loss": 0.5955, + "nll_loss": 0.6084927320480347, "rewards/accuracies": 1.0, - "rewards/chosen": -0.018133098259568214, - "rewards/margins": 0.06330940872430801, - "rewards/rejected": -0.08144249767065048, + "rewards/chosen": -0.018999826163053513, + "rewards/margins": 0.05963651463389397, + "rewards/rejected": -0.07863634079694748, "step": 390 }, { "epoch": 2.354694485842027, - "grad_norm": 11.902227401733398, + "grad_norm": 9.882362365722656, "learning_rate": 2.515773027133138e-06, - "log_odds_chosen": 2.4830586910247803, - "log_odds_ratio": -0.13829158246517181, - "logits/chosen": 369.2203063964844, - "logits/rejected": 362.56298828125, - "logps/chosen": -0.2860831320285797, - "logps/rejected": -1.3531745672225952, - "loss": 0.5233, - "nll_loss": 0.48577412962913513, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.01430415641516447, - "rewards/margins": 0.053354568779468536, - "rewards/rejected": -0.06765872985124588, + "log_odds_chosen": 2.3919968605041504, + "log_odds_ratio": -0.13801579177379608, + "logits/chosen": 369.07232666015625, + "logits/rejected": 362.56475830078125, + "logps/chosen": -0.2836388051509857, + "logps/rejected": -1.353062391281128, + "loss": 0.5206, + "nll_loss": 0.473809152841568, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.014181938953697681, + "rewards/margins": 0.05347117781639099, + "rewards/rejected": -0.0676531195640564, "step": 395 }, { "epoch": 2.384500745156483, - "grad_norm": 18.2595157623291, + "grad_norm": 20.866735458374023, "learning_rate": 2.5e-06, - "log_odds_chosen": 2.4875540733337402, - "log_odds_ratio": -0.13931448757648468, - "logits/chosen": 366.81646728515625, - "logits/rejected": 388.4540710449219, - "logps/chosen": -0.3392675817012787, - "logps/rejected": -1.6781524419784546, - "loss": 0.5707, - "nll_loss": 0.5266181826591492, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.016963381320238113, - "rewards/margins": 0.06694425642490387, - "rewards/rejected": -0.08390761911869049, + "log_odds_chosen": 2.305642604827881, + "log_odds_ratio": -0.17361058294773102, + "logits/chosen": 367.1854553222656, + "logits/rejected": 388.62860107421875, + "logps/chosen": -0.37132248282432556, + "logps/rejected": -1.6480903625488281, + "loss": 0.5804, + "nll_loss": 0.5412487387657166, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.018566126003861427, + "rewards/margins": 0.0638383999466896, + "rewards/rejected": -0.08240451663732529, "step": 400 }, { "epoch": 2.4143070044709387, - "grad_norm": 10.78487777709961, + "grad_norm": 17.410255432128906, "learning_rate": 2.484519974999767e-06, - "log_odds_chosen": 2.3641769886016846, - "log_odds_ratio": -0.18085625767707825, - "logits/chosen": 417.9383850097656, - "logits/rejected": 384.9745178222656, - "logps/chosen": -0.36932411789894104, - "logps/rejected": -1.5650533437728882, - "loss": 0.5707, - "nll_loss": 0.5322312712669373, - "rewards/accuracies": 0.925000011920929, - "rewards/chosen": -0.018466206267476082, - "rewards/margins": 0.05978646129369736, - "rewards/rejected": -0.07825267314910889, + "log_odds_chosen": 2.341656446456909, + "log_odds_ratio": -0.18742091953754425, + "logits/chosen": 417.4825744628906, + "logits/rejected": 384.49346923828125, + "logps/chosen": -0.38954219222068787, + "logps/rejected": -1.552782416343689, + "loss": 0.5795, + "nll_loss": 0.5449979305267334, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.019477110356092453, + "rewards/margins": 0.05816201493144035, + "rewards/rejected": -0.07763911783695221, "step": 405 }, { "epoch": 2.444113263785395, - "grad_norm": 12.914924621582031, + "grad_norm": 11.311455726623535, "learning_rate": 2.4693239916239746e-06, - "log_odds_chosen": 2.4095664024353027, - "log_odds_ratio": -0.17002181708812714, - "logits/chosen": 363.0850830078125, - "logits/rejected": 378.43634033203125, - "logps/chosen": -0.3721050024032593, - "logps/rejected": -1.5407812595367432, - "loss": 0.5689, - "nll_loss": 0.5765537619590759, + "log_odds_chosen": 2.352574586868286, + "log_odds_ratio": -0.18433162569999695, + "logits/chosen": 365.95965576171875, + "logits/rejected": 380.1703186035156, + "logps/chosen": -0.37695974111557007, + "logps/rejected": -1.5367991924285889, + "loss": 0.5696, + "nll_loss": 0.5719352960586548, "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.018605249002575874, - "rewards/margins": 0.058433812111616135, - "rewards/rejected": -0.07703907042741776, + "rewards/chosen": -0.018847983330488205, + "rewards/margins": 0.05799197405576706, + "rewards/rejected": -0.07683996111154556, "step": 410 }, { "epoch": 2.473919523099851, - "grad_norm": 11.604476928710938, + "grad_norm": 11.967494010925293, "learning_rate": 2.4544034683690802e-06, - "log_odds_chosen": 2.4141106605529785, - "log_odds_ratio": -0.13905009627342224, - "logits/chosen": 363.8720703125, - "logits/rejected": 393.9859924316406, - "logps/chosen": -0.32817938923835754, - "logps/rejected": -1.5454423427581787, - "loss": 0.5702, - "nll_loss": 0.5272970795631409, + "log_odds_chosen": 2.2503182888031006, + "log_odds_ratio": -0.15851208567619324, + "logits/chosen": 364.34222412109375, + "logits/rejected": 394.3598327636719, + "logps/chosen": -0.3465135991573334, + "logps/rejected": -1.4553066492080688, + "loss": 0.5766, + "nll_loss": 0.5365554690361023, "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.016408968716859818, - "rewards/margins": 0.06086314842104912, - "rewards/rejected": -0.07727211713790894, + "rewards/chosen": -0.01732568070292473, + "rewards/margins": 0.05543965846300125, + "rewards/rejected": -0.07276533544063568, "step": 415 }, { "epoch": 2.503725782414307, - "grad_norm": 11.285563468933105, + "grad_norm": 11.675920486450195, "learning_rate": 2.4397501823713327e-06, - "log_odds_chosen": 2.0902717113494873, - "log_odds_ratio": -0.18547013401985168, - "logits/chosen": 364.81866455078125, - "logits/rejected": 342.7242736816406, - "logps/chosen": -0.3733817934989929, - "logps/rejected": -1.4410852193832397, - "loss": 0.56, - "nll_loss": 0.6532183885574341, + "log_odds_chosen": 2.0490882396698, + "log_odds_ratio": -0.1818782538175583, + "logits/chosen": 367.0909423828125, + "logits/rejected": 343.985107421875, + "logps/chosen": -0.36017632484436035, + "logps/rejected": -1.39711594581604, + "loss": 0.554, + "nll_loss": 0.6418091654777527, "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.018669091165065765, - "rewards/margins": 0.0533851683139801, - "rewards/rejected": -0.07205425947904587, + "rewards/chosen": -0.018008816987276077, + "rewards/margins": 0.05184697359800339, + "rewards/rejected": -0.06985578685998917, "step": 420 }, { "epoch": 2.533532041728763, - "grad_norm": 12.517095565795898, + "grad_norm": 11.233902931213379, "learning_rate": 2.4253562503633297e-06, - "log_odds_chosen": 2.795741081237793, - "log_odds_ratio": -0.08831789344549179, - "logits/chosen": 362.86871337890625, - "logits/rejected": 359.4671630859375, - "logps/chosen": -0.32691091299057007, - "logps/rejected": -1.8046060800552368, - "loss": 0.5335, - "nll_loss": 0.5374017357826233, + "log_odds_chosen": 2.5332672595977783, + "log_odds_ratio": -0.10215308517217636, + "logits/chosen": 365.8087463378906, + "logits/rejected": 362.74371337890625, + "logps/chosen": -0.3472338318824768, + "logps/rejected": -1.7049144506454468, + "loss": 0.5363, + "nll_loss": 0.5403138399124146, "rewards/accuracies": 1.0, - "rewards/chosen": -0.016345545649528503, - "rewards/margins": 0.0738847479224205, - "rewards/rejected": -0.0902303010225296, + "rewards/chosen": -0.01736168935894966, + "rewards/margins": 0.06788404285907745, + "rewards/rejected": -0.08524572849273682, "step": 425 }, { "epoch": 2.563338301043219, - "grad_norm": 13.33828353881836, + "grad_norm": 16.26917266845703, "learning_rate": 2.411214110852061e-06, - "log_odds_chosen": 2.7160139083862305, - "log_odds_ratio": -0.10933760553598404, - "logits/chosen": 362.9604187011719, - "logits/rejected": 374.8692626953125, - "logps/chosen": -0.27513235807418823, - "logps/rejected": -1.615644097328186, - "loss": 0.5522, - "nll_loss": 0.48540863394737244, + "log_odds_chosen": 2.512302875518799, + "log_odds_ratio": -0.1274806559085846, + "logits/chosen": 365.8606262207031, + "logits/rejected": 377.60894775390625, + "logps/chosen": -0.30852970480918884, + "logps/rejected": -1.5747673511505127, + "loss": 0.551, + "nll_loss": 0.5144289135932922, "rewards/accuracies": 1.0, - "rewards/chosen": -0.01375661976635456, - "rewards/margins": 0.06702558696269989, - "rewards/rejected": -0.0807822048664093, + "rewards/chosen": -0.015426484867930412, + "rewards/margins": 0.06331188976764679, + "rewards/rejected": -0.07873837649822235, "step": 430 }, { "epoch": 2.593144560357675, - "grad_norm": 15.951871871948242, + "grad_norm": 13.473649024963379, "learning_rate": 2.3973165074269213e-06, - "log_odds_chosen": 2.399064779281616, - "log_odds_ratio": -0.150381401181221, - "logits/chosen": 368.9129943847656, - "logits/rejected": 337.7628173828125, - "logps/chosen": -0.3689618408679962, - "logps/rejected": -1.5598814487457275, - "loss": 0.5514, - "nll_loss": 0.5270097255706787, + "log_odds_chosen": 2.2823190689086914, + "log_odds_ratio": -0.1513710767030716, + "logits/chosen": 372.6357421875, + "logits/rejected": 341.8959045410156, + "logps/chosen": -0.3947034776210785, + "logps/rejected": -1.5539586544036865, + "loss": 0.5703, + "nll_loss": 0.5524027943611145, "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.01844809204339981, - "rewards/margins": 0.059545982629060745, - "rewards/rejected": -0.07799407094717026, + "rewards/chosen": -0.019735176116228104, + "rewards/margins": 0.05796275660395622, + "rewards/rejected": -0.07769793272018433, "step": 435 }, { "epoch": 2.6229508196721314, - "grad_norm": 12.57322883605957, + "grad_norm": 15.039813041687012, "learning_rate": 2.3836564731139807e-06, - "log_odds_chosen": 2.7293245792388916, - "log_odds_ratio": -0.10528914630413055, - "logits/chosen": 354.0178527832031, - "logits/rejected": 365.93829345703125, - "logps/chosen": -0.2712039351463318, - "logps/rejected": -1.582219123840332, - "loss": 0.5703, - "nll_loss": 0.5448659062385559, + "log_odds_chosen": 2.4606306552886963, + "log_odds_ratio": -0.11453738063573837, + "logits/chosen": 356.7464599609375, + "logits/rejected": 367.7265930175781, + "logps/chosen": -0.27872234582901, + "logps/rejected": -1.4566452503204346, + "loss": 0.5883, + "nll_loss": 0.5446338653564453, "rewards/accuracies": 1.0, - "rewards/chosen": -0.013560195453464985, - "rewards/margins": 0.06555076688528061, - "rewards/rejected": -0.07911095768213272, + "rewards/chosen": -0.0139361172914505, + "rewards/margins": 0.05889614298939705, + "rewards/rejected": -0.07283225655555725, "step": 440 }, { "epoch": 2.6527570789865873, - "grad_norm": 13.282082557678223, + "grad_norm": 13.054855346679688, "learning_rate": 2.3702273156998867e-06, - "log_odds_chosen": 2.619792938232422, - "log_odds_ratio": -0.10272769629955292, - "logits/chosen": 335.6366271972656, - "logits/rejected": 372.410400390625, - "logps/chosen": -0.36089158058166504, - "logps/rejected": -1.8113043308258057, - "loss": 0.5563, - "nll_loss": 0.5579748749732971, + "log_odds_chosen": 2.510906219482422, + "log_odds_ratio": -0.11371259391307831, + "logits/chosen": 337.3484191894531, + "logits/rejected": 373.2784423828125, + "logps/chosen": -0.35476621985435486, + "logps/rejected": -1.7162315845489502, + "loss": 0.5632, + "nll_loss": 0.5669391751289368, "rewards/accuracies": 1.0, - "rewards/chosen": -0.018044577911496162, - "rewards/margins": 0.07252063602209091, - "rewards/rejected": -0.09056521207094193, + "rewards/chosen": -0.017738312482833862, + "rewards/margins": 0.06807325780391693, + "rewards/rejected": -0.08581157773733139, "step": 445 }, { "epoch": 2.682563338301043, - "grad_norm": 10.777383804321289, + "grad_norm": 12.158041954040527, "learning_rate": 2.357022603955159e-06, - "log_odds_chosen": 2.4564261436462402, - "log_odds_ratio": -0.11157449334859848, - "logits/chosen": 362.14312744140625, - "logits/rejected": 363.303466796875, - "logps/chosen": -0.39076924324035645, - "logps/rejected": -1.7145166397094727, - "loss": 0.5754, - "nll_loss": 0.5376263856887817, + "log_odds_chosen": 2.407587766647339, + "log_odds_ratio": -0.11502983421087265, + "logits/chosen": 363.87554931640625, + "logits/rejected": 364.67071533203125, + "logps/chosen": -0.4218372404575348, + "logps/rejected": -1.7667124271392822, + "loss": 0.5905, + "nll_loss": 0.5684647560119629, "rewards/accuracies": 1.0, - "rewards/chosen": -0.019538460299372673, - "rewards/margins": 0.06618736684322357, - "rewards/rejected": -0.0857258215546608, + "rewards/chosen": -0.02109185978770256, + "rewards/margins": 0.06724376231431961, + "rewards/rejected": -0.08833561837673187, "step": 450 }, { "epoch": 2.712369597615499, - "grad_norm": 12.512327194213867, + "grad_norm": 14.808917045593262, "learning_rate": 2.3440361546924774e-06, - "log_odds_chosen": 2.614637613296509, - "log_odds_ratio": -0.11486033350229263, - "logits/chosen": 395.16949462890625, - "logits/rejected": 374.3088684082031, - "logps/chosen": -0.3622822165489197, - "logps/rejected": -1.6618531942367554, - "loss": 0.6153, - "nll_loss": 0.568195641040802, + "log_odds_chosen": 2.5720152854919434, + "log_odds_ratio": -0.1182328313589096, + "logits/chosen": 389.94683837890625, + "logits/rejected": 369.15606689453125, + "logps/chosen": -0.3745032250881195, + "logps/rejected": -1.6708095073699951, + "loss": 0.6099, + "nll_loss": 0.5873192548751831, "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.018114114180207253, - "rewards/margins": 0.06497855484485626, - "rewards/rejected": -0.08309266716241837, + "rewards/chosen": -0.018725162371993065, + "rewards/margins": 0.06481531262397766, + "rewards/rejected": -0.08354047685861588, "step": 455 }, { "epoch": 2.742175856929955, - "grad_norm": 12.090532302856445, + "grad_norm": 15.351086616516113, "learning_rate": 2.3312620206007847e-06, - "log_odds_chosen": 2.508338451385498, - "log_odds_ratio": -0.1204490214586258, - "logits/chosen": 382.52630615234375, - "logits/rejected": 401.80841064453125, - "logps/chosen": -0.3474404215812683, - "logps/rejected": -1.7473865747451782, - "loss": 0.5838, - "nll_loss": 0.6167483925819397, + "log_odds_chosen": 2.6212141513824463, + "log_odds_ratio": -0.12157906591892242, + "logits/chosen": 381.9286193847656, + "logits/rejected": 401.04998779296875, + "logps/chosen": -0.336896151304245, + "logps/rejected": -1.7683613300323486, + "loss": 0.5888, + "nll_loss": 0.6308404207229614, "rewards/accuracies": 1.0, - "rewards/chosen": -0.017372019588947296, - "rewards/margins": 0.06999730318784714, - "rewards/rejected": -0.08736933022737503, + "rewards/chosen": -0.01684480905532837, + "rewards/margins": 0.07157325744628906, + "rewards/rejected": -0.08841806650161743, "step": 460 }, { "epoch": 2.7719821162444114, - "grad_norm": 13.27834701538086, + "grad_norm": 13.619884490966797, "learning_rate": 2.3186944788008413e-06, - "log_odds_chosen": 2.5867724418640137, - "log_odds_ratio": -0.14203417301177979, - "logits/chosen": 376.5874328613281, - "logits/rejected": 381.06341552734375, - "logps/chosen": -0.2869132459163666, - "logps/rejected": -1.5630210638046265, - "loss": 0.5778, - "nll_loss": 0.55084627866745, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.014345663599669933, - "rewards/margins": 0.06380538642406464, - "rewards/rejected": -0.07815105468034744, + "log_odds_chosen": 2.487888813018799, + "log_odds_ratio": -0.1321084201335907, + "logits/chosen": 378.2283630371094, + "logits/rejected": 382.45391845703125, + "logps/chosen": -0.3096372187137604, + "logps/rejected": -1.5240898132324219, + "loss": 0.5702, + "nll_loss": 0.5487266778945923, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015481861308217049, + "rewards/margins": 0.060722626745700836, + "rewards/rejected": -0.07620447874069214, "step": 465 }, { "epoch": 2.8017883755588673, - "grad_norm": 11.784134864807129, + "grad_norm": 11.559633255004883, "learning_rate": 2.3063280200722128e-06, - "log_odds_chosen": 2.1283843517303467, - "log_odds_ratio": -0.20095142722129822, - "logits/chosen": 383.31781005859375, - "logits/rejected": 354.9120788574219, - "logps/chosen": -0.40080317854881287, - "logps/rejected": -1.5093116760253906, - "loss": 0.5644, - "nll_loss": 0.575947642326355, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.020040160045027733, - "rewards/margins": 0.05542542785406113, - "rewards/rejected": -0.07546558976173401, + "log_odds_chosen": 2.1641154289245605, + "log_odds_ratio": -0.19840756058692932, + "logits/chosen": 382.61077880859375, + "logits/rejected": 354.3682556152344, + "logps/chosen": -0.408609539270401, + "logps/rejected": -1.5154647827148438, + "loss": 0.5838, + "nll_loss": 0.5971536636352539, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.02043047733604908, + "rewards/margins": 0.05534275621175766, + "rewards/rejected": -0.07577323168516159, "step": 470 }, { "epoch": 2.8315946348733236, - "grad_norm": 13.008294105529785, + "grad_norm": 12.24728012084961, "learning_rate": 2.2941573387056174e-06, - "log_odds_chosen": 2.6808362007141113, - "log_odds_ratio": -0.10760221630334854, - "logits/chosen": 350.5984802246094, - "logits/rejected": 374.9319152832031, - "logps/chosen": -0.34488445520401, - "logps/rejected": -1.7149194478988647, - "loss": 0.5386, - "nll_loss": 0.491553395986557, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.01724422350525856, - "rewards/margins": 0.06850175559520721, - "rewards/rejected": -0.08574597537517548, + "log_odds_chosen": 2.448145866394043, + "log_odds_ratio": -0.14108145236968994, + "logits/chosen": 352.58197021484375, + "logits/rejected": 376.586181640625, + "logps/chosen": -0.3962209224700928, + "logps/rejected": -1.6542632579803467, + "loss": 0.5532, + "nll_loss": 0.5462762713432312, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.01981104537844658, + "rewards/margins": 0.06290213763713837, + "rewards/rejected": -0.08271317183971405, "step": 475 }, { "epoch": 2.8614008941877795, - "grad_norm": 10.424010276794434, + "grad_norm": 11.175488471984863, "learning_rate": 2.2821773229381924e-06, - "log_odds_chosen": 2.2412309646606445, - "log_odds_ratio": -0.1566620171070099, - "logits/chosen": 362.31378173828125, - "logits/rejected": 402.6854248046875, - "logps/chosen": -0.3766781687736511, - "logps/rejected": -1.4856204986572266, - "loss": 0.5052, - "nll_loss": 0.48381978273391724, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.018833911046385765, - "rewards/margins": 0.055447112768888474, - "rewards/rejected": -0.07428102195262909, + "log_odds_chosen": 2.349735736846924, + "log_odds_ratio": -0.11864028871059418, + "logits/chosen": 361.24639892578125, + "logits/rejected": 402.2587585449219, + "logps/chosen": -0.33937591314315796, + "logps/rejected": -1.5452320575714111, + "loss": 0.5077, + "nll_loss": 0.4929002821445465, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01696879416704178, + "rewards/margins": 0.060292817652225494, + "rewards/rejected": -0.07726161181926727, "step": 480 }, { "epoch": 2.8912071535022354, - "grad_norm": 12.359146118164062, + "grad_norm": 12.026611328125, "learning_rate": 2.270383045932499e-06, - "log_odds_chosen": 2.6057076454162598, - "log_odds_ratio": -0.12701039016246796, - "logits/chosen": 357.164306640625, - "logits/rejected": 380.32073974609375, - "logps/chosen": -0.37163636088371277, - "logps/rejected": -1.8207753896713257, - "loss": 0.5419, - "nll_loss": 0.5325015187263489, + "log_odds_chosen": 2.4791646003723145, + "log_odds_ratio": -0.12428289651870728, + "logits/chosen": 358.9771728515625, + "logits/rejected": 380.42901611328125, + "logps/chosen": -0.3796696364879608, + "logps/rejected": -1.7137501239776611, + "loss": 0.5514, + "nll_loss": 0.5423077344894409, "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.01858181692659855, - "rewards/margins": 0.07245694845914841, - "rewards/rejected": -0.0910387635231018, + "rewards/chosen": -0.01898348517715931, + "rewards/margins": 0.06670401990413666, + "rewards/rejected": -0.08568750321865082, "step": 485 }, { "epoch": 2.9210134128166914, - "grad_norm": 11.646001815795898, + "grad_norm": 12.008419036865234, "learning_rate": 2.2587697572631284e-06, - "log_odds_chosen": 2.3249075412750244, - "log_odds_ratio": -0.19486014544963837, - "logits/chosen": 372.1253967285156, - "logits/rejected": 338.1502380371094, - "logps/chosen": -0.4259433150291443, - "logps/rejected": -1.5797032117843628, - "loss": 0.6087, - "nll_loss": 0.5371652245521545, + "log_odds_chosen": 2.308088541030884, + "log_odds_ratio": -0.1636713743209839, + "logits/chosen": 370.170654296875, + "logits/rejected": 335.97857666015625, + "logps/chosen": -0.41963282227516174, + "logps/rejected": -1.61661696434021, + "loss": 0.6122, + "nll_loss": 0.5415998697280884, "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.021297167986631393, - "rewards/margins": 0.05768799036741257, - "rewards/rejected": -0.07898515462875366, + "rewards/chosen": -0.020981641486287117, + "rewards/margins": 0.05984921008348465, + "rewards/rejected": -0.08083084970712662, "step": 490 }, { "epoch": 2.9508196721311473, - "grad_norm": 11.838138580322266, + "grad_norm": 11.31982135772705, "learning_rate": 2.2473328748774737e-06, - "log_odds_chosen": 2.3507559299468994, - "log_odds_ratio": -0.1578751504421234, - "logits/chosen": 366.9432373046875, - "logits/rejected": 394.8822326660156, - "logps/chosen": -0.3771159052848816, - "logps/rejected": -1.533601999282837, - "loss": 0.5442, - "nll_loss": 0.5532703399658203, + "log_odds_chosen": 2.167809247970581, + "log_odds_ratio": -0.174642875790596, + "logits/chosen": 364.75048828125, + "logits/rejected": 393.1929626464844, + "logps/chosen": -0.4049316346645355, + "logps/rejected": -1.4748752117156982, + "loss": 0.5603, + "nll_loss": 0.5929296612739563, "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.018855798989534378, - "rewards/margins": 0.05782430246472359, - "rewards/rejected": -0.07668010145425797, + "rewards/chosen": -0.020246583968400955, + "rewards/margins": 0.05349717289209366, + "rewards/rejected": -0.07374376058578491, "step": 495 }, { "epoch": 2.9806259314456036, - "grad_norm": 13.802445411682129, + "grad_norm": 13.445329666137695, "learning_rate": 2.23606797749979e-06, - "log_odds_chosen": 2.5029566287994385, - "log_odds_ratio": -0.12695619463920593, - "logits/chosen": 374.8814697265625, - "logits/rejected": 372.7264099121094, - "logps/chosen": -0.32484811544418335, - "logps/rejected": -1.5648537874221802, - "loss": 0.5724, - "nll_loss": 0.47206535935401917, + "log_odds_chosen": 2.3928182125091553, + "log_odds_ratio": -0.14649812877178192, + "logits/chosen": 374.05535888671875, + "logits/rejected": 372.2560119628906, + "logps/chosen": -0.34778839349746704, + "logps/rejected": -1.5306968688964844, + "loss": 0.5921, + "nll_loss": 0.5048509836196899, "rewards/accuracies": 1.0, - "rewards/chosen": -0.016242407262325287, - "rewards/margins": 0.062000274658203125, - "rewards/rejected": -0.07824268192052841, + "rewards/chosen": -0.017389420419931412, + "rewards/margins": 0.059145428240299225, + "rewards/rejected": -0.07653484493494034, "step": 500 }, { "epoch": 2.9865871833084947, - "eval_log_odds_chosen": 0.2937372922897339, - "eval_log_odds_ratio": -0.6945178508758545, - "eval_logits/chosen": 300.6891174316406, - "eval_logits/rejected": 271.8756103515625, - "eval_logps/chosen": -1.0802680253982544, - "eval_logps/rejected": -1.2502641677856445, - "eval_loss": 1.539820671081543, - "eval_nll_loss": 1.4724125862121582, - "eval_rewards/accuracies": 0.5395683646202087, - "eval_rewards/chosen": -0.05401340499520302, - "eval_rewards/margins": 0.00849980115890503, - "eval_rewards/rejected": -0.06251321732997894, - "eval_runtime": 112.3165, - "eval_samples_per_second": 4.924, - "eval_steps_per_second": 1.238, + "eval_log_odds_chosen": 0.28559842705726624, + "eval_log_odds_ratio": -0.6970076560974121, + "eval_logits/chosen": 297.1682434082031, + "eval_logits/rejected": 268.0281982421875, + "eval_logps/chosen": -1.1085351705551147, + "eval_logps/rejected": -1.2919707298278809, + "eval_loss": 1.5517091751098633, + "eval_nll_loss": 1.4855411052703857, + "eval_rewards/accuracies": 0.5611510872840881, + "eval_rewards/chosen": -0.055426761507987976, + "eval_rewards/margins": 0.009171773679554462, + "eval_rewards/rejected": -0.06459853798151016, + "eval_runtime": 112.1561, + "eval_samples_per_second": 4.931, + "eval_steps_per_second": 1.239, "step": 501 }, { "epoch": 2.9865871833084947, "step": 501, "total_flos": 0.0, - "train_loss": 1.4594077459590402, - "train_runtime": 13816.0738, - "train_samples_per_second": 1.165, - "train_steps_per_second": 0.036 + "train_loss": 1.4570662823027956, + "train_runtime": 13599.7579, + "train_samples_per_second": 1.183, + "train_steps_per_second": 0.037 } ], "logging_steps": 5,