|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9994666666666666, |
|
"eval_steps": 500, |
|
"global_step": 937, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.319148936170213e-08, |
|
"logits/chosen": 0.09486827999353409, |
|
"logits/rejected": 0.17880678176879883, |
|
"logps/chosen": -404.6722717285156, |
|
"logps/rejected": -393.01068115234375, |
|
"loss": 0.2923, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.319148936170213e-07, |
|
"logits/chosen": 0.3109264671802521, |
|
"logits/rejected": 0.1413353830575943, |
|
"logps/chosen": -451.60137939453125, |
|
"logps/rejected": -439.4466857910156, |
|
"loss": 0.3143, |
|
"rewards/accuracies": 0.3472222089767456, |
|
"rewards/chosen": -0.0008689137175679207, |
|
"rewards/margins": 9.278658399125561e-05, |
|
"rewards/rejected": -0.0009617002215236425, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0638297872340427e-06, |
|
"logits/chosen": 0.16002216935157776, |
|
"logits/rejected": 0.29283756017684937, |
|
"logps/chosen": -446.17352294921875, |
|
"logps/rejected": -447.1424865722656, |
|
"loss": 0.3012, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.0012470056535676122, |
|
"rewards/margins": 7.514755270676687e-05, |
|
"rewards/rejected": -0.0013221531407907605, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.595744680851064e-06, |
|
"logits/chosen": 0.16278645396232605, |
|
"logits/rejected": 0.23292532563209534, |
|
"logps/chosen": -390.2185974121094, |
|
"logps/rejected": -385.9638977050781, |
|
"loss": 0.3074, |
|
"rewards/accuracies": 0.3187499940395355, |
|
"rewards/chosen": -0.001785513712093234, |
|
"rewards/margins": -4.0346338209928945e-05, |
|
"rewards/rejected": -0.0017451674211770296, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.1276595744680853e-06, |
|
"logits/chosen": 0.21185627579689026, |
|
"logits/rejected": 0.16483844816684723, |
|
"logps/chosen": -396.40234375, |
|
"logps/rejected": -413.7225036621094, |
|
"loss": 0.2927, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.003013796405866742, |
|
"rewards/margins": 0.0003060643211938441, |
|
"rewards/rejected": -0.003319860901683569, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.6595744680851065e-06, |
|
"logits/chosen": 0.13742004334926605, |
|
"logits/rejected": 0.31375652551651, |
|
"logps/chosen": -412.4908142089844, |
|
"logps/rejected": -392.556640625, |
|
"loss": 0.3019, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": -0.005482032895088196, |
|
"rewards/margins": 0.00019160615920554847, |
|
"rewards/rejected": -0.005673639010637999, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.191489361702128e-06, |
|
"logits/chosen": 0.22858476638793945, |
|
"logits/rejected": 0.2066006362438202, |
|
"logps/chosen": -394.327880859375, |
|
"logps/rejected": -392.41656494140625, |
|
"loss": 0.32, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.009965803474187851, |
|
"rewards/margins": 0.00048381154192611575, |
|
"rewards/rejected": -0.010449616238474846, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.723404255319149e-06, |
|
"logits/chosen": 0.29500612616539, |
|
"logits/rejected": 0.1506035029888153, |
|
"logps/chosen": -392.02960205078125, |
|
"logps/rejected": -427.17596435546875, |
|
"loss": 0.3328, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.01748177967965603, |
|
"rewards/margins": 0.0010660603875294328, |
|
"rewards/rejected": -0.018547840416431427, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.255319148936171e-06, |
|
"logits/chosen": 0.1481747180223465, |
|
"logits/rejected": 0.2276589423418045, |
|
"logps/chosen": -396.8224182128906, |
|
"logps/rejected": -373.9072570800781, |
|
"loss": 0.3068, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": -0.019837453961372375, |
|
"rewards/margins": 0.0006632342119701207, |
|
"rewards/rejected": -0.020500686019659042, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.787234042553192e-06, |
|
"logits/chosen": 0.13089337944984436, |
|
"logits/rejected": 0.19030170142650604, |
|
"logps/chosen": -379.7206115722656, |
|
"logps/rejected": -391.56512451171875, |
|
"loss": 0.2966, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -0.018794242292642593, |
|
"rewards/margins": 0.0023801042698323727, |
|
"rewards/rejected": -0.021174345165491104, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.999375059004058e-06, |
|
"logits/chosen": 0.13803163170814514, |
|
"logits/rejected": 0.24431411921977997, |
|
"logps/chosen": -408.5091247558594, |
|
"logps/rejected": -396.9778747558594, |
|
"loss": 0.3097, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.017420392483472824, |
|
"rewards/margins": 0.0018956039566546679, |
|
"rewards/rejected": -0.019315997138619423, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.9955571065548795e-06, |
|
"logits/chosen": 0.155339315533638, |
|
"logits/rejected": 0.1211571916937828, |
|
"logps/chosen": -430.47344970703125, |
|
"logps/rejected": -437.48956298828125, |
|
"loss": 0.2946, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.018004143610596657, |
|
"rewards/margins": 0.004096529446542263, |
|
"rewards/rejected": -0.022100670263171196, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9882736864879e-06, |
|
"logits/chosen": 0.1062009185552597, |
|
"logits/rejected": 0.26274779438972473, |
|
"logps/chosen": -413.7748107910156, |
|
"logps/rejected": -401.53778076171875, |
|
"loss": 0.3079, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.01474347896873951, |
|
"rewards/margins": 0.00652940571308136, |
|
"rewards/rejected": -0.02127288654446602, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.977534912960124e-06, |
|
"logits/chosen": 0.13843606412410736, |
|
"logits/rejected": 0.2405589520931244, |
|
"logps/chosen": -445.410888671875, |
|
"logps/rejected": -441.92486572265625, |
|
"loss": 0.3126, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.016456641256809235, |
|
"rewards/margins": 0.006972718983888626, |
|
"rewards/rejected": -0.02342936024069786, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.963355698422092e-06, |
|
"logits/chosen": 0.1316756308078766, |
|
"logits/rejected": 0.0917954295873642, |
|
"logps/chosen": -441.77911376953125, |
|
"logps/rejected": -474.7543029785156, |
|
"loss": 0.3121, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.030273189768195152, |
|
"rewards/margins": 0.006867046467959881, |
|
"rewards/rejected": -0.03714023157954216, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.945755732909625e-06, |
|
"logits/chosen": 0.07779763638973236, |
|
"logits/rejected": 0.16235283017158508, |
|
"logps/chosen": -468.13873291015625, |
|
"logps/rejected": -508.16888427734375, |
|
"loss": 0.2735, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.042156465351581573, |
|
"rewards/margins": 0.025058995932340622, |
|
"rewards/rejected": -0.0672154575586319, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.924759456701167e-06, |
|
"logits/chosen": 0.013025517575442791, |
|
"logits/rejected": 0.05558537319302559, |
|
"logps/chosen": -490.2015686035156, |
|
"logps/rejected": -526.5242919921875, |
|
"loss": 0.296, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.07817033678293228, |
|
"rewards/margins": 0.030566949397325516, |
|
"rewards/rejected": -0.1087372750043869, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.900396026378671e-06, |
|
"logits/chosen": 0.005712971091270447, |
|
"logits/rejected": -0.005013291724026203, |
|
"logps/chosen": -577.2238159179688, |
|
"logps/rejected": -608.1441650390625, |
|
"loss": 0.2962, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.12602511048316956, |
|
"rewards/margins": 0.029870549216866493, |
|
"rewards/rejected": -0.1558956503868103, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.872699274339169e-06, |
|
"logits/chosen": 0.0872177928686142, |
|
"logits/rejected": 0.047161780297756195, |
|
"logps/chosen": -466.25384521484375, |
|
"logps/rejected": -508.01190185546875, |
|
"loss": 0.2824, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.06323900073766708, |
|
"rewards/margins": 0.03396384045481682, |
|
"rewards/rejected": -0.0972028374671936, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.8417076618132434e-06, |
|
"logits/chosen": 0.040699899196624756, |
|
"logits/rejected": 0.08730391412973404, |
|
"logps/chosen": -451.14190673828125, |
|
"logps/rejected": -483.7627868652344, |
|
"loss": 0.269, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.05393596366047859, |
|
"rewards/margins": 0.02897338569164276, |
|
"rewards/rejected": -0.08290934562683105, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.807464225455655e-06, |
|
"logits/chosen": -0.029990673065185547, |
|
"logits/rejected": 0.0431547686457634, |
|
"logps/chosen": -511.7386779785156, |
|
"logps/rejected": -536.8895263671875, |
|
"loss": 0.2937, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.06965841352939606, |
|
"rewards/margins": 0.030940961092710495, |
|
"rewards/rejected": -0.10059938579797745, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.770016517582283e-06, |
|
"logits/chosen": -0.017718762159347534, |
|
"logits/rejected": 0.0021002888679504395, |
|
"logps/chosen": -475.02398681640625, |
|
"logps/rejected": -524.1427001953125, |
|
"loss": 0.2823, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.09790189564228058, |
|
"rewards/margins": 0.04916772618889809, |
|
"rewards/rejected": -0.14706961810588837, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7294165401363616e-06, |
|
"logits/chosen": 0.039277154952287674, |
|
"logits/rejected": 0.08032336086034775, |
|
"logps/chosen": -542.1201782226562, |
|
"logps/rejected": -580.0626220703125, |
|
"loss": 0.2699, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.11237654834985733, |
|
"rewards/margins": 0.05318045616149902, |
|
"rewards/rejected": -0.16555699706077576, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.68572067247573e-06, |
|
"logits/chosen": -0.040498532354831696, |
|
"logits/rejected": 0.038641445338726044, |
|
"logps/chosen": -530.233642578125, |
|
"logps/rejected": -566.8374633789062, |
|
"loss": 0.3008, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.11718879640102386, |
|
"rewards/margins": 0.04862760379910469, |
|
"rewards/rejected": -0.16581639647483826, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.638989593081364e-06, |
|
"logits/chosen": 0.03174019977450371, |
|
"logits/rejected": 0.028316298499703407, |
|
"logps/chosen": -544.1737060546875, |
|
"logps/rejected": -595.1419677734375, |
|
"loss": 0.2807, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.1217833161354065, |
|
"rewards/margins": 0.05592575669288635, |
|
"rewards/rejected": -0.17770907282829285, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5892881952959015e-06, |
|
"logits/chosen": -0.03996685892343521, |
|
"logits/rejected": 0.0781441181898117, |
|
"logps/chosen": -561.5933227539062, |
|
"logps/rejected": -588.9613037109375, |
|
"loss": 0.289, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.12694257497787476, |
|
"rewards/margins": 0.05903642624616623, |
|
"rewards/rejected": -0.18597903847694397, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.536685497209182e-06, |
|
"logits/chosen": -0.07835476100444794, |
|
"logits/rejected": -0.022311905398964882, |
|
"logps/chosen": -542.6710815429688, |
|
"logps/rejected": -574.5667114257812, |
|
"loss": 0.2814, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.13178928196430206, |
|
"rewards/margins": 0.04299298673868179, |
|
"rewards/rejected": -0.17478224635124207, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.481254545815943e-06, |
|
"logits/chosen": -0.08058448135852814, |
|
"logits/rejected": 0.07340067625045776, |
|
"logps/chosen": -508.29901123046875, |
|
"logps/rejected": -560.2567138671875, |
|
"loss": 0.2709, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.11642781645059586, |
|
"rewards/margins": 0.05028604343533516, |
|
"rewards/rejected": -0.1667138636112213, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.42307231557875e-06, |
|
"logits/chosen": -0.060424257069826126, |
|
"logits/rejected": -0.05060155317187309, |
|
"logps/chosen": -522.2492065429688, |
|
"logps/rejected": -580.984375, |
|
"loss": 0.2702, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.1186426654458046, |
|
"rewards/margins": 0.05852733179926872, |
|
"rewards/rejected": -0.17716999351978302, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.3622196015370305e-06, |
|
"logits/chosen": -0.0831371396780014, |
|
"logits/rejected": 0.1073642149567604, |
|
"logps/chosen": -578.5635986328125, |
|
"logps/rejected": -601.4964599609375, |
|
"loss": 0.2702, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.14065107703208923, |
|
"rewards/margins": 0.04782631993293762, |
|
"rewards/rejected": -0.18847739696502686, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.298780907110648e-06, |
|
"logits/chosen": -0.034952230751514435, |
|
"logits/rejected": -0.033971935510635376, |
|
"logps/chosen": -521.9036865234375, |
|
"logps/rejected": -568.0286865234375, |
|
"loss": 0.281, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.10558326542377472, |
|
"rewards/margins": 0.05633324384689331, |
|
"rewards/rejected": -0.16191650927066803, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.23284432675381e-06, |
|
"logits/chosen": -0.05012059211730957, |
|
"logits/rejected": -0.07782770693302155, |
|
"logps/chosen": -489.470458984375, |
|
"logps/rejected": -555.35302734375, |
|
"loss": 0.2723, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.09179838001728058, |
|
"rewards/margins": 0.06358543783426285, |
|
"rewards/rejected": -0.15538384020328522, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.164501423622277e-06, |
|
"logits/chosen": -0.19141286611557007, |
|
"logits/rejected": -0.10618031024932861, |
|
"logps/chosen": -450.0037536621094, |
|
"logps/rejected": -540.8576049804688, |
|
"loss": 0.2691, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.09631234407424927, |
|
"rewards/margins": 0.0666104406118393, |
|
"rewards/rejected": -0.16292276978492737, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.0938471024237355e-06, |
|
"logits/chosen": -0.05697326734662056, |
|
"logits/rejected": -0.13873888552188873, |
|
"logps/chosen": -481.6494140625, |
|
"logps/rejected": -599.4074096679688, |
|
"loss": 0.2811, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.12175127118825912, |
|
"rewards/margins": 0.0913892462849617, |
|
"rewards/rejected": -0.21314053237438202, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.020979477627907e-06, |
|
"logits/chosen": -0.08442874252796173, |
|
"logits/rejected": -0.0031359121203422546, |
|
"logps/chosen": -474.0492248535156, |
|
"logps/rejected": -555.919189453125, |
|
"loss": 0.2772, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.11865799129009247, |
|
"rewards/margins": 0.05636826157569885, |
|
"rewards/rejected": -0.17502623796463013, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.9459997372194105e-06, |
|
"logits/chosen": -0.0918760672211647, |
|
"logits/rejected": 0.022966912016272545, |
|
"logps/chosen": -555.8822021484375, |
|
"logps/rejected": -670.1365356445312, |
|
"loss": 0.2612, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.1380760669708252, |
|
"rewards/margins": 0.0869637131690979, |
|
"rewards/rejected": -0.2250397652387619, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.869012002182573e-06, |
|
"logits/chosen": -0.1127350777387619, |
|
"logits/rejected": -0.15412607789039612, |
|
"logps/chosen": -516.8878784179688, |
|
"logps/rejected": -592.467041015625, |
|
"loss": 0.2765, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.12640734016895294, |
|
"rewards/margins": 0.06364301592111588, |
|
"rewards/rejected": -0.19005033373832703, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.7901231819133104e-06, |
|
"logits/chosen": -0.1551699936389923, |
|
"logits/rejected": -0.055733174085617065, |
|
"logps/chosen": -500.0525817871094, |
|
"logps/rejected": -564.5525512695312, |
|
"loss": 0.271, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.11618797481060028, |
|
"rewards/margins": 0.05627555400133133, |
|
"rewards/rejected": -0.17246350646018982, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.709442825758875e-06, |
|
"logits/chosen": -0.15336255729198456, |
|
"logits/rejected": -0.09010852128267288, |
|
"logps/chosen": -532.9185180664062, |
|
"logps/rejected": -579.7677612304688, |
|
"loss": 0.2518, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.09618903696537018, |
|
"rewards/margins": 0.059428442269563675, |
|
"rewards/rejected": -0.15561747550964355, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.6270829708916113e-06, |
|
"logits/chosen": -0.14842228591442108, |
|
"logits/rejected": -0.18628902733325958, |
|
"logps/chosen": -499.26495361328125, |
|
"logps/rejected": -566.7688598632812, |
|
"loss": 0.2847, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.10898256301879883, |
|
"rewards/margins": 0.05999414250254631, |
|
"rewards/rejected": -0.16897672414779663, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.543157986727991e-06, |
|
"logits/chosen": -0.16273057460784912, |
|
"logits/rejected": -0.10563422739505768, |
|
"logps/chosen": -554.4479370117188, |
|
"logps/rejected": -563.3797607421875, |
|
"loss": 0.2849, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.10144559293985367, |
|
"rewards/margins": 0.04638112708926201, |
|
"rewards/rejected": -0.14782671630382538, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4577844161089614e-06, |
|
"logits/chosen": -0.11332042515277863, |
|
"logits/rejected": -0.11062748730182648, |
|
"logps/chosen": -518.7150268554688, |
|
"logps/rejected": -573.0471801757812, |
|
"loss": 0.259, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.10492125898599625, |
|
"rewards/margins": 0.046738140285015106, |
|
"rewards/rejected": -0.15165939927101135, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3710808134621577e-06, |
|
"logits/chosen": -0.158113032579422, |
|
"logits/rejected": -0.11588151752948761, |
|
"logps/chosen": -462.90423583984375, |
|
"logps/rejected": -516.649658203125, |
|
"loss": 0.2722, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.10597708076238632, |
|
"rewards/margins": 0.0523579902946949, |
|
"rewards/rejected": -0.15833505988121033, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2831675801707126e-06, |
|
"logits/chosen": -0.21510057151317596, |
|
"logits/rejected": -0.08049353212118149, |
|
"logps/chosen": -583.40283203125, |
|
"logps/rejected": -586.7947998046875, |
|
"loss": 0.2929, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.11892227083444595, |
|
"rewards/margins": 0.05448797345161438, |
|
"rewards/rejected": -0.17341025173664093, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.194166797377289e-06, |
|
"logits/chosen": -0.12544101476669312, |
|
"logits/rejected": -0.11414500325918198, |
|
"logps/chosen": -459.9356384277344, |
|
"logps/rejected": -505.929931640625, |
|
"loss": 0.2729, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.11049865186214447, |
|
"rewards/margins": 0.047077327966690063, |
|
"rewards/rejected": -0.15757599472999573, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.104202056455501e-06, |
|
"logits/chosen": -0.10923869907855988, |
|
"logits/rejected": -0.1209484338760376, |
|
"logps/chosen": -453.79071044921875, |
|
"logps/rejected": -520.7780151367188, |
|
"loss": 0.2685, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.09267580509185791, |
|
"rewards/margins": 0.06151905655860901, |
|
"rewards/rejected": -0.15419486165046692, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.013398287384144e-06, |
|
"logits/chosen": -0.20092570781707764, |
|
"logits/rejected": -0.04523925110697746, |
|
"logps/chosen": -501.44183349609375, |
|
"logps/rejected": -541.0614624023438, |
|
"loss": 0.2738, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.1075231060385704, |
|
"rewards/margins": 0.054855745285749435, |
|
"rewards/rejected": -0.16237884759902954, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9218815852625717e-06, |
|
"logits/chosen": -0.18216080963611603, |
|
"logits/rejected": -0.09222938120365143, |
|
"logps/chosen": -543.4312744140625, |
|
"logps/rejected": -569.2079467773438, |
|
"loss": 0.2854, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.11925343424081802, |
|
"rewards/margins": 0.03793327510356903, |
|
"rewards/rejected": -0.15718670189380646, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.829779035208113e-06, |
|
"logits/chosen": -0.12351751327514648, |
|
"logits/rejected": -0.06279022991657257, |
|
"logps/chosen": -496.760498046875, |
|
"logps/rejected": -529.7518310546875, |
|
"loss": 0.29, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.0903807058930397, |
|
"rewards/margins": 0.060529064387083054, |
|
"rewards/rejected": -0.15090976655483246, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.737218535878705e-06, |
|
"logits/chosen": -0.19289804995059967, |
|
"logits/rejected": -0.1217590793967247, |
|
"logps/chosen": -558.1929321289062, |
|
"logps/rejected": -592.0021362304688, |
|
"loss": 0.2771, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.11521999537944794, |
|
"rewards/margins": 0.05031859874725342, |
|
"rewards/rejected": -0.16553862392902374, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.64432862186579e-06, |
|
"logits/chosen": -0.033294953405857086, |
|
"logits/rejected": -0.07580285519361496, |
|
"logps/chosen": -491.3495178222656, |
|
"logps/rejected": -552.3511352539062, |
|
"loss": 0.2827, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.09923096001148224, |
|
"rewards/margins": 0.07192887365818024, |
|
"rewards/rejected": -0.17115983366966248, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.551238285204126e-06, |
|
"logits/chosen": -0.2156916856765747, |
|
"logits/rejected": -0.06504158675670624, |
|
"logps/chosen": -521.3781127929688, |
|
"logps/rejected": -539.2896728515625, |
|
"loss": 0.2831, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.10902263969182968, |
|
"rewards/margins": 0.04854800924658775, |
|
"rewards/rejected": -0.15757066011428833, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4580767962463688e-06, |
|
"logits/chosen": -0.12651291489601135, |
|
"logits/rejected": -0.11372752487659454, |
|
"logps/chosen": -529.92529296875, |
|
"logps/rejected": -584.7510986328125, |
|
"loss": 0.2821, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.11373905837535858, |
|
"rewards/margins": 0.055428702384233475, |
|
"rewards/rejected": -0.16916777193546295, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3649735241511546e-06, |
|
"logits/chosen": -0.17947567999362946, |
|
"logits/rejected": -0.15643848478794098, |
|
"logps/chosen": -505.3380432128906, |
|
"logps/rejected": -594.7916259765625, |
|
"loss": 0.2675, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.10552126169204712, |
|
"rewards/margins": 0.07201484590768814, |
|
"rewards/rejected": -0.17753610014915466, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2720577572339914e-06, |
|
"logits/chosen": -0.1622428297996521, |
|
"logits/rejected": -0.08399353176355362, |
|
"logps/chosen": -583.8826904296875, |
|
"logps/rejected": -565.4668579101562, |
|
"loss": 0.2736, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.11298346519470215, |
|
"rewards/margins": 0.0552997961640358, |
|
"rewards/rejected": -0.16828325390815735, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1794585234303995e-06, |
|
"logits/chosen": -0.15642212331295013, |
|
"logits/rejected": -0.11185960471630096, |
|
"logps/chosen": -478.8877868652344, |
|
"logps/rejected": -511.4723205566406, |
|
"loss": 0.2606, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.09287136793136597, |
|
"rewards/margins": 0.05674201250076294, |
|
"rewards/rejected": -0.1496133804321289, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0873044111206407e-06, |
|
"logits/chosen": -0.19819292426109314, |
|
"logits/rejected": -0.08049633353948593, |
|
"logps/chosen": -502.8023986816406, |
|
"logps/rejected": -527.1845703125, |
|
"loss": 0.2718, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.11550305783748627, |
|
"rewards/margins": 0.045039866119623184, |
|
"rewards/rejected": -0.16054292023181915, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9957233905648293e-06, |
|
"logits/chosen": -0.19029836356639862, |
|
"logits/rejected": -0.12178380787372589, |
|
"logps/chosen": -487.24029541015625, |
|
"logps/rejected": -522.4542846679688, |
|
"loss": 0.2977, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.08964806795120239, |
|
"rewards/margins": 0.05422767996788025, |
|
"rewards/rejected": -0.14387574791908264, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.904842636196402e-06, |
|
"logits/chosen": -0.11798320710659027, |
|
"logits/rejected": -0.06398233026266098, |
|
"logps/chosen": -555.4868774414062, |
|
"logps/rejected": -606.1224365234375, |
|
"loss": 0.2674, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.10348886251449585, |
|
"rewards/margins": 0.07239896804094315, |
|
"rewards/rejected": -0.1758878380060196, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.814788350020726e-06, |
|
"logits/chosen": -0.1332792490720749, |
|
"logits/rejected": -0.1340535581111908, |
|
"logps/chosen": -518.5892944335938, |
|
"logps/rejected": -602.2269287109375, |
|
"loss": 0.253, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.09993033111095428, |
|
"rewards/margins": 0.06545485556125641, |
|
"rewards/rejected": -0.1653851717710495, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.725685586364051e-06, |
|
"logits/chosen": -0.24548590183258057, |
|
"logits/rejected": -0.07364498823881149, |
|
"logps/chosen": -528.7620849609375, |
|
"logps/rejected": -581.3148803710938, |
|
"loss": 0.2575, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.10455663502216339, |
|
"rewards/margins": 0.05815718695521355, |
|
"rewards/rejected": -0.16271382570266724, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6376580782162172e-06, |
|
"logits/chosen": -0.17857643961906433, |
|
"logits/rejected": -0.05776941031217575, |
|
"logps/chosen": -495.97613525390625, |
|
"logps/rejected": -546.7186279296875, |
|
"loss": 0.2811, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.09525806456804276, |
|
"rewards/margins": 0.07870879769325256, |
|
"rewards/rejected": -0.17396686971187592, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.550828065408227e-06, |
|
"logits/chosen": -0.1720505952835083, |
|
"logits/rejected": -0.25772562623023987, |
|
"logps/chosen": -486.467529296875, |
|
"logps/rejected": -584.5782470703125, |
|
"loss": 0.2582, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.09170956909656525, |
|
"rewards/margins": 0.09501364082098007, |
|
"rewards/rejected": -0.18672320246696472, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4653161248633053e-06, |
|
"logits/chosen": -0.21994712948799133, |
|
"logits/rejected": -0.1386212706565857, |
|
"logps/chosen": -567.0380859375, |
|
"logps/rejected": -595.9439697265625, |
|
"loss": 0.2555, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.11543399095535278, |
|
"rewards/margins": 0.06394585222005844, |
|
"rewards/rejected": -0.17937985062599182, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.381241003157162e-06, |
|
"logits/chosen": -0.17482668161392212, |
|
"logits/rejected": -0.11466997861862183, |
|
"logps/chosen": -497.3523864746094, |
|
"logps/rejected": -575.636962890625, |
|
"loss": 0.2737, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.10399528592824936, |
|
"rewards/margins": 0.07409018278121948, |
|
"rewards/rejected": -0.17808546125888824, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.298719451619979e-06, |
|
"logits/chosen": -0.1267072856426239, |
|
"logits/rejected": -0.22677993774414062, |
|
"logps/chosen": -562.6024169921875, |
|
"logps/rejected": -630.305419921875, |
|
"loss": 0.2679, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.11934264004230499, |
|
"rewards/margins": 0.08058986812829971, |
|
"rewards/rejected": -0.1999325305223465, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2178660642091036e-06, |
|
"logits/chosen": -0.2002047747373581, |
|
"logits/rejected": -0.1947084367275238, |
|
"logps/chosen": -541.3032836914062, |
|
"logps/rejected": -579.1988525390625, |
|
"loss": 0.2595, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.10852652788162231, |
|
"rewards/margins": 0.0627359002828598, |
|
"rewards/rejected": -0.17126242816448212, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1387931183775821e-06, |
|
"logits/chosen": -0.1807963103055954, |
|
"logits/rejected": -0.1801833063364029, |
|
"logps/chosen": -545.267333984375, |
|
"logps/rejected": -617.2647705078125, |
|
"loss": 0.2531, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.10741935670375824, |
|
"rewards/margins": 0.07981701195240021, |
|
"rewards/rejected": -0.18723638355731964, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.061610419159532e-06, |
|
"logits/chosen": -0.16296163201332092, |
|
"logits/rejected": -0.16335263848304749, |
|
"logps/chosen": -568.46923828125, |
|
"logps/rejected": -639.3665771484375, |
|
"loss": 0.2827, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.1257370114326477, |
|
"rewards/margins": 0.07859646528959274, |
|
"rewards/rejected": -0.20433346927165985, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.864251466888364e-07, |
|
"logits/chosen": -0.15190322697162628, |
|
"logits/rejected": -0.023175863549113274, |
|
"logps/chosen": -568.4057006835938, |
|
"logps/rejected": -605.7794189453125, |
|
"loss": 0.2618, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.1031126156449318, |
|
"rewards/margins": 0.07158243656158447, |
|
"rewards/rejected": -0.17469504475593567, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.133417073629288e-07, |
|
"logits/chosen": -0.2676157057285309, |
|
"logits/rejected": -0.15279750525951385, |
|
"logps/chosen": -566.8143310546875, |
|
"logps/rejected": -579.0692138671875, |
|
"loss": 0.284, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.11574618518352509, |
|
"rewards/margins": 0.03857272118330002, |
|
"rewards/rejected": -0.1543188989162445, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.424615888583332e-07, |
|
"logits/chosen": -0.13174203038215637, |
|
"logits/rejected": -0.1824689358472824, |
|
"logps/chosen": -478.6060485839844, |
|
"logps/rejected": -531.3140869140625, |
|
"loss": 0.2583, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.08994091302156448, |
|
"rewards/margins": 0.053567446768283844, |
|
"rewards/rejected": -0.14350834488868713, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.738832191993092e-07, |
|
"logits/chosen": -0.08020667731761932, |
|
"logits/rejected": -0.13351663947105408, |
|
"logps/chosen": -520.2698974609375, |
|
"logps/rejected": -585.7660522460938, |
|
"loss": 0.2855, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.10003659874200821, |
|
"rewards/margins": 0.06318075209856033, |
|
"rewards/rejected": -0.16321733593940735, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.077018300752917e-07, |
|
"logits/chosen": -0.18519246578216553, |
|
"logits/rejected": -0.13630035519599915, |
|
"logps/chosen": -504.2767639160156, |
|
"logps/rejected": -569.7481689453125, |
|
"loss": 0.2545, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.09188510477542877, |
|
"rewards/margins": 0.06354068219661713, |
|
"rewards/rejected": -0.1554257869720459, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.440093245969342e-07, |
|
"logits/chosen": -0.19004374742507935, |
|
"logits/rejected": -0.1609114110469818, |
|
"logps/chosen": -557.939208984375, |
|
"logps/rejected": -546.6970825195312, |
|
"loss": 0.2965, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.10281065851449966, |
|
"rewards/margins": 0.04500015825033188, |
|
"rewards/rejected": -0.14781081676483154, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.828941496744075e-07, |
|
"logits/chosen": -0.17647784948349, |
|
"logits/rejected": -0.13355641067028046, |
|
"logps/chosen": -567.4881591796875, |
|
"logps/rejected": -612.3013305664062, |
|
"loss": 0.2652, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.11634314060211182, |
|
"rewards/margins": 0.061061274260282516, |
|
"rewards/rejected": -0.17740443348884583, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.244411731951671e-07, |
|
"logits/chosen": -0.2159070074558258, |
|
"logits/rejected": -0.10073345899581909, |
|
"logps/chosen": -459.47149658203125, |
|
"logps/rejected": -529.6015014648438, |
|
"loss": 0.2666, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.08737196773290634, |
|
"rewards/margins": 0.06437215954065323, |
|
"rewards/rejected": -0.15174412727355957, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.6873156617173594e-07, |
|
"logits/chosen": -0.1441185027360916, |
|
"logits/rejected": -0.027431348338723183, |
|
"logps/chosen": -476.61798095703125, |
|
"logps/rejected": -507.6756286621094, |
|
"loss": 0.2678, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.09565510600805283, |
|
"rewards/margins": 0.05930342152714729, |
|
"rewards/rejected": -0.15495853126049042, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1584269002318653e-07, |
|
"logits/chosen": -0.14351201057434082, |
|
"logits/rejected": -0.13506287336349487, |
|
"logps/chosen": -515.0467529296875, |
|
"logps/rejected": -579.8074951171875, |
|
"loss": 0.2695, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.10201451927423477, |
|
"rewards/margins": 0.05492178350687027, |
|
"rewards/rejected": -0.15693630278110504, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.658479891468258e-07, |
|
"logits/chosen": -0.12005716562271118, |
|
"logits/rejected": -0.13143005967140198, |
|
"logps/chosen": -480.64166259765625, |
|
"logps/rejected": -542.0699462890625, |
|
"loss": 0.2681, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.10447077453136444, |
|
"rewards/margins": 0.0592375211417675, |
|
"rewards/rejected": -0.16370829939842224, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.18816888929272e-07, |
|
"logits/chosen": -0.12967665493488312, |
|
"logits/rejected": -0.09862512350082397, |
|
"logps/chosen": -504.17559814453125, |
|
"logps/rejected": -561.1007690429688, |
|
"loss": 0.2608, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.10162999480962753, |
|
"rewards/margins": 0.052912771701812744, |
|
"rewards/rejected": -0.15454277396202087, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.748146993385484e-07, |
|
"logits/chosen": -0.12466283142566681, |
|
"logits/rejected": -0.18476256728172302, |
|
"logps/chosen": -521.7217407226562, |
|
"logps/rejected": -606.356689453125, |
|
"loss": 0.2598, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.11335861682891846, |
|
"rewards/margins": 0.0591861791908741, |
|
"rewards/rejected": -0.17254477739334106, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.3390252423108077e-07, |
|
"logits/chosen": -0.17083628475666046, |
|
"logits/rejected": -0.12109515815973282, |
|
"logps/chosen": -456.31787109375, |
|
"logps/rejected": -514.2985229492188, |
|
"loss": 0.2716, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.09381003677845001, |
|
"rewards/margins": 0.06866031140089035, |
|
"rewards/rejected": -0.16247034072875977, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.961371764995243e-07, |
|
"logits/chosen": -0.16086441278457642, |
|
"logits/rejected": -0.13963501155376434, |
|
"logps/chosen": -485.7345275878906, |
|
"logps/rejected": -566.4594116210938, |
|
"loss": 0.2668, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.08713211864233017, |
|
"rewards/margins": 0.0719505250453949, |
|
"rewards/rejected": -0.15908263623714447, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.61571099179261e-07, |
|
"logits/chosen": -0.08465079963207245, |
|
"logits/rejected": -0.21077242493629456, |
|
"logps/chosen": -507.953369140625, |
|
"logps/rejected": -587.9386596679688, |
|
"loss": 0.2522, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.09845003485679626, |
|
"rewards/margins": 0.0746922716498375, |
|
"rewards/rejected": -0.17314231395721436, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.3025229262312367e-07, |
|
"logits/chosen": -0.12216424942016602, |
|
"logits/rejected": -0.10754810273647308, |
|
"logps/chosen": -534.802734375, |
|
"logps/rejected": -569.3134155273438, |
|
"loss": 0.2789, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.1041802167892456, |
|
"rewards/margins": 0.06386038661003113, |
|
"rewards/rejected": -0.16804060339927673, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0222424784546853e-07, |
|
"logits/chosen": -0.2511650621891022, |
|
"logits/rejected": -0.1288149058818817, |
|
"logps/chosen": -511.1424255371094, |
|
"logps/rejected": -537.6439208984375, |
|
"loss": 0.2629, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": -0.08994197845458984, |
|
"rewards/margins": 0.051044024527072906, |
|
"rewards/rejected": -0.14098599553108215, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.752588612816553e-08, |
|
"logits/chosen": -0.2117297351360321, |
|
"logits/rejected": -0.17164471745491028, |
|
"logps/chosen": -505.76715087890625, |
|
"logps/rejected": -567.2267456054688, |
|
"loss": 0.2535, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.0884295180439949, |
|
"rewards/margins": 0.07234706729650497, |
|
"rewards/rejected": -0.16077657043933868, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.619150497236991e-08, |
|
"logits/chosen": -0.13743457198143005, |
|
"logits/rejected": -0.13362528383731842, |
|
"logps/chosen": -507.50946044921875, |
|
"logps/rejected": -544.0374755859375, |
|
"loss": 0.2857, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.10853898525238037, |
|
"rewards/margins": 0.044584743678569794, |
|
"rewards/rejected": -0.15312373638153076, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.825073047112743e-08, |
|
"logits/chosen": -0.16523101925849915, |
|
"logits/rejected": -0.1837397962808609, |
|
"logps/chosen": -475.5501403808594, |
|
"logps/rejected": -527.5878295898438, |
|
"loss": 0.2617, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.08936251699924469, |
|
"rewards/margins": 0.06296424567699432, |
|
"rewards/rejected": -0.15232674777507782, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.372847616895685e-08, |
|
"logits/chosen": -0.23205402493476868, |
|
"logits/rejected": -0.2261538952589035, |
|
"logps/chosen": -500.764892578125, |
|
"logps/rejected": -598.1412963867188, |
|
"loss": 0.2395, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.09027452021837234, |
|
"rewards/margins": 0.07599581778049469, |
|
"rewards/rejected": -0.16627033054828644, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.264490846553279e-08, |
|
"logits/chosen": -0.18639487028121948, |
|
"logits/rejected": -0.2467382252216339, |
|
"logps/chosen": -519.2239379882812, |
|
"logps/rejected": -602.8407592773438, |
|
"loss": 0.2546, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.09007105976343155, |
|
"rewards/margins": 0.07813303172588348, |
|
"rewards/rejected": -0.16820409893989563, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.015418611516165e-09, |
|
"logits/chosen": -0.09497157484292984, |
|
"logits/rejected": -0.20631170272827148, |
|
"logps/chosen": -507.2242126464844, |
|
"logps/rejected": -574.6315307617188, |
|
"loss": 0.2819, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.10080975294113159, |
|
"rewards/margins": 0.06279795616865158, |
|
"rewards/rejected": -0.16360768675804138, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 8.506013354186993e-10, |
|
"logits/chosen": -0.13831308484077454, |
|
"logits/rejected": -0.15870514512062073, |
|
"logps/chosen": -443.75421142578125, |
|
"logps/rejected": -540.8892822265625, |
|
"loss": 0.2713, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.09177975356578827, |
|
"rewards/margins": 0.07154536247253418, |
|
"rewards/rejected": -0.16332513093948364, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 937, |
|
"total_flos": 0.0, |
|
"train_loss": 0.2777036651094288, |
|
"train_runtime": 6958.035, |
|
"train_samples_per_second": 4.312, |
|
"train_steps_per_second": 0.135 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 937, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|