{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9940933254577673,
"eval_steps": 43,
"global_step": 422,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.004725339633786178,
"grad_norm": 1.1176782705315427,
"learning_rate": 3.846153846153846e-08,
"logits/chosen": -1.5937305688858032,
"logits/rejected": -1.7021960020065308,
"logps/chosen": -247.54559326171875,
"logps/rejected": -179.0218048095703,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.009450679267572357,
"grad_norm": 1.2535773078918948,
"learning_rate": 7.692307692307692e-08,
"logits/chosen": -1.7181015014648438,
"logits/rejected": -1.644026756286621,
"logps/chosen": -259.1505432128906,
"logps/rejected": -241.68020629882812,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 2
},
{
"epoch": 0.014176018901358535,
"grad_norm": 0.995484839907434,
"learning_rate": 1.1538461538461539e-07,
"logits/chosen": -0.8613071441650391,
"logits/rejected": -0.8891040682792664,
"logps/chosen": -230.91070556640625,
"logps/rejected": -219.62979125976562,
"loss": 0.6934,
"rewards/accuracies": 0.390625,
"rewards/chosen": 6.782727723475546e-05,
"rewards/margins": -0.0004302160523366183,
"rewards/rejected": 0.0004980433732271194,
"step": 3
},
{
"epoch": 0.018901358535144713,
"grad_norm": 1.1568688062326662,
"learning_rate": 1.5384615384615385e-07,
"logits/chosen": -1.1649622917175293,
"logits/rejected": -1.131172776222229,
"logps/chosen": -184.93499755859375,
"logps/rejected": -184.127197265625,
"loss": 0.693,
"rewards/accuracies": 0.421875,
"rewards/chosen": -0.0002185619086958468,
"rewards/margins": -0.0006310059688985348,
"rewards/rejected": 0.0004124442348256707,
"step": 4
},
{
"epoch": 0.02362669816893089,
"grad_norm": 1.0155652752384032,
"learning_rate": 1.9230769230769231e-07,
"logits/chosen": -1.8650751113891602,
"logits/rejected": -1.9386688470840454,
"logps/chosen": -193.21636962890625,
"logps/rejected": -175.6696014404297,
"loss": 0.6932,
"rewards/accuracies": 0.5,
"rewards/chosen": 0.00019388733198866248,
"rewards/margins": -0.00023763455101288855,
"rewards/rejected": 0.0004315219703130424,
"step": 5
},
{
"epoch": 0.02835203780271707,
"grad_norm": 1.2022303509332404,
"learning_rate": 2.3076923076923078e-07,
"logits/chosen": -1.8849067687988281,
"logits/rejected": -1.8837637901306152,
"logps/chosen": -234.3896484375,
"logps/rejected": -218.09625244140625,
"loss": 0.6934,
"rewards/accuracies": 0.4375,
"rewards/chosen": 0.0004923291853629053,
"rewards/margins": 0.0006284262635745108,
"rewards/rejected": -0.00013609707821160555,
"step": 6
},
{ |
|
"epoch": 0.03307737743650325, |
|
"grad_norm": 1.136952564893261, |
|
"learning_rate": 2.692307692307692e-07, |
|
"logits/chosen": -1.816144585609436, |
|
"logits/rejected": -1.934072494506836, |
|
"logps/chosen": -246.14027404785156, |
|
"logps/rejected": -177.02993774414062, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.0834413589909673e-05, |
|
"rewards/margins": 0.001102915033698082, |
|
"rewards/rejected": -0.0011237493017688394, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.03780271707028943, |
|
"grad_norm": 1.1596582232433024, |
|
"learning_rate": 3.076923076923077e-07, |
|
"logits/chosen": -1.8363107442855835, |
|
"logits/rejected": -1.8167006969451904, |
|
"logps/chosen": -239.65370178222656, |
|
"logps/rejected": -221.20333862304688, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.00017343120998702943, |
|
"rewards/margins": -0.0007941695512272418, |
|
"rewards/rejected": 0.0006207384867593646, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.042528056704075605, |
|
"grad_norm": 1.1810520949859216, |
|
"learning_rate": 3.461538461538461e-07, |
|
"logits/chosen": -1.6903538703918457, |
|
"logits/rejected": -1.7734307050704956, |
|
"logps/chosen": -239.26535034179688, |
|
"logps/rejected": -193.875244140625, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": 0.0003690614248625934, |
|
"rewards/margins": 0.00022412401449400932, |
|
"rewards/rejected": 0.00014493743947241455, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.04725339633786178, |
|
"grad_norm": 1.253373850013781, |
|
"learning_rate": 3.8461538461538463e-07, |
|
"logits/chosen": -1.5372889041900635, |
|
"logits/rejected": -1.5536653995513916, |
|
"logps/chosen": -222.93399047851562, |
|
"logps/rejected": -223.1899871826172, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.0005048069870099425, |
|
"rewards/margins": 0.000533790560439229, |
|
"rewards/rejected": -0.0010385976638644934, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05197873597164796, |
|
"grad_norm": 1.1609876885112298, |
|
"learning_rate": 4.2307692307692304e-07, |
|
"logits/chosen": -1.20901620388031, |
|
"logits/rejected": -1.2452011108398438, |
|
"logps/chosen": -274.1497497558594, |
|
"logps/rejected": -227.36790466308594, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.0004496507463045418, |
|
"rewards/margins": -0.0006296815699897707, |
|
"rewards/rejected": 0.0001800309109967202, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.05670407560543414, |
|
"grad_norm": 1.1166995258772006, |
|
"learning_rate": 4.6153846153846156e-07, |
|
"logits/chosen": -1.8701603412628174, |
|
"logits/rejected": -1.8107975721359253, |
|
"logps/chosen": -219.3397674560547, |
|
"logps/rejected": -232.28269958496094, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.0005328843253664672, |
|
"rewards/margins": 0.000161867166752927, |
|
"rewards/rejected": -0.0006947515066713095, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.06142941523922032, |
|
"grad_norm": 1.1010862122779934, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -1.3035281896591187, |
|
"logits/rejected": -1.3319075107574463, |
|
"logps/chosen": -200.9400634765625, |
|
"logps/rejected": -177.5894317626953, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.00024598470190539956, |
|
"rewards/margins": 0.001570576336234808, |
|
"rewards/rejected": -0.0013245916925370693, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0661547548730065, |
|
"grad_norm": 1.109450926366569, |
|
"learning_rate": 4.999926250172797e-07, |
|
"logits/chosen": -1.2467422485351562, |
|
"logits/rejected": -1.2619496583938599, |
|
"logps/chosen": -247.1371612548828, |
|
"logps/rejected": -232.03895568847656, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": 0.00045037176460027695, |
|
"rewards/margins": 0.0029086670838296413, |
|
"rewards/rejected": -0.0024582953192293644, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.07088009450679268, |
|
"grad_norm": 1.0686336079566285, |
|
"learning_rate": 4.999705005042417e-07, |
|
"logits/chosen": -0.9053488969802856, |
|
"logits/rejected": -0.9105295538902283, |
|
"logps/chosen": -200.528076171875, |
|
"logps/rejected": -191.2373046875, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.484375, |
|
"rewards/chosen": -0.0003694885817822069, |
|
"rewards/margins": 0.001977597363293171, |
|
"rewards/rejected": -0.00234708609059453, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.07560543414057885, |
|
"grad_norm": 1.1198403704053106, |
|
"learning_rate": 4.999336277662292e-07, |
|
"logits/chosen": -1.382132887840271, |
|
"logits/rejected": -1.3727940320968628, |
|
"logps/chosen": -237.94508361816406, |
|
"logps/rejected": -251.71519470214844, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -3.6539247957989573e-05, |
|
"rewards/margins": 0.002266494557261467, |
|
"rewards/rejected": -0.002303033834323287, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.08033077377436504, |
|
"grad_norm": 1.022581880493408, |
|
"learning_rate": 4.998820089787287e-07, |
|
"logits/chosen": -1.0172172784805298, |
|
"logits/rejected": -1.0724008083343506, |
|
"logps/chosen": -232.03070068359375, |
|
"logps/rejected": -214.65155029296875, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.0009355681831948459, |
|
"rewards/margins": 0.002296539256349206, |
|
"rewards/rejected": -0.003232107497751713, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.08505611340815121, |
|
"grad_norm": 1.127368600628108, |
|
"learning_rate": 4.998156471872415e-07, |
|
"logits/chosen": -1.6294444799423218, |
|
"logits/rejected": -1.6482771635055542, |
|
"logps/chosen": -226.63442993164062, |
|
"logps/rejected": -209.50799560546875, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": 0.0007066840189509094, |
|
"rewards/margins": 0.005803423933684826, |
|
"rewards/rejected": -0.005096739623695612, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0897814530419374, |
|
"grad_norm": 1.1009230447057081, |
|
"learning_rate": 4.997345463071041e-07, |
|
"logits/chosen": -1.9955631494522095, |
|
"logits/rejected": -1.9177535772323608, |
|
"logps/chosen": -219.0171661376953, |
|
"logps/rejected": -204.72091674804688, |
|
"loss": 0.6906, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -0.001279333489947021, |
|
"rewards/margins": 0.003984270617365837, |
|
"rewards/rejected": -0.005263603758066893, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.09450679267572357, |
|
"grad_norm": 1.1219432468145276, |
|
"learning_rate": 4.996387111232572e-07, |
|
"logits/chosen": -0.9923038482666016, |
|
"logits/rejected": -1.0317736864089966, |
|
"logps/chosen": -207.70364379882812, |
|
"logps/rejected": -210.37612915039062, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.0004186414007563144, |
|
"rewards/margins": 0.007054868154227734, |
|
"rewards/rejected": -0.007473509293049574, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.09923213230950975, |
|
"grad_norm": 1.1456255677406275, |
|
"learning_rate": 4.995281472899636e-07, |
|
"logits/chosen": -1.2455811500549316, |
|
"logits/rejected": -1.2986594438552856, |
|
"logps/chosen": -260.4434814453125, |
|
"logps/rejected": -239.2939453125, |
|
"loss": 0.6899, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.001413986086845398, |
|
"rewards/margins": 0.006085187196731567, |
|
"rewards/rejected": -0.007499172817915678, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.10395747194329592, |
|
"grad_norm": 1.0796373972108642, |
|
"learning_rate": 4.99402861330474e-07, |
|
"logits/chosen": -1.6776717901229858, |
|
"logits/rejected": -1.6839845180511475, |
|
"logps/chosen": -243.64987182617188, |
|
"logps/rejected": -224.65371704101562, |
|
"loss": 0.6898, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.0021368232555687428, |
|
"rewards/margins": 0.007371032610535622, |
|
"rewards/rejected": -0.009507855400443077, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.10868281157708211, |
|
"grad_norm": 1.067721897185461, |
|
"learning_rate": 4.992628606366425e-07, |
|
"logits/chosen": -1.6994775533676147, |
|
"logits/rejected": -1.6823248863220215, |
|
"logps/chosen": -184.07522583007812, |
|
"logps/rejected": -201.24017333984375, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.0037174485623836517, |
|
"rewards/margins": 0.005765101406723261, |
|
"rewards/rejected": -0.009482549503445625, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.11340815121086828, |
|
"grad_norm": 1.1222480052149575, |
|
"learning_rate": 4.991081534684911e-07, |
|
"logits/chosen": -1.3170721530914307, |
|
"logits/rejected": -1.3209936618804932, |
|
"logps/chosen": -173.28440856933594, |
|
"logps/rejected": -180.39111328125, |
|
"loss": 0.6882, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.0013278307160362601, |
|
"rewards/margins": 0.01227161381393671, |
|
"rewards/rejected": -0.013599444180727005, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.11813349084465447, |
|
"grad_norm": 1.1336846530783633, |
|
"learning_rate": 4.98938748953721e-07, |
|
"logits/chosen": -1.2200762033462524, |
|
"logits/rejected": -1.2801724672317505, |
|
"logps/chosen": -228.53701782226562, |
|
"logps/rejected": -209.99066162109375, |
|
"loss": 0.6876, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.0031590620055794716, |
|
"rewards/margins": 0.014617552980780602, |
|
"rewards/rejected": -0.017776615917682648, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.12285883047844064, |
|
"grad_norm": 1.1070134845717687, |
|
"learning_rate": 4.987546570871754e-07, |
|
"logits/chosen": -1.7048778533935547, |
|
"logits/rejected": -1.6716248989105225, |
|
"logps/chosen": -237.16851806640625, |
|
"logps/rejected": -234.78970336914062, |
|
"loss": 0.6868, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.006663296837359667, |
|
"rewards/margins": 0.013624398037791252, |
|
"rewards/rejected": -0.02028769627213478, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.1275841701122268, |
|
"grad_norm": 1.1180888707264458, |
|
"learning_rate": 4.985558887302488e-07, |
|
"logits/chosen": -1.692581057548523, |
|
"logits/rejected": -1.7662020921707153, |
|
"logps/chosen": -197.77049255371094, |
|
"logps/rejected": -182.52011108398438, |
|
"loss": 0.686, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.003295771311968565, |
|
"rewards/margins": 0.009790323674678802, |
|
"rewards/rejected": -0.01308609452098608, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.132309509746013, |
|
"grad_norm": 1.1912060262420856, |
|
"learning_rate": 4.983424556102468e-07, |
|
"logits/chosen": -1.8331196308135986, |
|
"logits/rejected": -1.8659313917160034, |
|
"logps/chosen": -200.51959228515625, |
|
"logps/rejected": -181.59939575195312, |
|
"loss": 0.6841, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.004111767280846834, |
|
"rewards/margins": 0.017347747460007668, |
|
"rewards/rejected": -0.021459516137838364, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.13703484937979918, |
|
"grad_norm": 1.1795634739349266, |
|
"learning_rate": 4.981143703196941e-07, |
|
"logits/chosen": -2.1984832286834717, |
|
"logits/rejected": -2.172135591506958, |
|
"logps/chosen": -193.10231018066406, |
|
"logps/rejected": -180.98953247070312, |
|
"loss": 0.6844, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.010889173485338688, |
|
"rewards/margins": 0.014179128222167492, |
|
"rewards/rejected": -0.02506830170750618, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.14176018901358536, |
|
"grad_norm": 1.066586203576245, |
|
"learning_rate": 4.978716463155912e-07, |
|
"logits/chosen": -2.06459379196167, |
|
"logits/rejected": -2.0417637825012207, |
|
"logps/chosen": -159.03701782226562, |
|
"logps/rejected": -195.22523498535156, |
|
"loss": 0.6843, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.0062042661011219025, |
|
"rewards/margins": 0.016392884775996208, |
|
"rewards/rejected": -0.02259715273976326, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.14648552864737152, |
|
"grad_norm": 1.2110413924457768, |
|
"learning_rate": 4.976142979186209e-07, |
|
"logits/chosen": -1.9228863716125488, |
|
"logits/rejected": -1.9126472473144531, |
|
"logps/chosen": -202.9162139892578, |
|
"logps/rejected": -175.83433532714844, |
|
"loss": 0.6821, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.007678491994738579, |
|
"rewards/margins": 0.02073112316429615, |
|
"rewards/rejected": -0.02840961515903473, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.1512108682811577, |
|
"grad_norm": 1.2866090020461936, |
|
"learning_rate": 4.973423403123028e-07, |
|
"logits/chosen": -1.701865792274475, |
|
"logits/rejected": -1.7730942964553833, |
|
"logps/chosen": -235.43563842773438, |
|
"logps/rejected": -229.68411254882812, |
|
"loss": 0.6798, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.0038016163744032383, |
|
"rewards/margins": 0.025650067254900932, |
|
"rewards/rejected": -0.029451683163642883, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.1559362079149439, |
|
"grad_norm": 1.1184970866462265, |
|
"learning_rate": 4.970557895420983e-07, |
|
"logits/chosen": -1.7996641397476196, |
|
"logits/rejected": -1.7775640487670898, |
|
"logps/chosen": -172.83163452148438, |
|
"logps/rejected": -201.3394317626953, |
|
"loss": 0.6818, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.014111585915088654, |
|
"rewards/margins": 0.02376263216137886, |
|
"rewards/rejected": -0.03787422180175781, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.16066154754873008, |
|
"grad_norm": 1.1028286808678158, |
|
"learning_rate": 4.967546625144633e-07, |
|
"logits/chosen": -1.1831226348876953, |
|
"logits/rejected": -1.1660494804382324, |
|
"logps/chosen": -177.04531860351562, |
|
"logps/rejected": -188.2403564453125, |
|
"loss": 0.6822, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.011519413441419601, |
|
"rewards/margins": 0.02097604051232338, |
|
"rewards/rejected": -0.03249545022845268, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.16538688718251623, |
|
"grad_norm": 1.1486945876483394, |
|
"learning_rate": 4.964389769958506e-07, |
|
"logits/chosen": -1.382279634475708, |
|
"logits/rejected": -1.419837474822998, |
|
"logps/chosen": -156.91551208496094, |
|
"logps/rejected": -157.46437072753906, |
|
"loss": 0.6784, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.012027422897517681, |
|
"rewards/margins": 0.030697565525770187, |
|
"rewards/rejected": -0.04272499307990074, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.17011222681630242, |
|
"grad_norm": 1.1027256033620638, |
|
"learning_rate": 4.961087516116621e-07, |
|
"logits/chosen": -1.1302804946899414, |
|
"logits/rejected": -1.1647142171859741, |
|
"logps/chosen": -263.8468322753906, |
|
"logps/rejected": -243.76544189453125, |
|
"loss": 0.6793, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.020269813016057014, |
|
"rewards/margins": 0.02116047963500023, |
|
"rewards/rejected": -0.041430290788412094, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.1748375664500886, |
|
"grad_norm": 1.0823022111801957, |
|
"learning_rate": 4.957640058451501e-07, |
|
"logits/chosen": -1.5351812839508057, |
|
"logits/rejected": -1.5872442722320557, |
|
"logps/chosen": -204.82644653320312, |
|
"logps/rejected": -176.9189453125, |
|
"loss": 0.6799, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.02254084125161171, |
|
"rewards/margins": 0.017112018540501595, |
|
"rewards/rejected": -0.039652857929468155, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.1795629060838748, |
|
"grad_norm": 1.0721194904364362, |
|
"learning_rate": 4.954047600362669e-07, |
|
"logits/chosen": -1.8736214637756348, |
|
"logits/rejected": -1.819676160812378, |
|
"logps/chosen": -178.29270935058594, |
|
"logps/rejected": -191.61605834960938, |
|
"loss": 0.6803, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.02358250319957733, |
|
"rewards/margins": 0.031186437234282494, |
|
"rewards/rejected": -0.054768942296504974, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.18428824571766095, |
|
"grad_norm": 1.0629161817956538, |
|
"learning_rate": 4.950310353804659e-07, |
|
"logits/chosen": -1.7329224348068237, |
|
"logits/rejected": -1.780181646347046, |
|
"logps/chosen": -188.88072204589844, |
|
"logps/rejected": -177.74427795410156, |
|
"loss": 0.6784, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.024656984955072403, |
|
"rewards/margins": 0.03351202234625816, |
|
"rewards/rejected": -0.058169007301330566, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.18901358535144713, |
|
"grad_norm": 1.0963449505388767, |
|
"learning_rate": 4.946428539274497e-07, |
|
"logits/chosen": -1.920142650604248, |
|
"logits/rejected": -1.9512125253677368, |
|
"logps/chosen": -237.4847412109375, |
|
"logps/rejected": -210.24838256835938, |
|
"loss": 0.6759, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.030252018943428993, |
|
"rewards/margins": 0.040137603878974915, |
|
"rewards/rejected": -0.07038962841033936, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.19373892498523332, |
|
"grad_norm": 1.039447855381113, |
|
"learning_rate": 4.942402385798706e-07, |
|
"logits/chosen": -1.246740698814392, |
|
"logits/rejected": -1.281036138534546, |
|
"logps/chosen": -240.5216064453125, |
|
"logps/rejected": -188.30795288085938, |
|
"loss": 0.6762, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.032288651913404465, |
|
"rewards/margins": 0.03159747272729874, |
|
"rewards/rejected": -0.0638861209154129, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.1984642646190195, |
|
"grad_norm": 1.0133081629791365, |
|
"learning_rate": 4.938232130919785e-07, |
|
"logits/chosen": -2.049900531768799, |
|
"logits/rejected": -2.0438833236694336, |
|
"logps/chosen": -241.96177673339844, |
|
"logps/rejected": -223.10511779785156, |
|
"loss": 0.6775, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.05470336228609085, |
|
"rewards/margins": 0.040740497410297394, |
|
"rewards/rejected": -0.09544385224580765, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.20318960425280566, |
|
"grad_norm": 1.046962172171356, |
|
"learning_rate": 4.933918020682195e-07, |
|
"logits/chosen": -2.0764129161834717, |
|
"logits/rejected": -1.9940263032913208, |
|
"logps/chosen": -204.7505340576172, |
|
"logps/rejected": -206.99880981445312, |
|
"loss": 0.6727, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.03313834220170975, |
|
"rewards/margins": 0.06050185114145279, |
|
"rewards/rejected": -0.09364018589258194, |
|
"step": 43 |
|
}, |
|
{
"epoch": 0.20318960425280566,
"eval_logits/chosen": -2.2005555629730225,
"eval_logits/rejected": -2.2166874408721924,
"eval_logps/chosen": -216.52699279785156,
"eval_logps/rejected": -209.94314575195312,
"eval_loss": 0.6714360117912292,
"eval_rewards/accuracies": 0.5871211886405945,
"eval_rewards/chosen": -0.05296258255839348,
"eval_rewards/margins": 0.046969976276159286,
"eval_rewards/rejected": -0.09993256628513336,
"eval_runtime": 225.63,
"eval_samples_per_second": 16.204,
"eval_steps_per_second": 0.293,
"step": 43
},
{ |
|
"epoch": 0.20791494388659185, |
|
"grad_norm": 1.1285157956991194, |
|
"learning_rate": 4.929460309617843e-07, |
|
"logits/chosen": -2.0923304557800293, |
|
"logits/rejected": -2.151911973953247, |
|
"logps/chosen": -252.43092346191406, |
|
"logps/rejected": -221.0844268798828, |
|
"loss": 0.6707, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.04450148344039917, |
|
"rewards/margins": 0.058394819498062134, |
|
"rewards/rejected": -0.10289628803730011, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.21264028352037803, |
|
"grad_norm": 1.0651629243936434, |
|
"learning_rate": 4.924859260731066e-07, |
|
"logits/chosen": -2.0476608276367188, |
|
"logits/rejected": -2.174062490463257, |
|
"logps/chosen": -219.19517517089844, |
|
"logps/rejected": -189.25193786621094, |
|
"loss": 0.6718, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.06256880611181259, |
|
"rewards/margins": 0.04902214929461479, |
|
"rewards/rejected": -0.11159095913171768, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.21736562315416422, |
|
"grad_norm": 1.0516408835233841, |
|
"learning_rate": 4.920115145483112e-07, |
|
"logits/chosen": -1.602857232093811, |
|
"logits/rejected": -1.6103214025497437, |
|
"logps/chosen": -252.79664611816406, |
|
"logps/rejected": -229.30770874023438, |
|
"loss": 0.6711, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.08149686455726624, |
|
"rewards/margins": 0.037921737879514694, |
|
"rewards/rejected": -0.11941860616207123, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.22209096278795037, |
|
"grad_norm": 1.1789732456475222, |
|
"learning_rate": 4.915228243776124e-07, |
|
"logits/chosen": -1.1500588655471802, |
|
"logits/rejected": -1.1534898281097412, |
|
"logps/chosen": -253.38931274414062, |
|
"logps/rejected": -220.110595703125, |
|
"loss": 0.6634, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.06619597226381302, |
|
"rewards/margins": 0.07172124832868576, |
|
"rewards/rejected": -0.13791722059249878, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.22681630242173656, |
|
"grad_norm": 1.056306792669834, |
|
"learning_rate": 4.91019884393663e-07, |
|
"logits/chosen": -0.8076485991477966, |
|
"logits/rejected": -0.8253241181373596, |
|
"logps/chosen": -211.2886962890625, |
|
"logps/rejected": -174.4979705810547, |
|
"loss": 0.6668, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.05709861218929291, |
|
"rewards/margins": 0.05306413769721985, |
|
"rewards/rejected": -0.11016274988651276, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.23154164205552275, |
|
"grad_norm": 1.1483517794441522, |
|
"learning_rate": 4.905027242698521e-07, |
|
"logits/chosen": -1.5992224216461182, |
|
"logits/rejected": -1.7107007503509521, |
|
"logps/chosen": -258.3623962402344, |
|
"logps/rejected": -208.75918579101562, |
|
"loss": 0.6608, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.07419726252555847, |
|
"rewards/margins": 0.07502313703298569, |
|
"rewards/rejected": -0.14922040700912476, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.23626698168930893, |
|
"grad_norm": 1.0512679200538415, |
|
"learning_rate": 4.89971374518556e-07, |
|
"logits/chosen": -2.0465784072875977, |
|
"logits/rejected": -2.033297300338745, |
|
"logps/chosen": -185.704345703125, |
|
"logps/rejected": -203.28013610839844, |
|
"loss": 0.6639, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -0.08691324293613434, |
|
"rewards/margins": 0.06635289639234543, |
|
"rewards/rejected": -0.15326614677906036, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2409923213230951, |
|
"grad_norm": 1.112120640811307, |
|
"learning_rate": 4.894258664893363e-07, |
|
"logits/chosen": -1.7012823820114136, |
|
"logits/rejected": -1.7330955266952515, |
|
"logps/chosen": -208.68597412109375, |
|
"logps/rejected": -218.7354278564453, |
|
"loss": 0.6594, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.07796823233366013, |
|
"rewards/margins": 0.038594380021095276, |
|
"rewards/rejected": -0.1165626123547554, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.24571766095688127, |
|
"grad_norm": 1.1579361743422294, |
|
"learning_rate": 4.888662323670913e-07, |
|
"logits/chosen": -1.6541762351989746, |
|
"logits/rejected": -1.7235084772109985, |
|
"logps/chosen": -269.59210205078125, |
|
"logps/rejected": -243.711669921875, |
|
"loss": 0.6629, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.06561748683452606, |
|
"rewards/margins": 0.06448770314455032, |
|
"rewards/rejected": -0.1301051825284958, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.25044300059066743, |
|
"grad_norm": 1.1521413307343888, |
|
"learning_rate": 4.882925051701568e-07, |
|
"logits/chosen": -1.853175163269043, |
|
"logits/rejected": -1.8978935480117798, |
|
"logps/chosen": -225.21856689453125, |
|
"logps/rejected": -228.90325927734375, |
|
"loss": 0.6578, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.09534727036952972, |
|
"rewards/margins": 0.077830970287323, |
|
"rewards/rejected": -0.17317824065685272, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.2551683402244536, |
|
"grad_norm": 1.0619319741777762, |
|
"learning_rate": 4.877047187483582e-07, |
|
"logits/chosen": -1.6998298168182373, |
|
"logits/rejected": -1.7790082693099976, |
|
"logps/chosen": -212.0916748046875, |
|
"logps/rejected": -191.58206176757812, |
|
"loss": 0.6631, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -0.10848715156316757, |
|
"rewards/margins": 0.048885174095630646, |
|
"rewards/rejected": -0.1573723405599594, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.2598936798582398, |
|
"grad_norm": 1.1160678416368461, |
|
"learning_rate": 4.871029077810132e-07, |
|
"logits/chosen": -1.675370216369629, |
|
"logits/rejected": -1.7553473711013794, |
|
"logps/chosen": -225.4173126220703, |
|
"logps/rejected": -198.70562744140625, |
|
"loss": 0.6558, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.10019448399543762, |
|
"rewards/margins": 0.08370313048362732, |
|
"rewards/rejected": -0.18389761447906494, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.264619019492026, |
|
"grad_norm": 1.180700159151371, |
|
"learning_rate": 4.864871077748857e-07, |
|
"logits/chosen": -2.015566110610962, |
|
"logits/rejected": -2.080444812774658, |
|
"logps/chosen": -229.28094482421875, |
|
"logps/rejected": -215.07102966308594, |
|
"loss": 0.6587, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.1043662577867508, |
|
"rewards/margins": 0.07935845851898193, |
|
"rewards/rejected": -0.18372471630573273, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.26934435912581217, |
|
"grad_norm": 1.2229942654627657, |
|
"learning_rate": 4.858573550620908e-07, |
|
"logits/chosen": -2.024144411087036, |
|
"logits/rejected": -2.0165395736694336, |
|
"logps/chosen": -266.3519287109375, |
|
"logps/rejected": -233.6390380859375, |
|
"loss": 0.6454, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.12464563548564911, |
|
"rewards/margins": 0.12967334687709808, |
|
"rewards/rejected": -0.2543190121650696, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.27406969875959836, |
|
"grad_norm": 1.0980503837148203, |
|
"learning_rate": 4.852136867979515e-07, |
|
"logits/chosen": -2.3049449920654297, |
|
"logits/rejected": -2.286456346511841, |
|
"logps/chosen": -187.29776000976562, |
|
"logps/rejected": -195.98187255859375, |
|
"loss": 0.6559, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -0.13345083594322205, |
|
"rewards/margins": 0.05029616504907608, |
|
"rewards/rejected": -0.18374700844287872, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.27879503839338454, |
|
"grad_norm": 1.222277907011155, |
|
"learning_rate": 4.845561409588065e-07, |
|
"logits/chosen": -2.3418726921081543, |
|
"logits/rejected": -2.2416603565216064, |
|
"logps/chosen": -184.75540161132812, |
|
"logps/rejected": -186.7329864501953, |
|
"loss": 0.6424, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.10166566073894501, |
|
"rewards/margins": 0.12095416337251663, |
|
"rewards/rejected": -0.22261981666088104, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.2835203780271707, |
|
"grad_norm": 1.0214927771783529, |
|
"learning_rate": 4.838847563397693e-07, |
|
"logits/chosen": -1.5123400688171387, |
|
"logits/rejected": -1.6280531883239746, |
|
"logps/chosen": -237.2841796875, |
|
"logps/rejected": -212.3114013671875, |
|
"loss": 0.6583, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.1481347680091858, |
|
"rewards/margins": 0.09539347887039185, |
|
"rewards/rejected": -0.24352826178073883, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.28824571766095686, |
|
"grad_norm": 1.1567243525055237, |
|
"learning_rate": 4.831995725524398e-07, |
|
"logits/chosen": -2.612375497817993, |
|
"logits/rejected": -2.534623861312866, |
|
"logps/chosen": -163.80526733398438, |
|
"logps/rejected": -201.24952697753906, |
|
"loss": 0.6481, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.14992724359035492, |
|
"rewards/margins": 0.10543593764305115, |
|
"rewards/rejected": -0.2553631663322449, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.29297105729474304, |
|
"grad_norm": 1.2299920626415959, |
|
"learning_rate": 4.825006300225665e-07, |
|
"logits/chosen": -2.0585803985595703, |
|
"logits/rejected": -2.1235456466674805, |
|
"logps/chosen": -218.53924560546875, |
|
"logps/rejected": -219.55938720703125, |
|
"loss": 0.6439, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.15479950606822968, |
|
"rewards/margins": 0.11182530224323273, |
|
"rewards/rejected": -0.2666248381137848, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.2976963969285292, |
|
"grad_norm": 1.203647663122135, |
|
"learning_rate": 4.817879699876622e-07, |
|
"logits/chosen": -1.9584053754806519, |
|
"logits/rejected": -2.047368288040161, |
|
"logps/chosen": -180.12673950195312, |
|
"logps/rejected": -169.58712768554688, |
|
"loss": 0.6365, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.15420033037662506, |
|
"rewards/margins": 0.07548267394304276, |
|
"rewards/rejected": -0.2296830117702484, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.3024217365623154, |
|
"grad_norm": 1.1158018473166338, |
|
"learning_rate": 4.810616344945705e-07, |
|
"logits/chosen": -1.889503002166748, |
|
"logits/rejected": -1.8674815893173218, |
|
"logps/chosen": -205.70799255371094, |
|
"logps/rejected": -200.00503540039062, |
|
"loss": 0.6564, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.20557425916194916, |
|
"rewards/margins": 0.07413503527641296, |
|
"rewards/rejected": -0.2797092795372009, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.3071470761961016, |
|
"grad_norm": 1.1145309518931334, |
|
"learning_rate": 4.803216663969849e-07, |
|
"logits/chosen": -2.5508382320404053, |
|
"logits/rejected": -2.58968186378479, |
|
"logps/chosen": -222.9154052734375, |
|
"logps/rejected": -205.91091918945312, |
|
"loss": 0.646, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.16130368411540985, |
|
"rewards/margins": 0.11249940097332001, |
|
"rewards/rejected": -0.27380311489105225, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.3118724158298878, |
|
"grad_norm": 1.1907287315768882, |
|
"learning_rate": 4.795681093529209e-07, |
|
"logits/chosen": -1.9771151542663574, |
|
"logits/rejected": -1.9131364822387695, |
|
"logps/chosen": -181.78024291992188, |
|
"logps/rejected": -203.05455017089844, |
|
"loss": 0.6367, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.17563070356845856, |
|
"rewards/margins": 0.1282222419977188, |
|
"rewards/rejected": -0.30385297536849976, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.31659775546367397, |
|
"grad_norm": 1.0642807389889697, |
|
"learning_rate": 4.7880100782214e-07, |
|
"logits/chosen": -2.139569044113159, |
|
"logits/rejected": -2.139991283416748, |
|
"logps/chosen": -200.4115447998047, |
|
"logps/rejected": -209.0381317138672, |
|
"loss": 0.6515, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.25353577733039856, |
|
"rewards/margins": 0.025102369487285614, |
|
"rewards/rejected": -0.2786381244659424, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.32132309509746015, |
|
"grad_norm": 1.2361659779944367, |
|
"learning_rate": 4.780204070635266e-07, |
|
"logits/chosen": -2.3622794151306152, |
|
"logits/rejected": -2.3103740215301514, |
|
"logps/chosen": -241.8636474609375, |
|
"logps/rejected": -253.21530151367188, |
|
"loss": 0.6309, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.2274862378835678, |
|
"rewards/margins": 0.18495142459869385, |
|
"rewards/rejected": -0.41243770718574524, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.3260484347312463, |
|
"grad_norm": 1.2216304493799137, |
|
"learning_rate": 4.772263531324172e-07, |
|
"logits/chosen": -2.275869369506836, |
|
"logits/rejected": -2.296611785888672, |
|
"logps/chosen": -255.72305297851562, |
|
"logps/rejected": -237.79794311523438, |
|
"loss": 0.631, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.24048469960689545, |
|
"rewards/margins": 0.11058272421360016, |
|
"rewards/rejected": -0.351067453622818, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.33077377436503247, |
|
"grad_norm": 1.2564703689206955, |
|
"learning_rate": 4.764188928778843e-07, |
|
"logits/chosen": -1.9974974393844604, |
|
"logits/rejected": -2.011615514755249, |
|
"logps/chosen": -195.90382385253906, |
|
"logps/rejected": -214.2757110595703, |
|
"loss": 0.6336, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.2238045632839203, |
|
"rewards/margins": 0.1463470458984375, |
|
"rewards/rejected": -0.3701516091823578, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.33549911399881865, |
|
"grad_norm": 1.2749948068499388, |
|
"learning_rate": 4.755980739399711e-07, |
|
"logits/chosen": -2.3057668209075928, |
|
"logits/rejected": -2.2394864559173584, |
|
"logps/chosen": -193.60552978515625, |
|
"logps/rejected": -239.27389526367188, |
|
"loss": 0.6525, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.2364463210105896, |
|
"rewards/margins": 0.1507556438446045, |
|
"rewards/rejected": -0.3872019648551941, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.34022445363260484, |
|
"grad_norm": 1.3097724846280558, |
|
"learning_rate": 4.747639447468816e-07, |
|
"logits/chosen": -2.2665905952453613, |
|
"logits/rejected": -2.279989719390869, |
|
"logps/chosen": -285.88140869140625, |
|
"logps/rejected": -304.027587890625, |
|
"loss": 0.6232, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.23830385506153107, |
|
"rewards/margins": 0.22441713511943817, |
|
"rewards/rejected": -0.4627210199832916, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.344949793266391, |
|
"grad_norm": 1.267134897055487, |
|
"learning_rate": 4.739165545121228e-07, |
|
"logits/chosen": -1.9879568815231323, |
|
"logits/rejected": -1.9682663679122925, |
|
"logps/chosen": -246.3922882080078, |
|
"logps/rejected": -247.4912109375, |
|
"loss": 0.6229, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.3345012366771698, |
|
"rewards/margins": 0.17793008685112, |
|
"rewards/rejected": -0.5124313235282898, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.3496751329001772, |
|
"grad_norm": 1.2766840060333855, |
|
"learning_rate": 4.730559532316014e-07, |
|
"logits/chosen": -2.3778185844421387, |
|
"logits/rejected": -2.4101202487945557, |
|
"logps/chosen": -221.90524291992188, |
|
"logps/rejected": -241.57962036132812, |
|
"loss": 0.6395, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.32600241899490356, |
|
"rewards/margins": 0.14937394857406616, |
|
"rewards/rejected": -0.4753763973712921, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.3544004725339634, |
|
"grad_norm": 1.3566530419485627, |
|
"learning_rate": 4.721821916806741e-07, |
|
"logits/chosen": -2.4602699279785156, |
|
"logits/rejected": -2.4688634872436523, |
|
"logps/chosen": -268.510498046875, |
|
"logps/rejected": -286.26165771484375, |
|
"loss": 0.6237, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.31091028451919556, |
|
"rewards/margins": 0.29035893082618713, |
|
"rewards/rejected": -0.6012692451477051, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.3591258121677496, |
|
"grad_norm": 1.4205593895904036, |
|
"learning_rate": 4.7129532141115145e-07, |
|
"logits/chosen": -2.5466036796569824, |
|
"logits/rejected": -2.5945773124694824, |
|
"logps/chosen": -318.98370361328125, |
|
"logps/rejected": -309.7763366699219, |
|
"loss": 0.6112, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.3561764359474182, |
|
"rewards/margins": 0.18557003140449524, |
|
"rewards/rejected": -0.5417464375495911, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.3638511518015357, |
|
"grad_norm": 1.528828829984918, |
|
"learning_rate": 4.7039539474825683e-07, |
|
"logits/chosen": -2.2787909507751465, |
|
"logits/rejected": -2.3680973052978516, |
|
"logps/chosen": -305.41925048828125, |
|
"logps/rejected": -288.8333435058594, |
|
"loss": 0.6006, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.39139410853385925, |
|
"rewards/margins": 0.2935311794281006, |
|
"rewards/rejected": -0.6849253177642822, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.3685764914353219, |
|
"grad_norm": 1.6985476984319592, |
|
"learning_rate": 4.6948246478753903e-07, |
|
"logits/chosen": -2.261338472366333, |
|
"logits/rejected": -2.323387622833252, |
|
"logps/chosen": -247.09033203125, |
|
"logps/rejected": -257.6181640625, |
|
"loss": 0.6314, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5037412047386169, |
|
"rewards/margins": 0.1979576051235199, |
|
"rewards/rejected": -0.7016987800598145, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.3733018310691081, |
|
"grad_norm": 1.729313425365047, |
|
"learning_rate": 4.6855658539173946e-07, |
|
"logits/chosen": -2.698389768600464, |
|
"logits/rejected": -2.646217107772827, |
|
"logps/chosen": -314.0598449707031, |
|
"logps/rejected": -283.12530517578125, |
|
"loss": 0.6206, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.5247927904129028, |
|
"rewards/margins": 0.22078382968902588, |
|
"rewards/rejected": -0.7455766201019287, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.37802717070289427, |
|
"grad_norm": 1.66621636166947, |
|
"learning_rate": 4.6761781118761446e-07, |
|
"logits/chosen": -2.343153238296509, |
|
"logits/rejected": -2.3706839084625244, |
|
"logps/chosen": -296.689453125, |
|
"logps/rejected": -309.9665222167969, |
|
"loss": 0.6257, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.44052672386169434, |
|
"rewards/margins": 0.23874229192733765, |
|
"rewards/rejected": -0.6792689561843872, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.38275251033668045, |
|
"grad_norm": 1.5082652270101988, |
|
"learning_rate": 4.666661975627123e-07, |
|
"logits/chosen": -2.4629459381103516, |
|
"logits/rejected": -2.486161470413208, |
|
"logps/chosen": -233.51983642578125, |
|
"logps/rejected": -262.89447021484375, |
|
"loss": 0.6154, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.4172506630420685, |
|
"rewards/margins": 0.3244992196559906, |
|
"rewards/rejected": -0.7417498826980591, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.38747784997046664, |
|
"grad_norm": 1.5314338973522612, |
|
"learning_rate": 4.657018006621053e-07, |
|
"logits/chosen": -2.1910758018493652, |
|
"logits/rejected": -2.273641586303711, |
|
"logps/chosen": -225.42889404296875, |
|
"logps/rejected": -239.92913818359375, |
|
"loss": 0.6058, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.4517180323600769, |
|
"rewards/margins": 0.22106432914733887, |
|
"rewards/rejected": -0.6727824211120605, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.3922031896042528, |
|
"grad_norm": 1.6363344671283797, |
|
"learning_rate": 4.6472467738507724e-07, |
|
"logits/chosen": -2.4740734100341797, |
|
"logits/rejected": -2.593059539794922, |
|
"logps/chosen": -325.74298095703125, |
|
"logps/rejected": -297.570068359375, |
|
"loss": 0.6088, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6095322370529175, |
|
"rewards/margins": 0.1409136950969696, |
|
"rewards/rejected": -0.7504459619522095, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.396928529238039, |
|
"grad_norm": 1.7032682560793109, |
|
"learning_rate": 4.6373488538176656e-07, |
|
"logits/chosen": -2.627995014190674, |
|
"logits/rejected": -2.585923433303833, |
|
"logps/chosen": -278.2349853515625, |
|
"logps/rejected": -319.6769104003906, |
|
"loss": 0.6177, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.6383107900619507, |
|
"rewards/margins": 0.17859135568141937, |
|
"rewards/rejected": -0.8169021010398865, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.40165386887182514, |
|
"grad_norm": 1.6927306950035537, |
|
"learning_rate": 4.627324830497645e-07, |
|
"logits/chosen": -2.3960776329040527, |
|
"logits/rejected": -2.4317002296447754, |
|
"logps/chosen": -217.64886474609375, |
|
"logps/rejected": -255.28555297851562, |
|
"loss": 0.6034, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.581438422203064, |
|
"rewards/margins": 0.2544565200805664, |
|
"rewards/rejected": -0.8358950018882751, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.4063792085056113, |
|
"grad_norm": 1.517574514685826, |
|
"learning_rate": 4.617175295306701e-07, |
|
"logits/chosen": -2.342132329940796, |
|
"logits/rejected": -2.317411422729492, |
|
"logps/chosen": -249.52786254882812, |
|
"logps/rejected": -272.2576904296875, |
|
"loss": 0.6056, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.4862441420555115, |
|
"rewards/margins": 0.2543086111545563, |
|
"rewards/rejected": -0.7405527830123901, |
|
"step": 86 |
|
}, |
|
{
"epoch": 0.4063792085056113,
"eval_logits/chosen": -3.0177347660064697,
"eval_logits/rejected": -3.027693748474121,
"eval_logps/chosen": -269.99395751953125,
"eval_logps/rejected": -288.7347106933594,
"eval_loss": 0.6040579080581665,
"eval_rewards/accuracies": 0.6022727489471436,
"eval_rewards/chosen": -0.5876324772834778,
"eval_rewards/margins": 0.3002159297466278,
"eval_rewards/rejected": -0.8878483772277832,
"eval_runtime": 225.4053,
"eval_samples_per_second": 16.22,
"eval_steps_per_second": 0.293,
"step": 86
},
{ |
|
"epoch": 0.4111045481393975, |
|
"grad_norm": 1.828247945135766, |
|
"learning_rate": 4.6069008470660057e-07, |
|
"logits/chosen": -2.776036500930786, |
|
"logits/rejected": -2.8447940349578857, |
|
"logps/chosen": -286.0122985839844, |
|
"logps/rejected": -318.0850830078125, |
|
"loss": 0.6202, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6256659626960754, |
|
"rewards/margins": 0.2720867395401001, |
|
"rewards/rejected": -0.8977527022361755, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.4158298877731837, |
|
"grad_norm": 1.7339677719729851, |
|
"learning_rate": 4.596502091966587e-07, |
|
"logits/chosen": -2.6904196739196777, |
|
"logits/rejected": -2.8044652938842773, |
|
"logps/chosen": -318.77557373046875, |
|
"logps/rejected": -326.3057556152344, |
|
"loss": 0.6133, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -0.6397165060043335, |
|
"rewards/margins": 0.23243850469589233, |
|
"rewards/rejected": -0.8721550107002258, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.4205552274069699, |
|
"grad_norm": 1.5787512385428173, |
|
"learning_rate": 4.5859796435335575e-07, |
|
"logits/chosen": -2.5359115600585938, |
|
"logits/rejected": -2.5303304195404053, |
|
"logps/chosen": -256.5873718261719, |
|
"logps/rejected": -295.2117004394531, |
|
"loss": 0.6062, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.5649542212486267, |
|
"rewards/margins": 0.2900598347187042, |
|
"rewards/rejected": -0.8550140857696533, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.42528056704075606, |
|
"grad_norm": 1.6858517491129223, |
|
"learning_rate": 4.5753341225899195e-07, |
|
"logits/chosen": -2.4564006328582764, |
|
"logits/rejected": -2.470282793045044, |
|
"logps/chosen": -327.185791015625, |
|
"logps/rejected": -319.4944152832031, |
|
"loss": 0.6017, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -0.6457672715187073, |
|
"rewards/margins": 0.2363281399011612, |
|
"rewards/rejected": -0.882095456123352, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.43000590667454225, |
|
"grad_norm": 1.6776313748940133, |
|
"learning_rate": 4.564566157219938e-07, |
|
"logits/chosen": -2.467501163482666, |
|
"logits/rejected": -2.570307970046997, |
|
"logps/chosen": -318.20867919921875, |
|
"logps/rejected": -302.66949462890625, |
|
"loss": 0.6015, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -0.5671995878219604, |
|
"rewards/margins": 0.2143457680940628, |
|
"rewards/rejected": -0.7815454006195068, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.43473124630832843, |
|
"grad_norm": 2.0614895261976747, |
|
"learning_rate": 4.5536763827320803e-07, |
|
"logits/chosen": -2.3631057739257812, |
|
"logits/rejected": -2.54693341255188, |
|
"logps/chosen": -273.6968078613281, |
|
"logps/rejected": -246.98641967773438, |
|
"loss": 0.5856, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.4555537700653076, |
|
"rewards/margins": 0.22295869886875153, |
|
"rewards/rejected": -0.6785125136375427, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.43945658594211456, |
|
"grad_norm": 1.8016509044142324, |
|
"learning_rate": 4.5426654416215367e-07, |
|
"logits/chosen": -2.71864652633667, |
|
"logits/rejected": -2.7026920318603516, |
|
"logps/chosen": -288.259033203125, |
|
"logps/rejected": -330.29132080078125, |
|
"loss": 0.6038, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.5275314450263977, |
|
"rewards/margins": 0.5003238916397095, |
|
"rewards/rejected": -1.027855396270752, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.44418192557590075, |
|
"grad_norm": 2.243457029027687, |
|
"learning_rate": 4.5315339835323095e-07, |
|
"logits/chosen": -2.884897232055664, |
|
"logits/rejected": -2.8569068908691406, |
|
"logps/chosen": -269.7425537109375, |
|
"logps/rejected": -297.4085693359375, |
|
"loss": 0.5748, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.5586040019989014, |
|
"rewards/margins": 0.3271249234676361, |
|
"rewards/rejected": -0.8857288956642151, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.44890726520968693, |
|
"grad_norm": 2.296742463720749, |
|
"learning_rate": 4.520282665218889e-07, |
|
"logits/chosen": -2.790522336959839, |
|
"logits/rejected": -2.84653377532959, |
|
"logps/chosen": -265.41339111328125, |
|
"logps/rejected": -315.9186096191406, |
|
"loss": 0.5625, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5728757977485657, |
|
"rewards/margins": 0.515011191368103, |
|
"rewards/rejected": -1.0878870487213135, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.4536326048434731, |
|
"grad_norm": 1.922804376035695, |
|
"learning_rate": 4.5089121505074987e-07, |
|
"logits/chosen": -2.6556386947631836, |
|
"logits/rejected": -2.806910276412964, |
|
"logps/chosen": -244.31153869628906, |
|
"logps/rejected": -247.36322021484375, |
|
"loss": 0.5787, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5964667797088623, |
|
"rewards/margins": 0.29180973768234253, |
|
"rewards/rejected": -0.8882765173912048, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.4583579444772593, |
|
"grad_norm": 1.6125061422080407, |
|
"learning_rate": 4.4974231102569355e-07, |
|
"logits/chosen": -2.7232208251953125, |
|
"logits/rejected": -2.8808493614196777, |
|
"logps/chosen": -272.30206298828125, |
|
"logps/rejected": -273.55487060546875, |
|
"loss": 0.5764, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.5672851800918579, |
|
"rewards/margins": 0.39647072553634644, |
|
"rewards/rejected": -0.9637559056282043, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.4630832841110455, |
|
"grad_norm": 2.0273197043813598, |
|
"learning_rate": 4.4858162223189853e-07, |
|
"logits/chosen": -2.691676616668701, |
|
"logits/rejected": -2.7318079471588135, |
|
"logps/chosen": -331.65582275390625, |
|
"logps/rejected": -321.1370544433594, |
|
"loss": 0.6022, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7305989861488342, |
|
"rewards/margins": 0.20282992720603943, |
|
"rewards/rejected": -0.933428943157196, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.4678086237448317, |
|
"grad_norm": 1.772177669462408, |
|
"learning_rate": 4.474092171498434e-07, |
|
"logits/chosen": -2.5423169136047363, |
|
"logits/rejected": -2.5960371494293213, |
|
"logps/chosen": -260.9735107421875, |
|
"logps/rejected": -277.1812744140625, |
|
"loss": 0.5888, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -0.6579495668411255, |
|
"rewards/margins": 0.32558560371398926, |
|
"rewards/rejected": -0.9835351705551147, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.47253396337861786, |
|
"grad_norm": 2.031475270950697, |
|
"learning_rate": 4.462251649512656e-07, |
|
"logits/chosen": -2.805039167404175, |
|
"logits/rejected": -2.7704780101776123, |
|
"logps/chosen": -235.95053100585938, |
|
"logps/rejected": -292.6195373535156, |
|
"loss": 0.5634, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.612291157245636, |
|
"rewards/margins": 0.49476855993270874, |
|
"rewards/rejected": -1.1070597171783447, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.477259303012404, |
|
"grad_norm": 2.8786657503544033, |
|
"learning_rate": 4.4502953549508135e-07, |
|
"logits/chosen": -2.829331398010254, |
|
"logits/rejected": -2.8344359397888184, |
|
"logps/chosen": -309.11285400390625, |
|
"logps/rejected": -331.1981201171875, |
|
"loss": 0.596, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7622178792953491, |
|
"rewards/margins": 0.48595699667930603, |
|
"rewards/rejected": -1.2481749057769775, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.4819846426461902, |
|
"grad_norm": 2.6747243514730163, |
|
"learning_rate": 4.438223993232634e-07, |
|
"logits/chosen": -2.71714186668396, |
|
"logits/rejected": -2.7164933681488037, |
|
"logps/chosen": -319.4408264160156, |
|
"logps/rejected": -335.07025146484375, |
|
"loss": 0.5873, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7349227666854858, |
|
"rewards/margins": 0.5957677960395813, |
|
"rewards/rejected": -1.3306907415390015, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.48670998227997636, |
|
"grad_norm": 2.184532115681571, |
|
"learning_rate": 4.426038276566787e-07, |
|
"logits/chosen": -2.595947027206421, |
|
"logits/rejected": -2.6409239768981934, |
|
"logps/chosen": -302.1788330078125, |
|
"logps/rejected": -300.60736083984375, |
|
"loss": 0.5929, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.6437785625457764, |
|
"rewards/margins": 0.4648054838180542, |
|
"rewards/rejected": -1.1085840463638306, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.49143532191376255, |
|
"grad_norm": 2.351448463153027, |
|
"learning_rate": 4.413738923908874e-07, |
|
"logits/chosen": -2.820120334625244, |
|
"logits/rejected": -2.881047248840332, |
|
"logps/chosen": -321.73577880859375, |
|
"logps/rejected": -336.362548828125, |
|
"loss": 0.6113, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6863315105438232, |
|
"rewards/margins": 0.3078695237636566, |
|
"rewards/rejected": -0.9942010641098022, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.49616066154754873, |
|
"grad_norm": 2.0712718540984976, |
|
"learning_rate": 4.4013266609190016e-07, |
|
"logits/chosen": -2.726858139038086, |
|
"logits/rejected": -2.8224053382873535, |
|
"logps/chosen": -318.89129638671875, |
|
"logps/rejected": -321.7113037109375, |
|
"loss": 0.5567, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -0.7669743299484253, |
|
"rewards/margins": 0.5382488369941711, |
|
"rewards/rejected": -1.3052233457565308, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.5008860011813349, |
|
"grad_norm": 2.7161021836994412, |
|
"learning_rate": 4.3888022199189684e-07, |
|
"logits/chosen": -2.5977838039398193, |
|
"logits/rejected": -2.568969488143921, |
|
"logps/chosen": -270.40380859375, |
|
"logps/rejected": -321.2980041503906, |
|
"loss": 0.6029, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.6543689966201782, |
|
"rewards/margins": 0.5025177001953125, |
|
"rewards/rejected": -1.1568866968154907, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.505611340815121, |
|
"grad_norm": 2.133467086083558, |
|
"learning_rate": 4.3761663398490634e-07, |
|
"logits/chosen": -2.5719194412231445, |
|
"logits/rejected": -2.569828510284424, |
|
"logps/chosen": -278.3515930175781, |
|
"logps/rejected": -290.2523193359375, |
|
"loss": 0.5601, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.6445180773735046, |
|
"rewards/margins": 0.44993308186531067, |
|
"rewards/rejected": -1.0944510698318481, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.5103366804489072, |
|
"grad_norm": 2.581357357210363, |
|
"learning_rate": 4.363419766224464e-07, |
|
"logits/chosen": -2.5332443714141846, |
|
"logits/rejected": -2.5566651821136475, |
|
"logps/chosen": -255.7954559326172, |
|
"logps/rejected": -285.38604736328125, |
|
"loss": 0.5701, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7009232044219971, |
|
"rewards/margins": 0.5014970302581787, |
|
"rewards/rejected": -1.2024202346801758, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.5150620200826934, |
|
"grad_norm": 2.0545799713212913, |
|
"learning_rate": 4.3505632510912515e-07, |
|
"logits/chosen": -2.5492563247680664, |
|
"logits/rejected": -2.6753411293029785, |
|
"logps/chosen": -252.5239715576172, |
|
"logps/rejected": -250.57691955566406, |
|
"loss": 0.5745, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.580327033996582, |
|
"rewards/margins": 0.28667935729026794, |
|
"rewards/rejected": -0.8670063614845276, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.5197873597164796, |
|
"grad_norm": 2.2034797994763458, |
|
"learning_rate": 4.3375975529820414e-07, |
|
"logits/chosen": -2.492084503173828, |
|
"logits/rejected": -2.432577610015869, |
|
"logps/chosen": -310.48046875, |
|
"logps/rejected": -362.7835388183594, |
|
"loss": 0.5392, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.6999268531799316, |
|
"rewards/margins": 0.7504494190216064, |
|
"rewards/rejected": -1.450376272201538, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.5245126993502658, |
|
"grad_norm": 1.9829084137694295, |
|
"learning_rate": 4.3245234368712304e-07, |
|
"logits/chosen": -2.7556090354919434, |
|
"logits/rejected": -2.754206418991089, |
|
"logps/chosen": -270.7232666015625, |
|
"logps/rejected": -324.1688232421875, |
|
"loss": 0.5701, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.6123411655426025, |
|
"rewards/margins": 0.4949289858341217, |
|
"rewards/rejected": -1.1072702407836914, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.529238038984052, |
|
"grad_norm": 2.0360711261149596, |
|
"learning_rate": 4.3113416741298616e-07, |
|
"logits/chosen": -2.659914016723633, |
|
"logits/rejected": -2.673081874847412, |
|
"logps/chosen": -292.6357727050781, |
|
"logps/rejected": -286.8974914550781, |
|
"loss": 0.5785, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.645796537399292, |
|
"rewards/margins": 0.4479163587093353, |
|
"rewards/rejected": -1.0937130451202393, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.5339633786178382, |
|
"grad_norm": 2.3215552370547945, |
|
"learning_rate": 4.298053042480114e-07, |
|
"logits/chosen": -2.6102089881896973, |
|
"logits/rejected": -2.666215419769287, |
|
"logps/chosen": -284.019775390625, |
|
"logps/rejected": -313.7834777832031, |
|
"loss": 0.5434, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.6691496968269348, |
|
"rewards/margins": 0.5124155879020691, |
|
"rewards/rejected": -1.181565284729004, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.5386887182516243, |
|
"grad_norm": 2.1290543043289434, |
|
"learning_rate": 4.2846583259494185e-07, |
|
"logits/chosen": -2.795818328857422, |
|
"logits/rejected": -2.9482498168945312, |
|
"logps/chosen": -291.4162292480469, |
|
"logps/rejected": -279.486083984375, |
|
"loss": 0.5576, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.791326105594635, |
|
"rewards/margins": 0.29795482754707336, |
|
"rewards/rejected": -1.0892809629440308, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.5434140578854105, |
|
"grad_norm": 2.123022825050298, |
|
"learning_rate": 4.271158314824199e-07, |
|
"logits/chosen": -2.5966644287109375, |
|
"logits/rejected": -2.67663836479187, |
|
"logps/chosen": -286.2615661621094, |
|
"logps/rejected": -300.17669677734375, |
|
"loss": 0.5549, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.6490954160690308, |
|
"rewards/margins": 0.37902897596359253, |
|
"rewards/rejected": -1.028124451637268, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.5481393975191967, |
|
"grad_norm": 2.3410590679680854, |
|
"learning_rate": 4.2575538056032446e-07, |
|
"logits/chosen": -2.3392884731292725, |
|
"logits/rejected": -2.3976926803588867, |
|
"logps/chosen": -309.89501953125, |
|
"logps/rejected": -347.65264892578125, |
|
"loss": 0.5519, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.8080885410308838, |
|
"rewards/margins": 0.5476577877998352, |
|
"rewards/rejected": -1.3557462692260742, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.5528647371529829, |
|
"grad_norm": 2.1818324792915305, |
|
"learning_rate": 4.2438456009507195e-07, |
|
"logits/chosen": -2.751250743865967, |
|
"logits/rejected": -2.683605909347534, |
|
"logps/chosen": -304.8617248535156, |
|
"logps/rejected": -357.66583251953125, |
|
"loss": 0.5637, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.7783306837081909, |
|
"rewards/margins": 0.6774348020553589, |
|
"rewards/rejected": -1.4557652473449707, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.5575900767867691, |
|
"grad_norm": 2.2239562359532994, |
|
"learning_rate": 4.230034509648803e-07, |
|
"logits/chosen": -2.653618335723877, |
|
"logits/rejected": -2.6241607666015625, |
|
"logps/chosen": -311.1373596191406, |
|
"logps/rejected": -379.66473388671875, |
|
"loss": 0.5332, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.6985858678817749, |
|
"rewards/margins": 0.754062294960022, |
|
"rewards/rejected": -1.4526481628417969, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.5623154164205553, |
|
"grad_norm": 2.2500972070282024, |
|
"learning_rate": 4.216121346549973e-07, |
|
"logits/chosen": -3.0888874530792236, |
|
"logits/rejected": -2.974677562713623, |
|
"logps/chosen": -266.66705322265625, |
|
"logps/rejected": -335.3798828125, |
|
"loss": 0.5606, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.6862295866012573, |
|
"rewards/margins": 0.5550753474235535, |
|
"rewards/rejected": -1.2413049936294556, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.5670407560543415, |
|
"grad_norm": 2.18698230287775, |
|
"learning_rate": 4.202106932528928e-07, |
|
"logits/chosen": -2.6840288639068604, |
|
"logits/rejected": -2.7219927310943604, |
|
"logps/chosen": -349.6735534667969, |
|
"logps/rejected": -351.0025634765625, |
|
"loss": 0.5593, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.9063121676445007, |
|
"rewards/margins": 0.5141624808311462, |
|
"rewards/rejected": -1.420474648475647, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.5717660956881275, |
|
"grad_norm": 2.3004644364082734, |
|
"learning_rate": 4.1879920944341593e-07, |
|
"logits/chosen": -2.914316177368164, |
|
"logits/rejected": -2.9891955852508545, |
|
"logps/chosen": -279.8473205566406, |
|
"logps/rejected": -313.1243591308594, |
|
"loss": 0.544, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.754368245601654, |
|
"rewards/margins": 0.5700761079788208, |
|
"rewards/rejected": -1.3244441747665405, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.5764914353219137, |
|
"grad_norm": 2.3085447219221504, |
|
"learning_rate": 4.1737776650391625e-07, |
|
"logits/chosen": -2.5704903602600098, |
|
"logits/rejected": -2.6333065032958984, |
|
"logps/chosen": -274.203369140625, |
|
"logps/rejected": -333.9256896972656, |
|
"loss": 0.5438, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.7395837903022766, |
|
"rewards/margins": 0.5535318851470947, |
|
"rewards/rejected": -1.2931156158447266, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.5812167749556999, |
|
"grad_norm": 2.1555107869656234, |
|
"learning_rate": 4.1594644829933074e-07, |
|
"logits/chosen": -3.0889954566955566, |
|
"logits/rejected": -3.00903582572937, |
|
"logps/chosen": -287.2713623046875, |
|
"logps/rejected": -352.50244140625, |
|
"loss": 0.5432, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.7744563221931458, |
|
"rewards/margins": 0.6194370985031128, |
|
"rewards/rejected": -1.3938933610916138, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.5859421145894861, |
|
"grad_norm": 2.3124214479042147, |
|
"learning_rate": 4.1450533927723563e-07, |
|
"logits/chosen": -2.7649660110473633, |
|
"logits/rejected": -2.7645654678344727, |
|
"logps/chosen": -323.98663330078125, |
|
"logps/rejected": -377.0577697753906, |
|
"loss": 0.5562, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.9616256952285767, |
|
"rewards/margins": 0.6629120707511902, |
|
"rewards/rejected": -1.624537706375122, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.5906674542232723, |
|
"grad_norm": 2.201123781604531, |
|
"learning_rate": 4.130545244628638e-07, |
|
"logits/chosen": -2.8170711994171143, |
|
"logits/rejected": -2.801412582397461, |
|
"logps/chosen": -284.6482849121094, |
|
"logps/rejected": -339.86328125, |
|
"loss": 0.5695, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9369969964027405, |
|
"rewards/margins": 0.6950640678405762, |
|
"rewards/rejected": -1.6320611238479614, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.5953927938570585, |
|
"grad_norm": 2.4679791573291907, |
|
"learning_rate": 4.11594089454089e-07, |
|
"logits/chosen": -2.7288601398468018, |
|
"logits/rejected": -2.732513427734375, |
|
"logps/chosen": -331.1778259277344, |
|
"logps/rejected": -422.12139892578125, |
|
"loss": 0.566, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8497134447097778, |
|
"rewards/margins": 0.6027945280075073, |
|
"rewards/rejected": -1.4525080919265747, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.6001181334908446, |
|
"grad_norm": 2.5170488926988073, |
|
"learning_rate": 4.101241204163748e-07, |
|
"logits/chosen": -2.66646671295166, |
|
"logits/rejected": -2.621904134750366, |
|
"logps/chosen": -301.6587829589844, |
|
"logps/rejected": -365.4261474609375, |
|
"loss": 0.5407, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9232099056243896, |
|
"rewards/margins": 0.4544805884361267, |
|
"rewards/rejected": -1.377690315246582, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.6048434731246308, |
|
"grad_norm": 2.3673200269773718, |
|
"learning_rate": 4.086447040776911e-07, |
|
"logits/chosen": -3.108903169631958, |
|
"logits/rejected": -3.056070327758789, |
|
"logps/chosen": -276.7916259765625, |
|
"logps/rejected": -342.23876953125, |
|
"loss": 0.5449, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.8404449820518494, |
|
"rewards/margins": 0.6569064855575562, |
|
"rewards/rejected": -1.4973516464233398, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.609568812758417, |
|
"grad_norm": 3.2943893516301768, |
|
"learning_rate": 4.071559277233975e-07, |
|
"logits/chosen": -3.1619277000427246, |
|
"logits/rejected": -3.1500680446624756, |
|
"logps/chosen": -283.8644104003906, |
|
"logps/rejected": -360.0433044433594, |
|
"loss": 0.573, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.969115674495697, |
|
"rewards/margins": 0.5388011932373047, |
|
"rewards/rejected": -1.507916808128357, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.609568812758417, |
|
"eval_logits/chosen": -2.9238109588623047, |
|
"eval_logits/rejected": -2.93009352684021, |
|
"eval_logps/chosen": -304.09130859375, |
|
"eval_logps/rejected": -360.09600830078125, |
|
"eval_loss": 0.5451335906982422, |
|
"eval_rewards/accuracies": 0.6174242496490479, |
|
"eval_rewards/chosen": -0.9286060333251953, |
|
"eval_rewards/margins": 0.6728550791740417, |
|
"eval_rewards/rejected": -1.6014612913131714, |
|
"eval_runtime": 226.5871, |
|
"eval_samples_per_second": 16.135, |
|
"eval_steps_per_second": 0.291, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.6142941523922032, |
|
"grad_norm": 2.3717766371975086, |
|
"learning_rate": 4.05657879191093e-07, |
|
"logits/chosen": -2.86085844039917, |
|
"logits/rejected": -3.0779123306274414, |
|
"logps/chosen": -353.9771423339844, |
|
"logps/rejected": -372.76788330078125, |
|
"loss": 0.5611, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.9203246235847473, |
|
"rewards/margins": 0.6556491851806641, |
|
"rewards/rejected": -1.575973629951477, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.6190194920259894, |
|
"grad_norm": 2.9558527636533025, |
|
"learning_rate": 4.04150646865434e-07, |
|
"logits/chosen": -2.6664750576019287, |
|
"logits/rejected": -2.6207780838012695, |
|
"logps/chosen": -283.3083801269531, |
|
"logps/rejected": -348.6871032714844, |
|
"loss": 0.5525, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8656440377235413, |
|
"rewards/margins": 0.7297619581222534, |
|
"rewards/rejected": -1.5954060554504395, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.6237448316597756, |
|
"grad_norm": 2.7269741124418165, |
|
"learning_rate": 4.0263431967291934e-07, |
|
"logits/chosen": -2.8708412647247314, |
|
"logits/rejected": -2.836123466491699, |
|
"logps/chosen": -251.37966918945312, |
|
"logps/rejected": -302.320068359375, |
|
"loss": 0.5554, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.8969355821609497, |
|
"rewards/margins": 0.4451577365398407, |
|
"rewards/rejected": -1.3420933485031128, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.6284701712935618, |
|
"grad_norm": 2.3559260676043547, |
|
"learning_rate": 4.011089870766437e-07, |
|
"logits/chosen": -2.9185516834259033, |
|
"logits/rejected": -3.0862460136413574, |
|
"logps/chosen": -340.1504211425781, |
|
"logps/rejected": -376.01629638671875, |
|
"loss": 0.5223, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8409585952758789, |
|
"rewards/margins": 0.9160802364349365, |
|
"rewards/rejected": -1.7570387125015259, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.6331955109273479, |
|
"grad_norm": 2.551925501157886, |
|
"learning_rate": 3.995747390710196e-07, |
|
"logits/chosen": -2.895480155944824, |
|
"logits/rejected": -2.939868211746216, |
|
"logps/chosen": -323.12164306640625, |
|
"logps/rejected": -397.951171875, |
|
"loss": 0.5385, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -1.1145464181900024, |
|
"rewards/margins": 1.0304715633392334, |
|
"rewards/rejected": -2.1450178623199463, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.6379208505611341, |
|
"grad_norm": 3.0639374327823625, |
|
"learning_rate": 3.98031666176467e-07, |
|
"logits/chosen": -3.221116781234741, |
|
"logits/rejected": -3.125380516052246, |
|
"logps/chosen": -277.20684814453125, |
|
"logps/rejected": -368.4245300292969, |
|
"loss": 0.5453, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9265705943107605, |
|
"rewards/margins": 0.8099436163902283, |
|
"rewards/rejected": -1.7365142107009888, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.6426461901949203, |
|
"grad_norm": 3.0229894732050537, |
|
"learning_rate": 3.9647985943407345e-07, |
|
"logits/chosen": -2.7229156494140625, |
|
"logits/rejected": -2.624408006668091, |
|
"logps/chosen": -296.76507568359375, |
|
"logps/rejected": -366.4635925292969, |
|
"loss": 0.5446, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.911621630191803, |
|
"rewards/margins": 0.3694719672203064, |
|
"rewards/rejected": -1.2810935974121094, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.6473715298287065, |
|
"grad_norm": 2.777240365107515, |
|
"learning_rate": 3.949194104002224e-07, |
|
"logits/chosen": -3.008553981781006, |
|
"logits/rejected": -3.0245308876037598, |
|
"logps/chosen": -278.2191162109375, |
|
"logps/rejected": -372.57672119140625, |
|
"loss": 0.5333, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.9170963168144226, |
|
"rewards/margins": 0.91133052110672, |
|
"rewards/rejected": -1.8284270763397217, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.6520968694624926, |
|
"grad_norm": 2.5983010643513222, |
|
"learning_rate": 3.93350411141191e-07, |
|
"logits/chosen": -2.984111785888672, |
|
"logits/rejected": -2.9940693378448486, |
|
"logps/chosen": -256.7328796386719, |
|
"logps/rejected": -304.14361572265625, |
|
"loss": 0.5481, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.8156505227088928, |
|
"rewards/margins": 0.6031564474105835, |
|
"rewards/rejected": -1.418807029724121, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.6568222090962788, |
|
"grad_norm": 2.758553605218817, |
|
"learning_rate": 3.917729542277187e-07, |
|
"logits/chosen": -2.739635944366455, |
|
"logits/rejected": -2.8373708724975586, |
|
"logps/chosen": -355.6564025878906, |
|
"logps/rejected": -444.02471923828125, |
|
"loss": 0.5188, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.9943738579750061, |
|
"rewards/margins": 0.8635731339454651, |
|
"rewards/rejected": -1.8579471111297607, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.6615475487300649, |
|
"grad_norm": 2.5137621293297605, |
|
"learning_rate": 3.901871327295453e-07, |
|
"logits/chosen": -2.6592538356781006, |
|
"logits/rejected": -2.8847031593322754, |
|
"logps/chosen": -330.49609375, |
|
"logps/rejected": -349.5611572265625, |
|
"loss": 0.5406, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.8463073968887329, |
|
"rewards/margins": 0.6520651578903198, |
|
"rewards/rejected": -1.4983725547790527, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.6662728883638511, |
|
"grad_norm": 2.285013562717476, |
|
"learning_rate": 3.885930402099199e-07, |
|
"logits/chosen": -2.5882949829101562, |
|
"logits/rejected": -2.6650753021240234, |
|
"logps/chosen": -335.5899658203125, |
|
"logps/rejected": -360.06451416015625, |
|
"loss": 0.5074, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.9886775016784668, |
|
"rewards/margins": 0.6825906038284302, |
|
"rewards/rejected": -1.6712682247161865, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.6709982279976373, |
|
"grad_norm": 2.554720405760107, |
|
"learning_rate": 3.8699077072008085e-07, |
|
"logits/chosen": -2.8670525550842285, |
|
"logits/rejected": -2.866511344909668, |
|
"logps/chosen": -271.18243408203125, |
|
"logps/rejected": -343.53582763671875, |
|
"loss": 0.5163, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -1.0047590732574463, |
|
"rewards/margins": 0.5558298826217651, |
|
"rewards/rejected": -1.5605889558792114, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.6757235676314235, |
|
"grad_norm": 2.830180877879511, |
|
"learning_rate": 3.8538041879370657e-07, |
|
"logits/chosen": -3.037707567214966, |
|
"logits/rejected": -3.063495397567749, |
|
"logps/chosen": -347.0140380859375, |
|
"logps/rejected": -392.6544494628906, |
|
"loss": 0.5505, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.9989163875579834, |
|
"rewards/margins": 0.8129914402961731, |
|
"rewards/rejected": -1.8119077682495117, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.6804489072652097, |
|
"grad_norm": 2.6000164530484224, |
|
"learning_rate": 3.8376207944133817e-07, |
|
"logits/chosen": -3.087387800216675, |
|
"logits/rejected": -3.079148530960083, |
|
"logps/chosen": -316.996337890625, |
|
"logps/rejected": -361.9954528808594, |
|
"loss": 0.5365, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -1.111214518547058, |
|
"rewards/margins": 0.5254876017570496, |
|
"rewards/rejected": -1.636702060699463, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.6851742468989959, |
|
"grad_norm": 2.6115954536025408, |
|
"learning_rate": 3.8213584814477363e-07, |
|
"logits/chosen": -3.111316442489624, |
|
"logits/rejected": -3.184953451156616, |
|
"logps/chosen": -311.6522521972656, |
|
"logps/rejected": -325.0541076660156, |
|
"loss": 0.4982, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.9516675472259521, |
|
"rewards/margins": 0.6691429018974304, |
|
"rewards/rejected": -1.6208105087280273, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.689899586532782, |
|
"grad_norm": 2.976191491134426, |
|
"learning_rate": 3.8050182085143464e-07, |
|
"logits/chosen": -2.9731078147888184, |
|
"logits/rejected": -3.072920083999634, |
|
"logps/chosen": -309.2756042480469, |
|
"logps/rejected": -352.2410888671875, |
|
"loss": 0.5572, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.080316185951233, |
|
"rewards/margins": 0.8104506134986877, |
|
"rewards/rejected": -1.8907668590545654, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.6946249261665682, |
|
"grad_norm": 2.837971176112728, |
|
"learning_rate": 3.7886009396870564e-07, |
|
"logits/chosen": -2.8793129920959473, |
|
"logits/rejected": -2.8820691108703613, |
|
"logps/chosen": -288.26116943359375, |
|
"logps/rejected": -363.0924072265625, |
|
"loss": 0.5536, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -1.1091738939285278, |
|
"rewards/margins": 0.493495374917984, |
|
"rewards/rejected": -1.602669358253479, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.6993502658003544, |
|
"grad_norm": 2.928245357032695, |
|
"learning_rate": 3.7721076435824585e-07, |
|
"logits/chosen": -2.7040960788726807, |
|
"logits/rejected": -2.8614137172698975, |
|
"logps/chosen": -415.0562438964844, |
|
"logps/rejected": -420.36297607421875, |
|
"loss": 0.5321, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -1.0266618728637695, |
|
"rewards/margins": 0.8255325555801392, |
|
"rewards/rejected": -1.8521945476531982, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.7040756054341406, |
|
"grad_norm": 2.983452098073201, |
|
"learning_rate": 3.755539293302742e-07, |
|
"logits/chosen": -2.614259958267212, |
|
"logits/rejected": -2.6951889991760254, |
|
"logps/chosen": -352.3388366699219, |
|
"logps/rejected": -375.2248840332031, |
|
"loss": 0.5457, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.1011898517608643, |
|
"rewards/margins": 0.613980233669281, |
|
"rewards/rejected": -1.715169906616211, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.7088009450679268, |
|
"grad_norm": 2.6652814589714997, |
|
"learning_rate": 3.738896866378282e-07, |
|
"logits/chosen": -2.7506563663482666, |
|
"logits/rejected": -2.690138339996338, |
|
"logps/chosen": -322.7042236328125, |
|
"logps/rejected": -373.8511047363281, |
|
"loss": 0.516, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.9171172380447388, |
|
"rewards/margins": 0.8513570427894592, |
|
"rewards/rejected": -1.7684742212295532, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.713526284701713, |
|
"grad_norm": 2.5934419246508096, |
|
"learning_rate": 3.722181344709969e-07, |
|
"logits/chosen": -2.7295525074005127, |
|
"logits/rejected": -2.855721950531006, |
|
"logps/chosen": -332.5438232421875, |
|
"logps/rejected": -359.502197265625, |
|
"loss": 0.5121, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9578840136528015, |
|
"rewards/margins": 0.7636557221412659, |
|
"rewards/rejected": -1.7215397357940674, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.7182516243354992, |
|
"grad_norm": 3.1206430500674975, |
|
"learning_rate": 3.705393714511268e-07, |
|
"logits/chosen": -2.845468282699585, |
|
"logits/rejected": -2.7837162017822266, |
|
"logps/chosen": -314.1913757324219, |
|
"logps/rejected": -419.7186584472656, |
|
"loss": 0.5321, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.9525049924850464, |
|
"rewards/margins": 0.6129012703895569, |
|
"rewards/rejected": -1.5654062032699585, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.7229769639692853, |
|
"grad_norm": 3.3791075672510336, |
|
"learning_rate": 3.688534966250042e-07, |
|
"logits/chosen": -3.007288932800293, |
|
"logits/rejected": -2.9382283687591553, |
|
"logps/chosen": -297.81622314453125, |
|
"logps/rejected": -355.0559387207031, |
|
"loss": 0.54, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.8055727481842041, |
|
"rewards/margins": 0.7948654890060425, |
|
"rewards/rejected": -1.6004382371902466, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.7277023036030714, |
|
"grad_norm": 2.7374735457469384, |
|
"learning_rate": 3.671606094590108e-07, |
|
"logits/chosen": -2.7088348865509033, |
|
"logits/rejected": -2.7453291416168213, |
|
"logps/chosen": -337.4541931152344, |
|
"logps/rejected": -411.91485595703125, |
|
"loss": 0.5429, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.0184537172317505, |
|
"rewards/margins": 0.8295416235923767, |
|
"rewards/rejected": -1.8479952812194824, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.7324276432368576, |
|
"grad_norm": 2.977388710936637, |
|
"learning_rate": 3.6546080983325523e-07, |
|
"logits/chosen": -2.824364185333252, |
|
"logits/rejected": -2.911698579788208, |
|
"logps/chosen": -308.72161865234375, |
|
"logps/rejected": -312.67431640625, |
|
"loss": 0.5497, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.054483413696289, |
|
"rewards/margins": 0.345467209815979, |
|
"rewards/rejected": -1.399950623512268, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.7371529828706438, |
|
"grad_norm": 3.297987493267062, |
|
"learning_rate": 3.6375419803568046e-07, |
|
"logits/chosen": -2.938750743865967, |
|
"logits/rejected": -3.12616229057312, |
|
"logps/chosen": -380.7288513183594, |
|
"logps/rejected": -393.94683837890625, |
|
"loss": 0.5366, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -1.0641953945159912, |
|
"rewards/margins": 0.899326741695404, |
|
"rewards/rejected": -1.96352219581604, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.74187832250443, |
|
"grad_norm": 2.5345279316698983, |
|
"learning_rate": 3.6204087475614676e-07, |
|
"logits/chosen": -2.923267364501953, |
|
"logits/rejected": -2.8579440116882324, |
|
"logps/chosen": -302.9458923339844, |
|
"logps/rejected": -387.598388671875, |
|
"loss": 0.5246, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -1.0088130235671997, |
|
"rewards/margins": 0.8763782382011414, |
|
"rewards/rejected": -1.8851913213729858, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.7466036621382162, |
|
"grad_norm": 3.13993186485945, |
|
"learning_rate": 3.603209410804906e-07, |
|
"logits/chosen": -2.7970800399780273, |
|
"logits/rejected": -2.77022123336792, |
|
"logps/chosen": -260.28485107421875, |
|
"logps/rejected": -377.06585693359375, |
|
"loss": 0.5166, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9031432867050171, |
|
"rewards/margins": 0.9541431665420532, |
|
"rewards/rejected": -1.8572864532470703, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.7513290017720023, |
|
"grad_norm": 2.777966751571199, |
|
"learning_rate": 3.5859449848456123e-07, |
|
"logits/chosen": -2.83420991897583, |
|
"logits/rejected": -2.9197449684143066, |
|
"logps/chosen": -270.993896484375, |
|
"logps/rejected": -329.7164001464844, |
|
"loss": 0.5438, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.9526958465576172, |
|
"rewards/margins": 0.5992559194564819, |
|
"rewards/rejected": -1.5519516468048096, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.7560543414057885, |
|
"grad_norm": 2.816810414145774, |
|
"learning_rate": 3.5686164882823313e-07, |
|
"logits/chosen": -2.4739251136779785, |
|
"logits/rejected": -2.5660862922668457, |
|
"logps/chosen": -325.5205383300781, |
|
"logps/rejected": -357.9018859863281, |
|
"loss": 0.5207, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -1.059773564338684, |
|
"rewards/margins": 0.855665922164917, |
|
"rewards/rejected": -1.9154393672943115, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.7607796810395747, |
|
"grad_norm": 2.7264976697102217, |
|
"learning_rate": 3.5512249434939634e-07, |
|
"logits/chosen": -3.020364284515381, |
|
"logits/rejected": -3.1138038635253906, |
|
"logps/chosen": -292.44451904296875, |
|
"logps/rejected": -370.27581787109375, |
|
"loss": 0.5324, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.0052980184555054, |
|
"rewards/margins": 0.916731059551239, |
|
"rewards/rejected": -1.9220290184020996, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.7655050206733609, |
|
"grad_norm": 4.143376503088087, |
|
"learning_rate": 3.533771376579249e-07, |
|
"logits/chosen": -2.8414347171783447, |
|
"logits/rejected": -2.8189237117767334, |
|
"logps/chosen": -298.5760192871094, |
|
"logps/rejected": -407.37457275390625, |
|
"loss": 0.5111, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.9188227653503418, |
|
"rewards/margins": 1.0365185737609863, |
|
"rewards/rejected": -1.9553413391113281, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.7702303603071471, |
|
"grad_norm": 2.851627509613439, |
|
"learning_rate": 3.5162568172962215e-07, |
|
"logits/chosen": -2.737412214279175, |
|
"logits/rejected": -2.8929431438446045, |
|
"logps/chosen": -325.90631103515625, |
|
"logps/rejected": -372.39410400390625, |
|
"loss": 0.5193, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.134871244430542, |
|
"rewards/margins": 0.6576811075210571, |
|
"rewards/rejected": -1.7925523519515991, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.7749556999409333, |
|
"grad_norm": 3.4081328480353985, |
|
"learning_rate": 3.498682299001459e-07, |
|
"logits/chosen": -2.622042179107666, |
|
"logits/rejected": -2.759326696395874, |
|
"logps/chosen": -354.53448486328125, |
|
"logps/rejected": -397.6759338378906, |
|
"loss": 0.5084, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -1.1062355041503906, |
|
"rewards/margins": 0.7745749354362488, |
|
"rewards/rejected": -1.8808104991912842, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.7796810395747195, |
|
"grad_norm": 5.113522770893731, |
|
"learning_rate": 3.4810488585891103e-07, |
|
"logits/chosen": -2.7235350608825684, |
|
"logits/rejected": -2.8394298553466797, |
|
"logps/chosen": -339.3172912597656, |
|
"logps/rejected": -403.5739440917969, |
|
"loss": 0.5256, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.0079370737075806, |
|
"rewards/margins": 0.9361110925674438, |
|
"rewards/rejected": -1.9440481662750244, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.7844063792085056, |
|
"grad_norm": 2.845383053447366, |
|
"learning_rate": 3.4633575364297224e-07, |
|
"logits/chosen": -3.0904507637023926, |
|
"logits/rejected": -3.1447291374206543, |
|
"logps/chosen": -308.4082336425781, |
|
"logps/rejected": -403.6357421875, |
|
"loss": 0.5258, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.1904851198196411, |
|
"rewards/margins": 1.1804500818252563, |
|
"rewards/rejected": -2.3709352016448975, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.7891317188422918, |
|
"grad_norm": 3.6331311614486483, |
|
"learning_rate": 3.445609376308857e-07, |
|
"logits/chosen": -2.7289986610412598, |
|
"logits/rejected": -2.64235520362854, |
|
"logps/chosen": -351.40728759765625, |
|
"logps/rejected": -435.42010498046875, |
|
"loss": 0.4979, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -1.273624300956726, |
|
"rewards/margins": 1.0576740503311157, |
|
"rewards/rejected": -2.331298351287842, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.793857058476078, |
|
"grad_norm": 3.6152830110317655, |
|
"learning_rate": 3.4278054253655086e-07, |
|
"logits/chosen": -2.8024775981903076, |
|
"logits/rejected": -2.853891611099243, |
|
"logps/chosen": -322.92181396484375, |
|
"logps/rejected": -396.2481689453125, |
|
"loss": 0.5094, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -1.0655136108398438, |
|
"rewards/margins": 0.8890769481658936, |
|
"rewards/rejected": -1.9545905590057373, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.7985823981098642, |
|
"grad_norm": 3.119357172634371, |
|
"learning_rate": 3.4099467340303214e-07, |
|
"logits/chosen": -3.0272624492645264, |
|
"logits/rejected": -3.140334129333496, |
|
"logps/chosen": -307.60479736328125, |
|
"logps/rejected": -376.73590087890625, |
|
"loss": 0.5064, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.1390501260757446, |
|
"rewards/margins": 0.9899504780769348, |
|
"rewards/rejected": -2.129000425338745, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.8033077377436503, |
|
"grad_norm": 3.457360676304788, |
|
"learning_rate": 3.392034355963614e-07, |
|
"logits/chosen": -2.8242180347442627, |
|
"logits/rejected": -2.8203928470611572, |
|
"logps/chosen": -338.4170837402344, |
|
"logps/rejected": -375.84893798828125, |
|
"loss": 0.5257, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.182822346687317, |
|
"rewards/margins": 0.7907478213310242, |
|
"rewards/rejected": -1.9735702276229858, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.8080330773774365, |
|
"grad_norm": 5.011470232701305, |
|
"learning_rate": 3.374069347993218e-07, |
|
"logits/chosen": -2.6921019554138184, |
|
"logits/rejected": -2.7788760662078857, |
|
"logps/chosen": -329.470703125, |
|
"logps/rejected": -467.9057922363281, |
|
"loss": 0.5396, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.1266531944274902, |
|
"rewards/margins": 0.8941323757171631, |
|
"rewards/rejected": -2.0207855701446533, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.8127584170112226, |
|
"grad_norm": 3.7253097168704294, |
|
"learning_rate": 3.356052770052119e-07, |
|
"logits/chosen": -2.716782331466675, |
|
"logits/rejected": -2.6556971073150635, |
|
"logps/chosen": -309.70672607421875, |
|
"logps/rejected": -426.3639221191406, |
|
"loss": 0.5239, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.2285258769989014, |
|
"rewards/margins": 0.8051817417144775, |
|
"rewards/rejected": -2.033707618713379, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.8127584170112226, |
|
"eval_logits/chosen": -2.9802942276000977, |
|
"eval_logits/rejected": -2.9883527755737305, |
|
"eval_logps/chosen": -339.8587646484375, |
|
"eval_logps/rejected": -423.5323791503906, |
|
"eval_loss": 0.51226407289505, |
|
"eval_rewards/accuracies": 0.6287878751754761, |
|
"eval_rewards/chosen": -1.2862800359725952, |
|
"eval_rewards/margins": 0.9495444893836975, |
|
"eval_rewards/rejected": -2.2358245849609375, |
|
"eval_runtime": 225.4382, |
|
"eval_samples_per_second": 16.217, |
|
"eval_steps_per_second": 0.293, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.8174837566450088, |
|
"grad_norm": 4.039346342004204, |
|
"learning_rate": 3.337985685115926e-07, |
|
"logits/chosen": -2.9982471466064453, |
|
"logits/rejected": -2.934654712677002, |
|
"logps/chosen": -345.1435546875, |
|
"logps/rejected": -418.8468017578125, |
|
"loss": 0.5227, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -1.2590099573135376, |
|
"rewards/margins": 0.7078821659088135, |
|
"rewards/rejected": -1.9668920040130615, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.822209096278795, |
|
"grad_norm": 3.355839857834348, |
|
"learning_rate": 3.319869159140152e-07, |
|
"logits/chosen": -2.412257432937622, |
|
"logits/rejected": -2.5528626441955566, |
|
"logps/chosen": -313.64422607421875, |
|
"logps/rejected": -387.2737121582031, |
|
"loss": 0.5154, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2666703462600708, |
|
"rewards/margins": 0.9918266534805298, |
|
"rewards/rejected": -2.2584969997406006, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.8269344359125812, |
|
"grad_norm": 3.8883187343016528, |
|
"learning_rate": 3.301704260997325e-07, |
|
"logits/chosen": -2.835768222808838, |
|
"logits/rejected": -2.861588716506958, |
|
"logps/chosen": -310.4635314941406, |
|
"logps/rejected": -418.094970703125, |
|
"loss": 0.5263, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.149375557899475, |
|
"rewards/margins": 1.2631374597549438, |
|
"rewards/rejected": -2.41251277923584, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.8316597755463674, |
|
"grad_norm": 3.943606898054145, |
|
"learning_rate": 3.283492062413925e-07, |
|
"logits/chosen": -2.8773105144500732, |
|
"logits/rejected": -2.919630527496338, |
|
"logps/chosen": -315.5, |
|
"logps/rejected": -414.37908935546875, |
|
"loss": 0.5139, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.2905051708221436, |
|
"rewards/margins": 1.0659555196762085, |
|
"rewards/rejected": -2.3564605712890625, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.8363851151801536, |
|
"grad_norm": 4.394795298842495, |
|
"learning_rate": 3.2652336379071506e-07, |
|
"logits/chosen": -2.7635695934295654, |
|
"logits/rejected": -2.8052563667297363, |
|
"logps/chosen": -371.2409973144531, |
|
"logps/rejected": -429.0415954589844, |
|
"loss": 0.4828, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.2244281768798828, |
|
"rewards/margins": 1.045049786567688, |
|
"rewards/rejected": -2.2694778442382812, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.8411104548139398, |
|
"grad_norm": 3.6672724994810593, |
|
"learning_rate": 3.246930064721523e-07, |
|
"logits/chosen": -2.7490479946136475, |
|
"logits/rejected": -2.6954996585845947, |
|
"logps/chosen": -265.0568542480469, |
|
"logps/rejected": -373.4683532714844, |
|
"loss": 0.5134, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -1.1422216892242432, |
|
"rewards/margins": 1.010473370552063, |
|
"rewards/rejected": -2.1526949405670166, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.8458357944477259, |
|
"grad_norm": 3.438810171868027, |
|
"learning_rate": 3.228582422765331e-07, |
|
"logits/chosen": -2.661006212234497, |
|
"logits/rejected": -2.756809711456299, |
|
"logps/chosen": -363.7588806152344, |
|
"logps/rejected": -392.8025207519531, |
|
"loss": 0.5044, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -1.341508388519287, |
|
"rewards/margins": 0.7005224227905273, |
|
"rewards/rejected": -2.0420308113098145, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.8505611340815121, |
|
"grad_norm": 4.113182426182398, |
|
"learning_rate": 3.2101917945469135e-07, |
|
"logits/chosen": -2.700942277908325, |
|
"logits/rejected": -2.82185435295105, |
|
"logps/chosen": -364.240234375, |
|
"logps/rejected": -429.81292724609375, |
|
"loss": 0.4816, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.1254488229751587, |
|
"rewards/margins": 0.9122301340103149, |
|
"rewards/rejected": -2.0376789569854736, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.8552864737152983, |
|
"grad_norm": 4.022329679746118, |
|
"learning_rate": 3.1917592651107927e-07, |
|
"logits/chosen": -2.8973255157470703, |
|
"logits/rejected": -2.794524908065796, |
|
"logps/chosen": -352.9599914550781, |
|
"logps/rejected": -422.71197509765625, |
|
"loss": 0.5153, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -1.2179690599441528, |
|
"rewards/margins": 0.7948740124702454, |
|
"rewards/rejected": -2.012843132019043, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.8600118133490845, |
|
"grad_norm": 3.7009474178127353, |
|
"learning_rate": 3.173285921973657e-07, |
|
"logits/chosen": -2.793835401535034, |
|
"logits/rejected": -2.7703464031219482, |
|
"logps/chosen": -320.6808776855469, |
|
"logps/rejected": -454.6962890625, |
|
"loss": 0.512, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -1.1430678367614746, |
|
"rewards/margins": 1.6093158721923828, |
|
"rewards/rejected": -2.7523837089538574, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.8647371529828707, |
|
"grad_norm": 3.2328530891675182, |
|
"learning_rate": 3.1547728550601983e-07, |
|
"logits/chosen": -2.6808881759643555, |
|
"logits/rejected": -2.676565170288086, |
|
"logps/chosen": -301.3103332519531, |
|
"logps/rejected": -402.32281494140625, |
|
"loss": 0.5161, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.1162338256835938, |
|
"rewards/margins": 1.0167958736419678, |
|
"rewards/rejected": -2.1330299377441406, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.8694624926166569, |
|
"grad_norm": 3.899851002215307, |
|
"learning_rate": 3.1362211566388057e-07, |
|
"logits/chosen": -2.8450677394866943, |
|
"logits/rejected": -2.836167097091675, |
|
"logps/chosen": -341.335693359375, |
|
"logps/rejected": -407.4953918457031, |
|
"loss": 0.5422, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -1.2810035943984985, |
|
"rewards/margins": 0.6536043882369995, |
|
"rewards/rejected": -1.934607982635498, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.874187832250443, |
|
"grad_norm": 3.2712338921521518, |
|
"learning_rate": 3.1176319212571204e-07, |
|
"logits/chosen": -2.405541181564331, |
|
"logits/rejected": -2.4331328868865967, |
|
"logps/chosen": -288.65325927734375, |
|
"logps/rejected": -364.7303466796875, |
|
"loss": 0.5114, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.2161575555801392, |
|
"rewards/margins": 0.9309273362159729, |
|
"rewards/rejected": -2.147084951400757, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.8789131718842291, |
|
"grad_norm": 3.8818256203788852, |
|
"learning_rate": 3.099006245677461e-07, |
|
"logits/chosen": -2.4217336177825928, |
|
"logits/rejected": -2.360973358154297, |
|
"logps/chosen": -381.958740234375, |
|
"logps/rejected": -487.387451171875, |
|
"loss": 0.5393, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.2569398880004883, |
|
"rewards/margins": 0.7359199523925781, |
|
"rewards/rejected": -1.9928598403930664, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.8836385115180153, |
|
"grad_norm": 3.241850575925625, |
|
"learning_rate": 3.0803452288121113e-07, |
|
"logits/chosen": -2.6186816692352295, |
|
"logits/rejected": -2.5035552978515625, |
|
"logps/chosen": -356.3555603027344, |
|
"logps/rejected": -538.8609619140625, |
|
"loss": 0.4985, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.2594354152679443, |
|
"rewards/margins": 1.667135238647461, |
|
"rewards/rejected": -2.9265708923339844, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.8883638511518015, |
|
"grad_norm": 2.994070703824197, |
|
"learning_rate": 3.0616499716584874e-07, |
|
"logits/chosen": -2.936795473098755, |
|
"logits/rejected": -2.855689525604248, |
|
"logps/chosen": -339.5811767578125, |
|
"logps/rejected": -430.01483154296875, |
|
"loss": 0.5116, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.1258800029754639, |
|
"rewards/margins": 0.6621576547622681, |
|
"rewards/rejected": -1.7880375385284424, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.8930891907855877, |
|
"grad_norm": 3.4626212317934146, |
|
"learning_rate": 3.042921577234177e-07, |
|
"logits/chosen": -2.6947526931762695, |
|
"logits/rejected": -2.8166098594665527, |
|
"logps/chosen": -328.1815185546875, |
|
"logps/rejected": -340.3358154296875, |
|
"loss": 0.5048, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.0728776454925537, |
|
"rewards/margins": 0.6414874792098999, |
|
"rewards/rejected": -1.7143651247024536, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.8978145304193739, |
|
"grad_norm": 3.1647602539221182, |
|
"learning_rate": 3.024161150511861e-07, |
|
"logits/chosen": -2.9085636138916016, |
|
"logits/rejected": -3.0228068828582764, |
|
"logps/chosen": -323.0364074707031, |
|
"logps/rejected": -358.6117248535156, |
|
"loss": 0.5248, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -1.1753543615341187, |
|
"rewards/margins": 0.9408324956893921, |
|
"rewards/rejected": -2.1161868572235107, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.90253987005316, |
|
"grad_norm": 3.025256770279786, |
|
"learning_rate": 3.0053697983541247e-07, |
|
"logits/chosen": -2.545339345932007, |
|
"logits/rejected": -2.5873234272003174, |
|
"logps/chosen": -364.4934387207031, |
|
"logps/rejected": -404.4205322265625, |
|
"loss": 0.5114, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.3082777261734009, |
|
"rewards/margins": 0.9447382688522339, |
|
"rewards/rejected": -2.2530159950256348, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.9072652096869462, |
|
"grad_norm": 3.2107060614205682, |
|
"learning_rate": 2.986548629448146e-07, |
|
"logits/chosen": -2.5320749282836914, |
|
"logits/rejected": -2.6858551502227783, |
|
"logps/chosen": -357.6763916015625, |
|
"logps/rejected": -418.71173095703125, |
|
"loss": 0.5194, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -1.0752955675125122, |
|
"rewards/margins": 1.176898717880249, |
|
"rewards/rejected": -2.252194404602051, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.9119905493207324, |
|
"grad_norm": 4.290813644018959, |
|
"learning_rate": 2.967698754240289e-07, |
|
"logits/chosen": -2.6695892810821533, |
|
"logits/rejected": -2.660761833190918, |
|
"logps/chosen": -347.8155517578125, |
|
"logps/rejected": -403.8181457519531, |
|
"loss": 0.5543, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.207077145576477, |
|
"rewards/margins": 0.7051151394844055, |
|
"rewards/rejected": -1.9121922254562378, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.9167158889545186, |
|
"grad_norm": 3.656365041410898, |
|
"learning_rate": 2.948821284870585e-07, |
|
"logits/chosen": -3.009221315383911, |
|
"logits/rejected": -2.9382755756378174, |
|
"logps/chosen": -338.1603698730469, |
|
"logps/rejected": -425.35931396484375, |
|
"loss": 0.5007, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.3645328283309937, |
|
"rewards/margins": 0.9410390257835388, |
|
"rewards/rejected": -2.305572032928467, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.9214412285883048, |
|
"grad_norm": 3.166267065915439, |
|
"learning_rate": 2.9299173351071176e-07, |
|
"logits/chosen": -2.544590950012207, |
|
"logits/rejected": -2.5580813884735107, |
|
"logps/chosen": -375.34112548828125, |
|
"logps/rejected": -437.5019836425781, |
|
"loss": 0.5057, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.1270973682403564, |
|
"rewards/margins": 0.8198693990707397, |
|
"rewards/rejected": -1.9469666481018066, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.926166568222091, |
|
"grad_norm": 2.9416082082853805, |
|
"learning_rate": 2.9109880202803097e-07, |
|
"logits/chosen": -2.5898144245147705, |
|
"logits/rejected": -2.590017795562744, |
|
"logps/chosen": -325.3600769042969, |
|
"logps/rejected": -417.5689392089844, |
|
"loss": 0.5251, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.1922552585601807, |
|
"rewards/margins": 1.2383495569229126, |
|
"rewards/rejected": -2.4306044578552246, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.9308919078558772, |
|
"grad_norm": 4.344454107439186, |
|
"learning_rate": 2.892034457217119e-07, |
|
"logits/chosen": -2.793138027191162, |
|
"logits/rejected": -2.6765213012695312, |
|
"logps/chosen": -393.07080078125, |
|
"logps/rejected": -508.9129638671875, |
|
"loss": 0.5302, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -1.202684998512268, |
|
"rewards/margins": 1.2748346328735352, |
|
"rewards/rejected": -2.4775197505950928, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.9356172474896634, |
|
"grad_norm": 4.341995602773921, |
|
"learning_rate": 2.8730577641751474e-07, |
|
"logits/chosen": -2.5986645221710205, |
|
"logits/rejected": -2.7119102478027344, |
|
"logps/chosen": -301.242431640625, |
|
"logps/rejected": -423.28619384765625, |
|
"loss": 0.5145, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.1418884992599487, |
|
"rewards/margins": 1.2161611318588257, |
|
"rewards/rejected": -2.3580498695373535, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.9403425871234495, |
|
"grad_norm": 3.5092494052777536, |
|
"learning_rate": 2.854059060776659e-07, |
|
"logits/chosen": -2.4758505821228027, |
|
"logits/rejected": -2.5398991107940674, |
|
"logps/chosen": -311.9541015625, |
|
"logps/rejected": -413.2162780761719, |
|
"loss": 0.4867, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.084080457687378, |
|
"rewards/margins": 1.512494683265686, |
|
"rewards/rejected": -2.5965750217437744, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.9450679267572357, |
|
"grad_norm": 3.5187855585130294, |
|
"learning_rate": 2.835039467942529e-07, |
|
"logits/chosen": -2.5920052528381348, |
|
"logits/rejected": -2.7105183601379395, |
|
"logps/chosen": -340.66351318359375, |
|
"logps/rejected": -416.5780944824219, |
|
"loss": 0.5222, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0971770286560059, |
|
"rewards/margins": 0.9351829290390015, |
|
"rewards/rejected": -2.032360315322876, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.9497932663910219, |
|
"grad_norm": 3.793717608121411, |
|
"learning_rate": 2.8160001078261055e-07, |
|
"logits/chosen": -2.498663902282715, |
|
"logits/rejected": -2.4728269577026367, |
|
"logps/chosen": -301.9998474121094, |
|
"logps/rejected": -426.3241271972656, |
|
"loss": 0.5251, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.031512975692749, |
|
"rewards/margins": 1.083221197128296, |
|
"rewards/rejected": -2.114734172821045, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.954518606024808, |
|
"grad_norm": 2.9737105768220244, |
|
"learning_rate": 2.7969421037470033e-07, |
|
"logits/chosen": -2.7715539932250977, |
|
"logits/rejected": -2.7849650382995605, |
|
"logps/chosen": -345.68963623046875, |
|
"logps/rejected": -444.9283752441406, |
|
"loss": 0.5044, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -1.262139081954956, |
|
"rewards/margins": 0.9420502185821533, |
|
"rewards/rejected": -2.2041893005371094, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.9592439456585942, |
|
"grad_norm": 3.4484596414693893, |
|
"learning_rate": 2.777866580124829e-07, |
|
"logits/chosen": -2.491079330444336, |
|
"logits/rejected": -2.7043981552124023, |
|
"logps/chosen": -395.48211669921875, |
|
"logps/rejected": -463.1803894042969, |
|
"loss": 0.5037, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.2879524230957031, |
|
"rewards/margins": 0.926118791103363, |
|
"rewards/rejected": -2.214071273803711, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.9639692852923804, |
|
"grad_norm": 3.553922161828863, |
|
"learning_rate": 2.758774662412838e-07, |
|
"logits/chosen": -2.5517563819885254, |
|
"logits/rejected": -2.4807627201080322, |
|
"logps/chosen": -305.34637451171875, |
|
"logps/rejected": -486.2059631347656, |
|
"loss": 0.5027, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -1.346076250076294, |
|
"rewards/margins": 1.699803352355957, |
|
"rewards/rejected": -3.04587984085083, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.9686946249261665, |
|
"grad_norm": 3.191519565656703, |
|
"learning_rate": 2.739667477031538e-07, |
|
"logits/chosen": -2.7191619873046875, |
|
"logits/rejected": -2.7111451625823975, |
|
"logps/chosen": -344.4493408203125, |
|
"logps/rejected": -480.70135498046875, |
|
"loss": 0.4982, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.2324879169464111, |
|
"rewards/margins": 1.2888904809951782, |
|
"rewards/rejected": -2.521378517150879, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.9734199645599527, |
|
"grad_norm": 4.672920315021906, |
|
"learning_rate": 2.7205461513022233e-07, |
|
"logits/chosen": -2.0949220657348633, |
|
"logits/rejected": -2.1367533206939697, |
|
"logps/chosen": -390.1219482421875, |
|
"logps/rejected": -415.38037109375, |
|
"loss": 0.4815, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -1.0024380683898926, |
|
"rewards/margins": 0.9189928770065308, |
|
"rewards/rejected": -1.9214308261871338, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.9781453041937389, |
|
"grad_norm": 3.5306370227740262, |
|
"learning_rate": 2.70141181338047e-07, |
|
"logits/chosen": -2.406205177307129, |
|
"logits/rejected": -2.38879132270813, |
|
"logps/chosen": -356.2658386230469, |
|
"logps/rejected": -456.2667236328125, |
|
"loss": 0.4946, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.220351219177246, |
|
"rewards/margins": 1.0728812217712402, |
|
"rewards/rejected": -2.2932324409484863, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.9828706438275251, |
|
"grad_norm": 3.479933152007338, |
|
"learning_rate": 2.6822655921895693e-07, |
|
"logits/chosen": -2.446387767791748, |
|
"logits/rejected": -2.346810817718506, |
|
"logps/chosen": -312.9122619628906, |
|
"logps/rejected": -459.10040283203125, |
|
"loss": 0.5015, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -1.142736792564392, |
|
"rewards/margins": 1.2611982822418213, |
|
"rewards/rejected": -2.403934955596924, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.9875959834613113, |
|
"grad_norm": 3.8539520740205018, |
|
"learning_rate": 2.663108617353926e-07, |
|
"logits/chosen": -2.7008585929870605, |
|
"logits/rejected": -2.7585320472717285, |
|
"logps/chosen": -410.00634765625, |
|
"logps/rejected": -455.63812255859375, |
|
"loss": 0.5103, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -1.2332756519317627, |
|
"rewards/margins": 0.693227231502533, |
|
"rewards/rejected": -1.9265029430389404, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.9923213230950975, |
|
"grad_norm": 3.845322423212567, |
|
"learning_rate": 2.6439420191324064e-07, |
|
"logits/chosen": -2.462545871734619, |
|
"logits/rejected": -2.5116636753082275, |
|
"logps/chosen": -305.0135803222656, |
|
"logps/rejected": -405.6048583984375, |
|
"loss": 0.4911, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.231694221496582, |
|
"rewards/margins": 0.9429072141647339, |
|
"rewards/rejected": -2.1746013164520264, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.9970466627288836, |
|
"grad_norm": 5.6022942181300905, |
|
"learning_rate": 2.6247669283516556e-07, |
|
"logits/chosen": -2.544132947921753, |
|
"logits/rejected": -2.5758156776428223, |
|
"logps/chosen": -366.85009765625, |
|
"logps/rejected": -442.0340881347656, |
|
"loss": 0.5449, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -1.4171723127365112, |
|
"rewards/margins": 1.0115364789962769, |
|
"rewards/rejected": -2.428708791732788, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 1.0017720023626697, |
|
"grad_norm": 3.186023679965871, |
|
"learning_rate": 2.60558447633938e-07, |
|
"logits/chosen": -2.244544506072998, |
|
"logits/rejected": -2.3220551013946533, |
|
"logps/chosen": -366.2366638183594, |
|
"logps/rejected": -452.18707275390625, |
|
"loss": 0.5058, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.4039456844329834, |
|
"rewards/margins": 1.031445860862732, |
|
"rewards/rejected": -2.435391426086426, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 1.006497341996456, |
|
"grad_norm": 3.6450488482030523, |
|
"learning_rate": 2.5863957948575963e-07, |
|
"logits/chosen": -2.3453848361968994, |
|
"logits/rejected": -2.468796968460083, |
|
"logps/chosen": -326.9305114746094, |
|
"logps/rejected": -368.9020690917969, |
|
"loss": 0.4954, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.1048694849014282, |
|
"rewards/margins": 0.7470109462738037, |
|
"rewards/rejected": -1.8518803119659424, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 1.011222681630242, |
|
"grad_norm": 4.137227043648897, |
|
"learning_rate": 2.567202016035859e-07, |
|
"logits/chosen": -2.5369303226470947, |
|
"logits/rejected": -2.535897731781006, |
|
"logps/chosen": -310.4713439941406, |
|
"logps/rejected": -399.39398193359375, |
|
"loss": 0.4987, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.2701810598373413, |
|
"rewards/margins": 0.7798756957054138, |
|
"rewards/rejected": -2.0500564575195312, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 1.0159480212640284, |
|
"grad_norm": 4.02450997349156, |
|
"learning_rate": 2.5480042723044653e-07, |
|
"logits/chosen": -2.567810535430908, |
|
"logits/rejected": -2.575981378555298, |
|
"logps/chosen": -350.37957763671875, |
|
"logps/rejected": -428.45745849609375, |
|
"loss": 0.4668, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -1.4323270320892334, |
|
"rewards/margins": 1.0882251262664795, |
|
"rewards/rejected": -2.520552396774292, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.0159480212640284, |
|
"eval_logits/chosen": -2.584289789199829, |
|
"eval_logits/rejected": -2.5910158157348633, |
|
"eval_logps/chosen": -361.1751708984375, |
|
"eval_logps/rejected": -463.7195129394531, |
|
"eval_loss": 0.49447911977767944, |
|
"eval_rewards/accuracies": 0.6439393758773804, |
|
"eval_rewards/chosen": -1.499444603919983, |
|
"eval_rewards/margins": 1.138251781463623, |
|
"eval_rewards/rejected": -2.6376962661743164, |
|
"eval_runtime": 224.7997, |
|
"eval_samples_per_second": 16.263, |
|
"eval_steps_per_second": 0.294, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.0206733608978145, |
|
"grad_norm": 4.359766282431931, |
|
"learning_rate": 2.5288036963276414e-07, |
|
"logits/chosen": -2.4916322231292725, |
|
"logits/rejected": -2.482870101928711, |
|
"logps/chosen": -297.8340759277344, |
|
"logps/rejected": -387.14703369140625, |
|
"loss": 0.4968, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.1933519840240479, |
|
"rewards/margins": 1.0262653827667236, |
|
"rewards/rejected": -2.2196173667907715, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.0253987005316008, |
|
"grad_norm": 3.593941578167617, |
|
"learning_rate": 2.509601420936717e-07, |
|
"logits/chosen": -2.5306057929992676, |
|
"logits/rejected": -2.440415382385254, |
|
"logps/chosen": -341.2530212402344, |
|
"logps/rejected": -464.18487548828125, |
|
"loss": 0.488, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.4438025951385498, |
|
"rewards/margins": 1.340946078300476, |
|
"rewards/rejected": -2.7847485542297363, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 1.0301240401653868, |
|
"grad_norm": 3.664745354346814, |
|
"learning_rate": 2.490398579063283e-07, |
|
"logits/chosen": -2.6102001667022705, |
|
"logits/rejected": -2.529940128326416, |
|
"logps/chosen": -345.69677734375, |
|
"logps/rejected": -484.93035888671875, |
|
"loss": 0.5001, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -1.5673726797103882, |
|
"rewards/margins": 1.314273476600647, |
|
"rewards/rejected": -2.8816463947296143, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 1.0348493797991731, |
|
"grad_norm": 5.084728530421535, |
|
"learning_rate": 2.4711963036723583e-07, |
|
"logits/chosen": -2.3622119426727295, |
|
"logits/rejected": -2.365530490875244, |
|
"logps/chosen": -367.52984619140625, |
|
"logps/rejected": -406.59375, |
|
"loss": 0.4928, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.4226937294006348, |
|
"rewards/margins": 0.49759745597839355, |
|
"rewards/rejected": -1.9202911853790283, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 1.0395747194329592, |
|
"grad_norm": 3.6814125755312093, |
|
"learning_rate": 2.451995727695535e-07, |
|
"logits/chosen": -2.4771206378936768, |
|
"logits/rejected": -2.5038909912109375, |
|
"logps/chosen": -365.91815185546875, |
|
"logps/rejected": -477.00347900390625, |
|
"loss": 0.4837, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.5666453838348389, |
|
"rewards/margins": 1.2746491432189941, |
|
"rewards/rejected": -2.841294527053833, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.0443000590667455, |
|
"grad_norm": 4.348311431123137, |
|
"learning_rate": 2.432797983964141e-07, |
|
"logits/chosen": -2.514589786529541, |
|
"logits/rejected": -2.460106134414673, |
|
"logps/chosen": -358.013427734375, |
|
"logps/rejected": -444.3507080078125, |
|
"loss": 0.4922, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -1.4816346168518066, |
|
"rewards/margins": 0.9881500601768494, |
|
"rewards/rejected": -2.469784736633301, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 1.0490253987005316, |
|
"grad_norm": 4.474335447495181, |
|
"learning_rate": 2.413604205142404e-07, |
|
"logits/chosen": -2.2786381244659424, |
|
"logits/rejected": -2.371419668197632, |
|
"logps/chosen": -377.5779113769531, |
|
"logps/rejected": -431.80316162109375, |
|
"loss": 0.5267, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.8127973079681396, |
|
"rewards/margins": 0.9888743162155151, |
|
"rewards/rejected": -2.8016717433929443, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 1.0537507383343179, |
|
"grad_norm": 3.9171412475826557, |
|
"learning_rate": 2.3944155236606196e-07, |
|
"logits/chosen": -2.4430832862854004, |
|
"logits/rejected": -2.409916639328003, |
|
"logps/chosen": -344.40399169921875, |
|
"logps/rejected": -461.02349853515625, |
|
"loss": 0.4846, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.4063448905944824, |
|
"rewards/margins": 1.1203367710113525, |
|
"rewards/rejected": -2.526681661605835, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 1.058476077968104, |
|
"grad_norm": 5.279702970403167, |
|
"learning_rate": 2.3752330716483444e-07, |
|
"logits/chosen": -2.7835280895233154, |
|
"logits/rejected": -2.7787797451019287, |
|
"logps/chosen": -353.5722351074219, |
|
"logps/rejected": -415.2908935546875, |
|
"loss": 0.5057, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.6296898126602173, |
|
"rewards/margins": 0.9214704632759094, |
|
"rewards/rejected": -2.5511598587036133, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 1.0632014176018902, |
|
"grad_norm": 3.8586230520693157, |
|
"learning_rate": 2.356057980867594e-07, |
|
"logits/chosen": -2.547018527984619, |
|
"logits/rejected": -2.5479323863983154, |
|
"logps/chosen": -411.9010009765625, |
|
"logps/rejected": -543.1708984375, |
|
"loss": 0.4834, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -1.6013555526733398, |
|
"rewards/margins": 1.6427891254425049, |
|
"rewards/rejected": -3.244144916534424, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.0679267572356763, |
|
"grad_norm": 3.836310963297125, |
|
"learning_rate": 2.3368913826460742e-07, |
|
"logits/chosen": -2.622213840484619, |
|
"logits/rejected": -2.699857473373413, |
|
"logps/chosen": -383.7267150878906, |
|
"logps/rejected": -406.25152587890625, |
|
"loss": 0.5085, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -1.4025287628173828, |
|
"rewards/margins": 0.56195068359375, |
|
"rewards/rejected": -1.9644795656204224, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 1.0726520968694624, |
|
"grad_norm": 4.999193459222086, |
|
"learning_rate": 2.3177344078104305e-07, |
|
"logits/chosen": -2.6717772483825684, |
|
"logits/rejected": -2.672889232635498, |
|
"logps/chosen": -346.08544921875, |
|
"logps/rejected": -417.90545654296875, |
|
"loss": 0.4906, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -1.6554023027420044, |
|
"rewards/margins": 0.8562977313995361, |
|
"rewards/rejected": -2.51170015335083, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 1.0773774365032487, |
|
"grad_norm": 4.6410271396992595, |
|
"learning_rate": 2.2985881866195304e-07, |
|
"logits/chosen": -2.646639347076416, |
|
"logits/rejected": -2.7165169715881348, |
|
"logps/chosen": -355.2235107421875, |
|
"logps/rejected": -395.57550048828125, |
|
"loss": 0.5089, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -1.3660098314285278, |
|
"rewards/margins": 0.7286862134933472, |
|
"rewards/rejected": -2.094696044921875, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 1.0821027761370348, |
|
"grad_norm": 3.9004711748243412, |
|
"learning_rate": 2.2794538486977765e-07, |
|
"logits/chosen": -2.573826313018799, |
|
"logits/rejected": -2.6808300018310547, |
|
"logps/chosen": -355.0180969238281, |
|
"logps/rejected": -473.90252685546875, |
|
"loss": 0.4922, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.4073047637939453, |
|
"rewards/margins": 1.1750006675720215, |
|
"rewards/rejected": -2.582305431365967, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 1.086828115770821, |
|
"grad_norm": 3.6316010874552704, |
|
"learning_rate": 2.2603325229684628e-07, |
|
"logits/chosen": -2.4409735202789307, |
|
"logits/rejected": -2.4068901538848877, |
|
"logps/chosen": -306.1089172363281, |
|
"logps/rejected": -447.4771423339844, |
|
"loss": 0.4552, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -1.4156140089035034, |
|
"rewards/margins": 1.0425467491149902, |
|
"rewards/rejected": -2.458160877227783, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.0915534554046071, |
|
"grad_norm": 4.420363258356965, |
|
"learning_rate": 2.2412253375871618e-07, |
|
"logits/chosen": -2.681562662124634, |
|
"logits/rejected": -2.5477468967437744, |
|
"logps/chosen": -338.50775146484375, |
|
"logps/rejected": -500.01568603515625, |
|
"loss": 0.4909, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.4533276557922363, |
|
"rewards/margins": 1.5830377340316772, |
|
"rewards/rejected": -3.036365270614624, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 1.0962787950383934, |
|
"grad_norm": 3.3398932376367116, |
|
"learning_rate": 2.2221334198751717e-07, |
|
"logits/chosen": -2.566534996032715, |
|
"logits/rejected": -2.663203716278076, |
|
"logps/chosen": -423.2144470214844, |
|
"logps/rejected": -525.0465087890625, |
|
"loss": 0.4902, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.552110195159912, |
|
"rewards/margins": 1.6297154426574707, |
|
"rewards/rejected": -3.181825876235962, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 1.1010041346721795, |
|
"grad_norm": 3.258958216613353, |
|
"learning_rate": 2.2030578962529964e-07, |
|
"logits/chosen": -2.533161163330078, |
|
"logits/rejected": -2.598869800567627, |
|
"logps/chosen": -384.85443115234375, |
|
"logps/rejected": -436.78302001953125, |
|
"loss": 0.4943, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -1.1795471906661987, |
|
"rewards/margins": 0.7562814354896545, |
|
"rewards/rejected": -1.935828685760498, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 1.1057294743059658, |
|
"grad_norm": 4.3977570998277695, |
|
"learning_rate": 2.1839998921738948e-07, |
|
"logits/chosen": -2.682744026184082, |
|
"logits/rejected": -2.8130013942718506, |
|
"logps/chosen": -375.19012451171875, |
|
"logps/rejected": -446.29486083984375, |
|
"loss": 0.4962, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.338588833808899, |
|
"rewards/margins": 0.9564355611801147, |
|
"rewards/rejected": -2.2950241565704346, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 1.1104548139397519, |
|
"grad_norm": 3.711007822076339, |
|
"learning_rate": 2.1649605320574715e-07, |
|
"logits/chosen": -2.770697832107544, |
|
"logits/rejected": -2.7958450317382812, |
|
"logps/chosen": -263.7291259765625, |
|
"logps/rejected": -410.407470703125, |
|
"loss": 0.4984, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.2530757188796997, |
|
"rewards/margins": 1.3193039894104004, |
|
"rewards/rejected": -2.5723795890808105, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.1151801535735382, |
|
"grad_norm": 5.205159446883962, |
|
"learning_rate": 2.1459409392233414e-07, |
|
"logits/chosen": -2.509124279022217, |
|
"logits/rejected": -2.4804513454437256, |
|
"logps/chosen": -431.5054931640625, |
|
"logps/rejected": -569.149169921875, |
|
"loss": 0.4849, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6995575428009033, |
|
"rewards/margins": 1.6153841018676758, |
|
"rewards/rejected": -3.3149421215057373, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 1.1199054932073242, |
|
"grad_norm": 3.6720237761578276, |
|
"learning_rate": 2.1269422358248534e-07, |
|
"logits/chosen": -2.2790334224700928, |
|
"logits/rejected": -2.5470128059387207, |
|
"logps/chosen": -368.87750244140625, |
|
"logps/rejected": -383.09478759765625, |
|
"loss": 0.4829, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.3113298416137695, |
|
"rewards/margins": 0.9517258405685425, |
|
"rewards/rejected": -2.2630558013916016, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 1.1246308328411105, |
|
"grad_norm": 4.38203569485804, |
|
"learning_rate": 2.1079655427828804e-07, |
|
"logits/chosen": -2.484546184539795, |
|
"logits/rejected": -2.513195753097534, |
|
"logps/chosen": -329.1628723144531, |
|
"logps/rejected": -448.17327880859375, |
|
"loss": 0.4783, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.3690838813781738, |
|
"rewards/margins": 1.233577847480774, |
|
"rewards/rejected": -2.602661609649658, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 1.1293561724748966, |
|
"grad_norm": 4.1915678394143665, |
|
"learning_rate": 2.0890119797196904e-07, |
|
"logits/chosen": -2.489327907562256, |
|
"logits/rejected": -2.619502544403076, |
|
"logps/chosen": -367.636474609375, |
|
"logps/rejected": -442.066650390625, |
|
"loss": 0.4619, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.260221004486084, |
|
"rewards/margins": 1.034293293952942, |
|
"rewards/rejected": -2.2945144176483154, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 1.1340815121086827, |
|
"grad_norm": 4.912500043289106, |
|
"learning_rate": 2.0700826648928827e-07, |
|
"logits/chosen": -2.445549964904785, |
|
"logits/rejected": -2.4925429821014404, |
|
"logps/chosen": -403.63580322265625, |
|
"logps/rejected": -492.6475830078125, |
|
"loss": 0.4837, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4245916604995728, |
|
"rewards/margins": 1.291830062866211, |
|
"rewards/rejected": -2.7164220809936523, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.138806851742469, |
|
"grad_norm": 3.973403985509937, |
|
"learning_rate": 2.0511787151294153e-07, |
|
"logits/chosen": -2.6269099712371826, |
|
"logits/rejected": -2.6633949279785156, |
|
"logps/chosen": -362.7601318359375, |
|
"logps/rejected": -465.685791015625, |
|
"loss": 0.4772, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -1.3287506103515625, |
|
"rewards/margins": 1.2157796621322632, |
|
"rewards/rejected": -2.544530153274536, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 1.1435321913762553, |
|
"grad_norm": 3.706128513697955, |
|
"learning_rate": 2.0323012457597113e-07, |
|
"logits/chosen": -2.6716468334198, |
|
"logits/rejected": -2.5537989139556885, |
|
"logps/chosen": -313.87188720703125, |
|
"logps/rejected": -456.5760498046875, |
|
"loss": 0.4663, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -1.3276413679122925, |
|
"rewards/margins": 1.286893367767334, |
|
"rewards/rejected": -2.614534854888916, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.1482575310100414, |
|
"grad_norm": 4.436897843537744, |
|
"learning_rate": 2.0134513705518544e-07, |
|
"logits/chosen": -2.442168951034546, |
|
"logits/rejected": -2.475062608718872, |
|
"logps/chosen": -349.3672180175781, |
|
"logps/rejected": -437.71075439453125, |
|
"loss": 0.4738, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.4281671047210693, |
|
"rewards/margins": 1.169339656829834, |
|
"rewards/rejected": -2.5975069999694824, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.1529828706438274, |
|
"grad_norm": 3.739187250501708, |
|
"learning_rate": 1.9946302016458754e-07, |
|
"logits/chosen": -2.5069191455841064, |
|
"logits/rejected": -2.4427642822265625, |
|
"logps/chosen": -378.0785827636719, |
|
"logps/rejected": -515.2130126953125, |
|
"loss": 0.4537, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.5397615432739258, |
|
"rewards/margins": 1.4216468334197998, |
|
"rewards/rejected": -2.9614078998565674, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 1.1577082102776137, |
|
"grad_norm": 4.618667609463329, |
|
"learning_rate": 1.975838849488139e-07, |
|
"logits/chosen": -2.6840784549713135, |
|
"logits/rejected": -2.608031988143921, |
|
"logps/chosen": -353.92388916015625, |
|
"logps/rejected": -443.1690368652344, |
|
"loss": 0.4789, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.5656286478042603, |
|
"rewards/margins": 0.7766789793968201, |
|
"rewards/rejected": -2.3423075675964355, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.1624335499113998, |
|
"grad_norm": 3.85370799421685, |
|
"learning_rate": 1.957078422765823e-07, |
|
"logits/chosen": -2.512361764907837, |
|
"logits/rejected": -2.664332389831543, |
|
"logps/chosen": -423.3899230957031, |
|
"logps/rejected": -493.901123046875, |
|
"loss": 0.508, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.5689921379089355, |
|
"rewards/margins": 1.2216757535934448, |
|
"rewards/rejected": -2.790667772293091, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 1.167158889545186, |
|
"grad_norm": 4.071196958136995, |
|
"learning_rate": 1.9383500283415127e-07, |
|
"logits/chosen": -2.7708868980407715, |
|
"logits/rejected": -2.928670644760132, |
|
"logps/chosen": -413.68780517578125, |
|
"logps/rejected": -419.13287353515625, |
|
"loss": 0.4674, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.6208066940307617, |
|
"rewards/margins": 0.8534724116325378, |
|
"rewards/rejected": -2.4742789268493652, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 1.1718842291789722, |
|
"grad_norm": 4.6057615724394445, |
|
"learning_rate": 1.9196547711878882e-07, |
|
"logits/chosen": -2.7272331714630127, |
|
"logits/rejected": -2.7651190757751465, |
|
"logps/chosen": -391.68218994140625, |
|
"logps/rejected": -536.7830810546875, |
|
"loss": 0.4898, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.3025808334350586, |
|
"rewards/margins": 1.699794888496399, |
|
"rewards/rejected": -3.002375602722168, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 1.1766095688127585, |
|
"grad_norm": 4.356687184481579, |
|
"learning_rate": 1.9009937543225393e-07, |
|
"logits/chosen": -2.6314167976379395, |
|
"logits/rejected": -2.5595688819885254, |
|
"logps/chosen": -311.2470703125, |
|
"logps/rejected": -458.4511413574219, |
|
"loss": 0.4912, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -1.5766427516937256, |
|
"rewards/margins": 1.1243988275527954, |
|
"rewards/rejected": -2.7010414600372314, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 1.1813349084465445, |
|
"grad_norm": 3.8646816254377168, |
|
"learning_rate": 1.8823680787428804e-07, |
|
"logits/chosen": -2.628770589828491, |
|
"logits/rejected": -2.6632299423217773, |
|
"logps/chosen": -362.2929382324219, |
|
"logps/rejected": -451.97686767578125, |
|
"loss": 0.5168, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.444336175918579, |
|
"rewards/margins": 1.026198387145996, |
|
"rewards/rejected": -2.470534324645996, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.1860602480803308, |
|
"grad_norm": 4.972160856261013, |
|
"learning_rate": 1.8637788433611946e-07, |
|
"logits/chosen": -2.6531898975372314, |
|
"logits/rejected": -2.6115176677703857, |
|
"logps/chosen": -374.62554931640625, |
|
"logps/rejected": -556.6736450195312, |
|
"loss": 0.4812, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.495612382888794, |
|
"rewards/margins": 1.505082368850708, |
|
"rewards/rejected": -3.000694751739502, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 1.190785587714117, |
|
"grad_norm": 5.493325018181506, |
|
"learning_rate": 1.8452271449398015e-07, |
|
"logits/chosen": -2.6560559272766113, |
|
"logits/rejected": -2.7018203735351562, |
|
"logps/chosen": -352.3895568847656, |
|
"logps/rejected": -441.25994873046875, |
|
"loss": 0.4814, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6333725452423096, |
|
"rewards/margins": 1.1176269054412842, |
|
"rewards/rejected": -2.7509994506835938, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 1.1955109273479032, |
|
"grad_norm": 4.859694388307905, |
|
"learning_rate": 1.8267140780263424e-07, |
|
"logits/chosen": -2.634824514389038, |
|
"logits/rejected": -2.638603448867798, |
|
"logps/chosen": -336.9228515625, |
|
"logps/rejected": -454.23883056640625, |
|
"loss": 0.4996, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.460708498954773, |
|
"rewards/margins": 1.1815128326416016, |
|
"rewards/rejected": -2.642221450805664, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 1.2002362669816893, |
|
"grad_norm": 4.23344077907717, |
|
"learning_rate": 1.8082407348892076e-07, |
|
"logits/chosen": -2.581425666809082, |
|
"logits/rejected": -2.4411489963531494, |
|
"logps/chosen": -360.4097595214844, |
|
"logps/rejected": -511.65484619140625, |
|
"loss": 0.4754, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -1.5460442304611206, |
|
"rewards/margins": 1.2290416955947876, |
|
"rewards/rejected": -2.775085687637329, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 1.2049616066154756, |
|
"grad_norm": 5.021641074779002, |
|
"learning_rate": 1.7898082054530868e-07, |
|
"logits/chosen": -2.5814576148986816, |
|
"logits/rejected": -2.562331199645996, |
|
"logps/chosen": -391.0801696777344, |
|
"logps/rejected": -486.2853698730469, |
|
"loss": 0.4886, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -1.5095934867858887, |
|
"rewards/margins": 0.9785588979721069, |
|
"rewards/rejected": -2.488152503967285, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.2096869462492617, |
|
"grad_norm": 4.032966746831722, |
|
"learning_rate": 1.7714175772346683e-07, |
|
"logits/chosen": -2.73325514793396, |
|
"logits/rejected": -2.8232052326202393, |
|
"logps/chosen": -361.27423095703125, |
|
"logps/rejected": -461.78125, |
|
"loss": 0.5027, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -1.4946091175079346, |
|
"rewards/margins": 1.1646876335144043, |
|
"rewards/rejected": -2.659296751022339, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 1.2144122858830477, |
|
"grad_norm": 4.348859426367674, |
|
"learning_rate": 1.753069935278477e-07, |
|
"logits/chosen": -2.5508053302764893, |
|
"logits/rejected": -2.6324799060821533, |
|
"logps/chosen": -349.6580810546875, |
|
"logps/rejected": -436.2727966308594, |
|
"loss": 0.4904, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -1.5449261665344238, |
|
"rewards/margins": 1.1617025136947632, |
|
"rewards/rejected": -2.7066287994384766, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 1.219137625516834, |
|
"grad_norm": 3.7399337348111246, |
|
"learning_rate": 1.7347663620928494e-07, |
|
"logits/chosen": -2.6396665573120117, |
|
"logits/rejected": -2.686690330505371, |
|
"logps/chosen": -373.62762451171875, |
|
"logps/rejected": -456.91876220703125, |
|
"loss": 0.4607, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.5388782024383545, |
|
"rewards/margins": 1.0700188875198364, |
|
"rewards/rejected": -2.6088972091674805, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 1.219137625516834, |
|
"eval_logits/chosen": -2.7951161861419678, |
|
"eval_logits/rejected": -2.8025708198547363, |
|
"eval_logps/chosen": -369.3280029296875, |
|
"eval_logps/rejected": -488.8177490234375, |
|
"eval_loss": 0.48161956667900085, |
|
"eval_rewards/accuracies": 0.6401515007019043, |
|
"eval_rewards/chosen": -1.5809730291366577, |
|
"eval_rewards/margins": 1.307705044746399, |
|
"eval_rewards/rejected": -2.8886778354644775, |
|
"eval_runtime": 225.1752, |
|
"eval_samples_per_second": 16.236, |
|
"eval_steps_per_second": 0.293, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 1.2238629651506203, |
|
"grad_norm": 4.565788447191084, |
|
"learning_rate": 1.7165079375860752e-07, |
|
"logits/chosen": -2.7769393920898438, |
|
"logits/rejected": -2.7542011737823486, |
|
"logps/chosen": -327.9093322753906, |
|
"logps/rejected": -441.0569763183594, |
|
"loss": 0.5122, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -1.4275953769683838, |
|
"rewards/margins": 1.174314022064209, |
|
"rewards/rejected": -2.6019093990325928, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 1.2285883047844064, |
|
"grad_norm": 3.8161499063717206, |
|
"learning_rate": 1.6982957390026748e-07, |
|
"logits/chosen": -2.5881879329681396, |
|
"logits/rejected": -2.5274972915649414, |
|
"logps/chosen": -380.05670166015625, |
|
"logps/rejected": -548.7481079101562, |
|
"loss": 0.4618, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -1.7176233530044556, |
|
"rewards/margins": 1.5351811647415161, |
|
"rewards/rejected": -3.2528045177459717, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.2333136444181925, |
|
"grad_norm": 4.44964808900954, |
|
"learning_rate": 1.680130840859848e-07, |
|
"logits/chosen": -2.6286840438842773, |
|
"logits/rejected": -2.606605291366577, |
|
"logps/chosen": -325.35833740234375, |
|
"logps/rejected": -435.7322082519531, |
|
"loss": 0.4912, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -1.2944529056549072, |
|
"rewards/margins": 1.2223491668701172, |
|
"rewards/rejected": -2.5168020725250244, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 1.2380389840519788, |
|
"grad_norm": 4.264258297752704, |
|
"learning_rate": 1.662014314884074e-07, |
|
"logits/chosen": -2.6938886642456055, |
|
"logits/rejected": -2.6410956382751465, |
|
"logps/chosen": -341.89141845703125, |
|
"logps/rejected": -533.8803100585938, |
|
"loss": 0.4583, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -1.6077433824539185, |
|
"rewards/margins": 1.743915319442749, |
|
"rewards/rejected": -3.351658821105957, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 1.2427643236857648, |
|
"grad_norm": 3.8551209282619374, |
|
"learning_rate": 1.64394722994788e-07, |
|
"logits/chosen": -2.644559383392334, |
|
"logits/rejected": -2.672788381576538, |
|
"logps/chosen": -332.5156555175781, |
|
"logps/rejected": -419.4230041503906, |
|
"loss": 0.4707, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.4292283058166504, |
|
"rewards/margins": 0.7848268151283264, |
|
"rewards/rejected": -2.214055299758911, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 1.2474896633195511, |
|
"grad_norm": 5.862600550730873, |
|
"learning_rate": 1.625930652006782e-07, |
|
"logits/chosen": -2.806763172149658, |
|
"logits/rejected": -2.818608283996582, |
|
"logps/chosen": -332.0267639160156, |
|
"logps/rejected": -421.3615417480469, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -1.3579407930374146, |
|
"rewards/margins": 1.1036723852157593, |
|
"rewards/rejected": -2.461613178253174, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.2522150029533372, |
|
"grad_norm": 4.952960168127134, |
|
"learning_rate": 1.607965644036386e-07, |
|
"logits/chosen": -2.397037982940674, |
|
"logits/rejected": -2.370586633682251, |
|
"logps/chosen": -380.63433837890625, |
|
"logps/rejected": -567.65478515625, |
|
"loss": 0.4619, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -1.3215548992156982, |
|
"rewards/margins": 1.9972357749938965, |
|
"rewards/rejected": -3.318790912628174, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.2569403425871235, |
|
"grad_norm": 4.451231917032084, |
|
"learning_rate": 1.5900532659696786e-07, |
|
"logits/chosen": -2.5172245502471924, |
|
"logits/rejected": -2.549943208694458, |
|
"logps/chosen": -310.1299743652344, |
|
"logps/rejected": -405.5203857421875, |
|
"loss": 0.493, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -1.271753191947937, |
|
"rewards/margins": 0.9632126688957214, |
|
"rewards/rejected": -2.2349658012390137, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 1.2616656822209096, |
|
"grad_norm": 3.9305163776837313, |
|
"learning_rate": 1.5721945746344914e-07, |
|
"logits/chosen": -2.5553438663482666, |
|
"logits/rejected": -2.6015634536743164, |
|
"logps/chosen": -336.09326171875, |
|
"logps/rejected": -444.8936462402344, |
|
"loss": 0.4871, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.417650580406189, |
|
"rewards/margins": 1.3679817914962769, |
|
"rewards/rejected": -2.785632371902466, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 1.2663910218546959, |
|
"grad_norm": 3.688073971085107, |
|
"learning_rate": 1.5543906236911423e-07, |
|
"logits/chosen": -2.798358917236328, |
|
"logits/rejected": -2.803248167037964, |
|
"logps/chosen": -307.88250732421875, |
|
"logps/rejected": -455.02081298828125, |
|
"loss": 0.4952, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.4341715574264526, |
|
"rewards/margins": 1.0945826768875122, |
|
"rewards/rejected": -2.528754234313965, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.271116361488482, |
|
"grad_norm": 4.072630703078056, |
|
"learning_rate": 1.5366424635702773e-07, |
|
"logits/chosen": -2.4071907997131348, |
|
"logits/rejected": -2.469078540802002, |
|
"logps/chosen": -344.1185302734375, |
|
"logps/rejected": -481.708740234375, |
|
"loss": 0.4607, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -1.4194681644439697, |
|
"rewards/margins": 1.5102123022079468, |
|
"rewards/rejected": -2.929680109024048, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 1.2758417011222682, |
|
"grad_norm": 4.509575176085059, |
|
"learning_rate": 1.5189511414108902e-07, |
|
"logits/chosen": -2.6352696418762207, |
|
"logits/rejected": -2.5769996643066406, |
|
"logps/chosen": -299.04486083984375, |
|
"logps/rejected": -428.2113952636719, |
|
"loss": 0.4548, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -1.261251449584961, |
|
"rewards/margins": 1.3529876470565796, |
|
"rewards/rejected": -2.61423921585083, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.2805670407560543, |
|
"grad_norm": 4.337503774611465, |
|
"learning_rate": 1.5013177009985412e-07, |
|
"logits/chosen": -2.492708683013916, |
|
"logits/rejected": -2.5158631801605225, |
|
"logps/chosen": -369.5946960449219, |
|
"logps/rejected": -539.3471069335938, |
|
"loss": 0.4839, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -1.4575221538543701, |
|
"rewards/margins": 1.7974143028259277, |
|
"rewards/rejected": -3.2549362182617188, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 1.2852923803898406, |
|
"grad_norm": 3.7985527417770797, |
|
"learning_rate": 1.4837431827037786e-07, |
|
"logits/chosen": -2.588874101638794, |
|
"logits/rejected": -2.6791810989379883, |
|
"logps/chosen": -399.30633544921875, |
|
"logps/rejected": -391.2076416015625, |
|
"loss": 0.4848, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -1.1733232736587524, |
|
"rewards/margins": 0.8876461982727051, |
|
"rewards/rejected": -2.060969591140747, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 1.2900177200236267, |
|
"grad_norm": 4.177927201443006, |
|
"learning_rate": 1.466228623420751e-07, |
|
"logits/chosen": -2.498131036758423, |
|
"logits/rejected": -2.5749406814575195, |
|
"logps/chosen": -323.9309997558594, |
|
"logps/rejected": -450.7756042480469, |
|
"loss": 0.4591, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2552162408828735, |
|
"rewards/margins": 1.4370646476745605, |
|
"rewards/rejected": -2.6922807693481445, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.2947430596574128, |
|
"grad_norm": 3.6814005701875523, |
|
"learning_rate": 1.448775056506036e-07, |
|
"logits/chosen": -2.469701051712036, |
|
"logits/rejected": -2.5528130531311035, |
|
"logps/chosen": -382.1181335449219, |
|
"logps/rejected": -487.5738525390625, |
|
"loss": 0.4747, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -1.516263723373413, |
|
"rewards/margins": 1.4078060388565063, |
|
"rewards/rejected": -2.92406964302063, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 1.299468399291199, |
|
"grad_norm": 3.7200492832656766, |
|
"learning_rate": 1.4313835117176692e-07, |
|
"logits/chosen": -2.9147932529449463, |
|
"logits/rejected": -2.988351821899414, |
|
"logps/chosen": -376.6600341796875, |
|
"logps/rejected": -456.061279296875, |
|
"loss": 0.4718, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -1.3018884658813477, |
|
"rewards/margins": 1.2087831497192383, |
|
"rewards/rejected": -2.510671615600586, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.3041937389249854, |
|
"grad_norm": 4.704523030577493, |
|
"learning_rate": 1.4140550151543872e-07, |
|
"logits/chosen": -2.5561208724975586, |
|
"logits/rejected": -2.669656276702881, |
|
"logps/chosen": -391.99273681640625, |
|
"logps/rejected": -470.0566711425781, |
|
"loss": 0.4779, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -1.384698510169983, |
|
"rewards/margins": 1.2832698822021484, |
|
"rewards/rejected": -2.667968511581421, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 1.3089190785587714, |
|
"grad_norm": 3.890211089107488, |
|
"learning_rate": 1.3967905891950936e-07, |
|
"logits/chosen": -2.525979995727539, |
|
"logits/rejected": -2.5187900066375732, |
|
"logps/chosen": -329.0636291503906, |
|
"logps/rejected": -518.5650024414062, |
|
"loss": 0.4978, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -1.3041456937789917, |
|
"rewards/margins": 1.7594897747039795, |
|
"rewards/rejected": -3.0636353492736816, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 1.3136444181925575, |
|
"grad_norm": 3.900954884610721, |
|
"learning_rate": 1.3795912524385322e-07, |
|
"logits/chosen": -2.6802122592926025, |
|
"logits/rejected": -2.7386527061462402, |
|
"logps/chosen": -394.46905517578125, |
|
"logps/rejected": -530.927978515625, |
|
"loss": 0.4819, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5945687294006348, |
|
"rewards/margins": 1.4686897993087769, |
|
"rewards/rejected": -3.063258647918701, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 1.3183697578263438, |
|
"grad_norm": 5.012829563655198, |
|
"learning_rate": 1.3624580196431952e-07, |
|
"logits/chosen": -2.735568046569824, |
|
"logits/rejected": -2.7510178089141846, |
|
"logps/chosen": -375.1429748535156, |
|
"logps/rejected": -484.94390869140625, |
|
"loss": 0.4769, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.396562099456787, |
|
"rewards/margins": 1.301504135131836, |
|
"rewards/rejected": -2.698065996170044, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 1.3230950974601299, |
|
"grad_norm": 6.486088065803327, |
|
"learning_rate": 1.3453919016674483e-07, |
|
"logits/chosen": -2.5972790718078613, |
|
"logits/rejected": -2.6828713417053223, |
|
"logps/chosen": -317.81280517578125, |
|
"logps/rejected": -378.3294982910156, |
|
"loss": 0.4867, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -1.1421467065811157, |
|
"rewards/margins": 1.074657917022705, |
|
"rewards/rejected": -2.2168045043945312, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.3278204370939162, |
|
"grad_norm": 4.08450025143174, |
|
"learning_rate": 1.328393905409892e-07, |
|
"logits/chosen": -2.4976108074188232, |
|
"logits/rejected": -2.5105791091918945, |
|
"logps/chosen": -394.51141357421875, |
|
"logps/rejected": -482.7172546386719, |
|
"loss": 0.4878, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.521199107170105, |
|
"rewards/margins": 1.0865689516067505, |
|
"rewards/rejected": -2.6077680587768555, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 1.3325457767277022, |
|
"grad_norm": 4.246089354170224, |
|
"learning_rate": 1.3114650337499578e-07, |
|
"logits/chosen": -2.629361152648926, |
|
"logits/rejected": -2.595665693283081, |
|
"logps/chosen": -334.8639831542969, |
|
"logps/rejected": -423.8031921386719, |
|
"loss": 0.4994, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.5956919193267822, |
|
"rewards/margins": 0.8941479921340942, |
|
"rewards/rejected": -2.489840030670166, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 1.3372711163614885, |
|
"grad_norm": 3.6260185771038103, |
|
"learning_rate": 1.2946062854887314e-07, |
|
"logits/chosen": -2.430432081222534, |
|
"logits/rejected": -2.3944997787475586, |
|
"logps/chosen": -371.2943115234375, |
|
"logps/rejected": -522.4259033203125, |
|
"loss": 0.4836, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.5247899293899536, |
|
"rewards/margins": 1.4179781675338745, |
|
"rewards/rejected": -2.942767858505249, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 1.3419964559952746, |
|
"grad_norm": 4.374381091295728, |
|
"learning_rate": 1.2778186552900316e-07, |
|
"logits/chosen": -2.7117838859558105, |
|
"logits/rejected": -2.761711597442627, |
|
"logps/chosen": -400.6455078125, |
|
"logps/rejected": -503.24371337890625, |
|
"loss": 0.484, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.717864990234375, |
|
"rewards/margins": 1.5052025318145752, |
|
"rewards/rejected": -3.2230677604675293, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 1.346721795629061, |
|
"grad_norm": 3.8541867456367847, |
|
"learning_rate": 1.261103133621718e-07, |
|
"logits/chosen": -2.594248056411743, |
|
"logits/rejected": -2.603362798690796, |
|
"logps/chosen": -360.4008483886719, |
|
"logps/rejected": -533.9642944335938, |
|
"loss": 0.4763, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -1.626147747039795, |
|
"rewards/margins": 1.3955036401748657, |
|
"rewards/rejected": -3.021651268005371, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.351447135262847, |
|
"grad_norm": 3.8033607627593815, |
|
"learning_rate": 1.2444607066972583e-07, |
|
"logits/chosen": -2.385476589202881, |
|
"logits/rejected": -2.4700021743774414, |
|
"logps/chosen": -379.0484924316406, |
|
"logps/rejected": -446.36932373046875, |
|
"loss": 0.4707, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -1.5443971157073975, |
|
"rewards/margins": 1.0206142663955688, |
|
"rewards/rejected": -2.5650112628936768, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 1.356172474896633, |
|
"grad_norm": 3.9322741636171017, |
|
"learning_rate": 1.227892356417542e-07, |
|
"logits/chosen": -2.8771088123321533, |
|
"logits/rejected": -2.838731050491333, |
|
"logps/chosen": -366.8982849121094, |
|
"logps/rejected": -519.6290893554688, |
|
"loss": 0.4657, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -1.7020255327224731, |
|
"rewards/margins": 1.5927929878234863, |
|
"rewards/rejected": -3.29481840133667, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 1.3608978145304194, |
|
"grad_norm": 4.441774997254426, |
|
"learning_rate": 1.211399060312943e-07, |
|
"logits/chosen": -2.6161060333251953, |
|
"logits/rejected": -2.6837587356567383, |
|
"logps/chosen": -333.47747802734375, |
|
"logps/rejected": -396.8973388671875, |
|
"loss": 0.4805, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.4345372915267944, |
|
"rewards/margins": 0.6499552726745605, |
|
"rewards/rejected": -2.0844926834106445, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 1.3656231541642057, |
|
"grad_norm": 4.102742239919904, |
|
"learning_rate": 1.1949817914856539e-07, |
|
"logits/chosen": -2.6814827919006348, |
|
"logits/rejected": -2.6281232833862305, |
|
"logps/chosen": -309.97998046875, |
|
"logps/rejected": -483.1641540527344, |
|
"loss": 0.4876, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -1.347307801246643, |
|
"rewards/margins": 1.4000307321548462, |
|
"rewards/rejected": -2.7473385334014893, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 1.3703484937979917, |
|
"grad_norm": 3.510044851118614, |
|
"learning_rate": 1.1786415185522644e-07, |
|
"logits/chosen": -2.4141433238983154, |
|
"logits/rejected": -2.440483331680298, |
|
"logps/chosen": -371.58050537109375, |
|
"logps/rejected": -485.5816345214844, |
|
"loss": 0.462, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -1.6337575912475586, |
|
"rewards/margins": 1.3851597309112549, |
|
"rewards/rejected": -3.0189173221588135, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.3750738334317778, |
|
"grad_norm": 4.439828210645555, |
|
"learning_rate": 1.1623792055866182e-07, |
|
"logits/chosen": -2.9460198879241943, |
|
"logits/rejected": -2.792397975921631, |
|
"logps/chosen": -306.8702392578125, |
|
"logps/rejected": -499.4562072753906, |
|
"loss": 0.4817, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -1.5415101051330566, |
|
"rewards/margins": 1.6944992542266846, |
|
"rewards/rejected": -3.2360095977783203, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 1.379799173065564, |
|
"grad_norm": 3.808563735975941, |
|
"learning_rate": 1.1461958120629345e-07, |
|
"logits/chosen": -2.601799488067627, |
|
"logits/rejected": -2.6055119037628174, |
|
"logps/chosen": -349.54119873046875, |
|
"logps/rejected": -453.0423278808594, |
|
"loss": 0.4919, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.38273286819458, |
|
"rewards/margins": 1.1617132425308228, |
|
"rewards/rejected": -2.5444459915161133, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 1.3845245126993504, |
|
"grad_norm": 3.614960145049863, |
|
"learning_rate": 1.1300922927991912e-07, |
|
"logits/chosen": -2.3846492767333984, |
|
"logits/rejected": -2.35745906829834, |
|
"logps/chosen": -377.6761169433594, |
|
"logps/rejected": -496.96405029296875, |
|
"loss": 0.4831, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -1.5498136281967163, |
|
"rewards/margins": 1.1908408403396606, |
|
"rewards/rejected": -2.740654468536377, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 1.3892498523331365, |
|
"grad_norm": 5.27301677074656, |
|
"learning_rate": 1.1140695979008017e-07, |
|
"logits/chosen": -2.359983205795288, |
|
"logits/rejected": -2.346726894378662, |
|
"logps/chosen": -323.6959533691406, |
|
"logps/rejected": -452.9242248535156, |
|
"loss": 0.4725, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.4340472221374512, |
|
"rewards/margins": 1.28817880153656, |
|
"rewards/rejected": -2.722226142883301, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 1.3939751919669225, |
|
"grad_norm": 3.7242744476436704, |
|
"learning_rate": 1.0981286727045483e-07, |
|
"logits/chosen": -2.4720327854156494, |
|
"logits/rejected": -2.353053331375122, |
|
"logps/chosen": -352.6996765136719, |
|
"logps/rejected": -499.5018615722656, |
|
"loss": 0.476, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.4100433588027954, |
|
"rewards/margins": 1.4266828298568726, |
|
"rewards/rejected": -2.836726188659668, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.3987005316007088, |
|
"grad_norm": 4.955539561562781, |
|
"learning_rate": 1.0822704577228131e-07, |
|
"logits/chosen": -2.642940044403076, |
|
"logits/rejected": -2.639770746231079, |
|
"logps/chosen": -338.60595703125, |
|
"logps/rejected": -491.56805419921875, |
|
"loss": 0.4635, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -1.4772026538848877, |
|
"rewards/margins": 1.517985224723816, |
|
"rewards/rejected": -2.995187759399414, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 1.403425871234495, |
|
"grad_norm": 4.549161233388332, |
|
"learning_rate": 1.0664958885880901e-07, |
|
"logits/chosen": -2.5309808254241943, |
|
"logits/rejected": -2.6360573768615723, |
|
"logps/chosen": -335.5814514160156, |
|
"logps/rejected": -437.315673828125, |
|
"loss": 0.4888, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -1.466670036315918, |
|
"rewards/margins": 1.2143834829330444, |
|
"rewards/rejected": -2.681053638458252, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.4081512108682812, |
|
"grad_norm": 3.7991551857385533, |
|
"learning_rate": 1.0508058959977756e-07, |
|
"logits/chosen": -2.673210382461548, |
|
"logits/rejected": -2.5948569774627686, |
|
"logps/chosen": -333.4312744140625, |
|
"logps/rejected": -518.317626953125, |
|
"loss": 0.47, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.2860051393508911, |
|
"rewards/margins": 1.857041358947754, |
|
"rewards/rejected": -3.1430463790893555, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 1.4128765505020673, |
|
"grad_norm": 4.382508044742225, |
|
"learning_rate": 1.0352014056592653e-07, |
|
"logits/chosen": -2.746319055557251, |
|
"logits/rejected": -2.8318300247192383, |
|
"logps/chosen": -361.51220703125, |
|
"logps/rejected": -441.3756103515625, |
|
"loss": 0.492, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.3820509910583496, |
|
"rewards/margins": 1.1350202560424805, |
|
"rewards/rejected": -2.517071485519409, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 1.4176018901358536, |
|
"grad_norm": 4.674467616661016, |
|
"learning_rate": 1.0196833382353303e-07, |
|
"logits/chosen": -2.731412887573242, |
|
"logits/rejected": -2.6908507347106934, |
|
"logps/chosen": -322.404052734375, |
|
"logps/rejected": -462.2546081542969, |
|
"loss": 0.4942, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.4098780155181885, |
|
"rewards/margins": 1.386682391166687, |
|
"rewards/rejected": -2.796560287475586, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.4223272297696397, |
|
"grad_norm": 4.067329027842824, |
|
"learning_rate": 1.0042526092898049e-07, |
|
"logits/chosen": -2.8876852989196777, |
|
"logits/rejected": -2.7597179412841797, |
|
"logps/chosen": -328.04437255859375, |
|
"logps/rejected": -431.107666015625, |
|
"loss": 0.5068, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.4123650789260864, |
|
"rewards/margins": 1.0988068580627441, |
|
"rewards/rejected": -2.511171817779541, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 1.4223272297696397, |
|
"eval_logits/chosen": -2.751302719116211, |
|
"eval_logits/rejected": -2.7585830688476562, |
|
"eval_logps/chosen": -369.27899169921875, |
|
"eval_logps/rejected": -500.55902099609375, |
|
"eval_loss": 0.47641003131866455, |
|
"eval_rewards/accuracies": 0.6401515007019043, |
|
"eval_rewards/chosen": -1.5804827213287354, |
|
"eval_rewards/margins": 1.4256082773208618, |
|
"eval_rewards/rejected": -3.0060908794403076, |
|
"eval_runtime": 225.4536, |
|
"eval_samples_per_second": 16.216, |
|
"eval_steps_per_second": 0.293, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 1.427052569403426, |
|
"grad_norm": 4.636151437447421, |
|
"learning_rate": 9.889101292335625e-08, |
|
"logits/chosen": -2.6484196186065674, |
|
"logits/rejected": -2.6770687103271484, |
|
"logps/chosen": -402.7945251464844, |
|
"logps/rejected": -453.43646240234375, |
|
"loss": 0.4782, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -1.1831938028335571, |
|
"rewards/margins": 1.1924808025360107, |
|
"rewards/rejected": -2.3756744861602783, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 1.431777909037212, |
|
"grad_norm": 4.004369409074385, |
|
"learning_rate": 9.736568032708068e-08, |
|
"logits/chosen": -2.5602633953094482, |
|
"logits/rejected": -2.6748807430267334, |
|
"logps/chosen": -366.6900634765625, |
|
"logps/rejected": -485.0727844238281, |
|
"loss": 0.482, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -1.4285987615585327, |
|
"rewards/margins": 1.416813611984253, |
|
"rewards/rejected": -2.845412254333496, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 1.436503248670998, |
|
"grad_norm": 4.878881366827008, |
|
"learning_rate": 9.584935313456596e-08, |
|
"logits/chosen": -2.231307029724121, |
|
"logits/rejected": -2.287929058074951, |
|
"logps/chosen": -352.46282958984375, |
|
"logps/rejected": -453.6793212890625, |
|
"loss": 0.4942, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.5504112243652344, |
|
"rewards/margins": 1.3251781463623047, |
|
"rewards/rejected": -2.875589609146118, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 1.4412285883047844, |
|
"grad_norm": 4.241730784056723, |
|
"learning_rate": 9.4342120808907e-08, |
|
"logits/chosen": -2.703420639038086, |
|
"logits/rejected": -2.6591320037841797, |
|
"logps/chosen": -383.8643493652344, |
|
"logps/rejected": -594.2821655273438, |
|
"loss": 0.4792, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.5567654371261597, |
|
"rewards/margins": 2.0447824001312256, |
|
"rewards/rejected": -3.6015477180480957, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.4459539279385707, |
|
"grad_norm": 3.8843212431707284, |
|
"learning_rate": 9.284407227660249e-08, |
|
"logits/chosen": -2.8023083209991455, |
|
"logits/rejected": -2.7946949005126953, |
|
"logps/chosen": -337.86090087890625, |
|
"logps/rejected": -434.48260498046875, |
|
"loss": 0.4894, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.2694463729858398, |
|
"rewards/margins": 1.1360807418823242, |
|
"rewards/rejected": -2.405527114868164, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 1.4506792675723568, |
|
"grad_norm": 3.978483022316159, |
|
"learning_rate": 9.13552959223089e-08, |
|
"logits/chosen": -2.676576852798462, |
|
"logits/rejected": -2.5387959480285645, |
|
"logps/chosen": -313.46954345703125, |
|
"logps/rejected": -421.6120300292969, |
|
"loss": 0.4765, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.3958847522735596, |
|
"rewards/margins": 0.8875546455383301, |
|
"rewards/rejected": -2.2834396362304688, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 1.4554046072061428, |
|
"grad_norm": 4.395780053942796, |
|
"learning_rate": 8.987587958362516e-08, |
|
"logits/chosen": -2.856872320175171, |
|
"logits/rejected": -2.8188376426696777, |
|
"logps/chosen": -371.1302795410156, |
|
"logps/rejected": -465.9201354980469, |
|
"loss": 0.4681, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -1.4001535177230835, |
|
"rewards/margins": 1.085311770439148, |
|
"rewards/rejected": -2.4854652881622314, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 1.4601299468399291, |
|
"grad_norm": 3.8656297909933772, |
|
"learning_rate": 8.840591054591096e-08, |
|
"logits/chosen": -2.5090444087982178, |
|
"logits/rejected": -2.6375505924224854, |
|
"logps/chosen": -410.7342529296875, |
|
"logps/rejected": -467.5154724121094, |
|
"loss": 0.4832, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -1.2629905939102173, |
|
"rewards/margins": 1.3948893547058105, |
|
"rewards/rejected": -2.6578800678253174, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 1.4648552864737154, |
|
"grad_norm": 4.334534835752672, |
|
"learning_rate": 8.694547553713618e-08, |
|
"logits/chosen": -2.759681224822998, |
|
"logits/rejected": -2.73026180267334, |
|
"logps/chosen": -355.33624267578125, |
|
"logps/rejected": -529.0985107421875, |
|
"loss": 0.4896, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -1.521816372871399, |
|
"rewards/margins": 1.6343505382537842, |
|
"rewards/rejected": -3.1561670303344727, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.4695806261075015, |
|
"grad_norm": 4.853059543604455, |
|
"learning_rate": 8.54946607227644e-08, |
|
"logits/chosen": -2.4591803550720215, |
|
"logits/rejected": -2.5936403274536133, |
|
"logps/chosen": -391.37298583984375, |
|
"logps/rejected": -434.3221435546875, |
|
"loss": 0.4857, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.5846033096313477, |
|
"rewards/margins": 1.0046393871307373, |
|
"rewards/rejected": -2.589242696762085, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 1.4743059657412876, |
|
"grad_norm": 5.028900388618828, |
|
"learning_rate": 8.405355170066925e-08, |
|
"logits/chosen": -2.7181220054626465, |
|
"logits/rejected": -2.590919017791748, |
|
"logps/chosen": -380.94818115234375, |
|
"logps/rejected": -548.143798828125, |
|
"loss": 0.4851, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -1.6279135942459106, |
|
"rewards/margins": 1.5961647033691406, |
|
"rewards/rejected": -3.2240781784057617, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.4790313053750739, |
|
"grad_norm": 4.261294839829192, |
|
"learning_rate": 8.262223349608366e-08, |
|
"logits/chosen": -2.7533867359161377, |
|
"logits/rejected": -2.8102259635925293, |
|
"logps/chosen": -394.7510681152344, |
|
"logps/rejected": -499.4160461425781, |
|
"loss": 0.4965, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -1.5835039615631104, |
|
"rewards/margins": 1.1018283367156982, |
|
"rewards/rejected": -2.6853325366973877, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 1.48375664500886, |
|
"grad_norm": 4.418632236406977, |
|
"learning_rate": 8.120079055658402e-08, |
|
"logits/chosen": -2.642446279525757, |
|
"logits/rejected": -2.6767466068267822, |
|
"logps/chosen": -323.5433349609375, |
|
"logps/rejected": -469.7967834472656, |
|
"loss": 0.475, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.2628949880599976, |
|
"rewards/margins": 1.5510131120681763, |
|
"rewards/rejected": -2.813908100128174, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 1.4884819846426462, |
|
"grad_norm": 4.312754452868253, |
|
"learning_rate": 7.978930674710719e-08, |
|
"logits/chosen": -2.4338035583496094, |
|
"logits/rejected": -2.444002151489258, |
|
"logps/chosen": -374.0926513671875, |
|
"logps/rejected": -496.3292236328125, |
|
"loss": 0.4875, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.539766788482666, |
|
"rewards/margins": 1.5390400886535645, |
|
"rewards/rejected": -3.0788071155548096, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.4932073242764323, |
|
"grad_norm": 4.036827603807639, |
|
"learning_rate": 7.838786534500269e-08, |
|
"logits/chosen": -2.7101027965545654, |
|
"logits/rejected": -2.7315640449523926, |
|
"logps/chosen": -367.41107177734375, |
|
"logps/rejected": -477.13916015625, |
|
"loss": 0.4933, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -1.501757025718689, |
|
"rewards/margins": 1.17995023727417, |
|
"rewards/rejected": -2.6817073822021484, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 1.4979326639102186, |
|
"grad_norm": 4.1034959013822645, |
|
"learning_rate": 7.699654903511971e-08, |
|
"logits/chosen": -2.4980247020721436, |
|
"logits/rejected": -2.569985866546631, |
|
"logps/chosen": -306.99346923828125, |
|
"logps/rejected": -443.2658996582031, |
|
"loss": 0.4761, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -1.0971927642822266, |
|
"rewards/margins": 1.3962390422821045, |
|
"rewards/rejected": -2.49343204498291, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 1.5026580035440047, |
|
"grad_norm": 3.8908709553914083, |
|
"learning_rate": 7.561543990492803e-08, |
|
"logits/chosen": -2.5545809268951416, |
|
"logits/rejected": -2.7232065200805664, |
|
"logps/chosen": -382.45367431640625, |
|
"logps/rejected": -496.840087890625, |
|
"loss": 0.4696, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -1.5967504978179932, |
|
"rewards/margins": 1.5658732652664185, |
|
"rewards/rejected": -3.162623643875122, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 1.507383343177791, |
|
"grad_norm": 4.1097836221394095, |
|
"learning_rate": 7.424461943967555e-08, |
|
"logits/chosen": -2.5873563289642334, |
|
"logits/rejected": -2.7303099632263184, |
|
"logps/chosen": -394.63916015625, |
|
"logps/rejected": -551.1517333984375, |
|
"loss": 0.4793, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.5619065761566162, |
|
"rewards/margins": 1.4349513053894043, |
|
"rewards/rejected": -2.9968576431274414, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 1.512108682811577, |
|
"grad_norm": 3.8818233399377373, |
|
"learning_rate": 7.288416851758016e-08, |
|
"logits/chosen": -2.638657808303833, |
|
"logits/rejected": -2.547767162322998, |
|
"logps/chosen": -392.5679626464844, |
|
"logps/rejected": -627.8018188476562, |
|
"loss": 0.4782, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.5840284824371338, |
|
"rewards/margins": 2.276487112045288, |
|
"rewards/rejected": -3.860515594482422, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.5168340224453631, |
|
"grad_norm": 4.298412046444391, |
|
"learning_rate": 7.153416740505814e-08, |
|
"logits/chosen": -2.473698854446411, |
|
"logits/rejected": -2.580441951751709, |
|
"logps/chosen": -402.406982421875, |
|
"logps/rejected": -486.8596496582031, |
|
"loss": 0.4701, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -1.5713368654251099, |
|
"rewards/margins": 1.1730860471725464, |
|
"rewards/rejected": -2.7444231510162354, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 1.5215593620791494, |
|
"grad_norm": 3.844411445149988, |
|
"learning_rate": 7.01946957519886e-08, |
|
"logits/chosen": -2.414036273956299, |
|
"logits/rejected": -2.5014243125915527, |
|
"logps/chosen": -408.16455078125, |
|
"logps/rejected": -474.1005859375, |
|
"loss": 0.4766, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -1.5195822715759277, |
|
"rewards/margins": 1.162341594696045, |
|
"rewards/rejected": -2.6819238662719727, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 1.5262847017129357, |
|
"grad_norm": 4.081324399203426, |
|
"learning_rate": 6.88658325870138e-08, |
|
"logits/chosen": -2.6832616329193115, |
|
"logits/rejected": -2.7357096672058105, |
|
"logps/chosen": -385.5028076171875, |
|
"logps/rejected": -469.4779052734375, |
|
"loss": 0.4665, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -1.399859070777893, |
|
"rewards/margins": 1.3238856792449951, |
|
"rewards/rejected": -2.7237446308135986, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 1.5310100413467218, |
|
"grad_norm": 5.228883712742776, |
|
"learning_rate": 6.754765631287695e-08, |
|
"logits/chosen": -2.544619560241699, |
|
"logits/rejected": -2.655355453491211, |
|
"logps/chosen": -355.68328857421875, |
|
"logps/rejected": -468.37738037109375, |
|
"loss": 0.4799, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.6572238206863403, |
|
"rewards/margins": 1.6221368312835693, |
|
"rewards/rejected": -3.27936053276062, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.5357353809805079, |
|
"grad_norm": 3.85359128112212, |
|
"learning_rate": 6.62402447017959e-08, |
|
"logits/chosen": -2.255566358566284, |
|
"logits/rejected": -2.2861790657043457, |
|
"logps/chosen": -374.5622863769531, |
|
"logps/rejected": -525.822265625, |
|
"loss": 0.4804, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.5297114849090576, |
|
"rewards/margins": 1.5574190616607666, |
|
"rewards/rejected": -3.087130546569824, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.5404607206142942, |
|
"grad_norm": 3.9384759163947733, |
|
"learning_rate": 6.494367489087488e-08, |
|
"logits/chosen": -2.310734987258911, |
|
"logits/rejected": -2.331479072570801, |
|
"logps/chosen": -353.3757629394531, |
|
"logps/rejected": -456.4471740722656, |
|
"loss": 0.4973, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -1.2544013261795044, |
|
"rewards/margins": 0.9933812618255615, |
|
"rewards/rejected": -2.2477827072143555, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 1.5451860602480805, |
|
"grad_norm": 4.707537372396568, |
|
"learning_rate": 6.365802337755364e-08, |
|
"logits/chosen": -2.5162229537963867, |
|
"logits/rejected": -2.5781514644622803, |
|
"logps/chosen": -359.474365234375, |
|
"logps/rejected": -459.92413330078125, |
|
"loss": 0.4737, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3576124906539917, |
|
"rewards/margins": 1.2539989948272705, |
|
"rewards/rejected": -2.6116113662719727, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 1.5499113998818665, |
|
"grad_norm": 3.5347435278013024, |
|
"learning_rate": 6.238336601509364e-08, |
|
"logits/chosen": -2.4307329654693604, |
|
"logits/rejected": -2.413248300552368, |
|
"logps/chosen": -346.82537841796875, |
|
"logps/rejected": -487.83111572265625, |
|
"loss": 0.4487, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -1.5922647714614868, |
|
"rewards/margins": 1.7097985744476318, |
|
"rewards/rejected": -3.302063465118408, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 1.5546367395156526, |
|
"grad_norm": 4.5192043219385925, |
|
"learning_rate": 6.111977800810316e-08, |
|
"logits/chosen": -2.4796946048736572, |
|
"logits/rejected": -2.3877992630004883, |
|
"logps/chosen": -332.8540344238281, |
|
"logps/rejected": -508.25360107421875, |
|
"loss": 0.4836, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -1.7624458074569702, |
|
"rewards/margins": 1.4423408508300781, |
|
"rewards/rejected": -3.204786777496338, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 1.559362079149439, |
|
"grad_norm": 4.386475280759837, |
|
"learning_rate": 5.986733390809993e-08, |
|
"logits/chosen": -2.400326728820801, |
|
"logits/rejected": -2.2906858921051025, |
|
"logps/chosen": -386.3865966796875, |
|
"logps/rejected": -556.1158447265625, |
|
"loss": 0.4623, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -1.8006043434143066, |
|
"rewards/margins": 1.7274423837661743, |
|
"rewards/rejected": -3.5280466079711914, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.564087418783225, |
|
"grad_norm": 4.101101033266814, |
|
"learning_rate": 5.862610760911257e-08, |
|
"logits/chosen": -2.4113216400146484, |
|
"logits/rejected": -2.3908936977386475, |
|
"logps/chosen": -364.8887939453125, |
|
"logps/rejected": -459.69854736328125, |
|
"loss": 0.4372, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.4839426279067993, |
|
"rewards/margins": 1.3043051958084106, |
|
"rewards/rejected": -2.78824782371521, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 1.5688127584170113, |
|
"grad_norm": 4.330222141211974, |
|
"learning_rate": 5.739617234332131e-08, |
|
"logits/chosen": -2.6859869956970215, |
|
"logits/rejected": -2.7122979164123535, |
|
"logps/chosen": -405.88018798828125, |
|
"logps/rejected": -452.80926513671875, |
|
"loss": 0.4648, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -1.5999912023544312, |
|
"rewards/margins": 0.9445231556892395, |
|
"rewards/rejected": -2.5445144176483154, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 1.5735380980507974, |
|
"grad_norm": 5.485683540439284, |
|
"learning_rate": 5.6177600676736656e-08, |
|
"logits/chosen": -2.5637035369873047, |
|
"logits/rejected": -2.5589380264282227, |
|
"logps/chosen": -393.2574462890625, |
|
"logps/rejected": -529.729248046875, |
|
"loss": 0.5169, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.7628885507583618, |
|
"rewards/margins": 1.4583940505981445, |
|
"rewards/rejected": -3.221282482147217, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 1.5782634376845834, |
|
"grad_norm": 3.9160914653232983, |
|
"learning_rate": 5.4970464504918654e-08, |
|
"logits/chosen": -2.7670090198516846, |
|
"logits/rejected": -2.7049059867858887, |
|
"logps/chosen": -369.9678039550781, |
|
"logps/rejected": -494.07196044921875, |
|
"loss": 0.4565, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.4675428867340088, |
|
"rewards/margins": 1.4338531494140625, |
|
"rewards/rejected": -2.901395797729492, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 1.5829887773183697, |
|
"grad_norm": 3.637994981997343, |
|
"learning_rate": 5.37748350487344e-08, |
|
"logits/chosen": -2.6616098880767822, |
|
"logits/rejected": -2.6261179447174072, |
|
"logps/chosen": -340.9493408203125, |
|
"logps/rejected": -505.6307067871094, |
|
"loss": 0.4528, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -1.4446711540222168, |
|
"rewards/margins": 1.6602320671081543, |
|
"rewards/rejected": -3.10490345954895, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 1.587714116952156, |
|
"grad_norm": 3.742063853340133, |
|
"learning_rate": 5.2590782850156667e-08, |
|
"logits/chosen": -2.755837917327881, |
|
"logits/rejected": -2.6713008880615234, |
|
"logps/chosen": -421.58154296875, |
|
"logps/rejected": -625.4422607421875, |
|
"loss": 0.4608, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -1.701414704322815, |
|
"rewards/margins": 1.9440244436264038, |
|
"rewards/rejected": -3.6454391479492188, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 1.592439456585942, |
|
"grad_norm": 3.5742676774235154, |
|
"learning_rate": 5.14183777681014e-08, |
|
"logits/chosen": -2.466548442840576, |
|
"logits/rejected": -2.5580849647521973, |
|
"logps/chosen": -414.5927734375, |
|
"logps/rejected": -506.8092956542969, |
|
"loss": 0.5037, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.518671989440918, |
|
"rewards/margins": 1.1154457330703735, |
|
"rewards/rejected": -2.634117603302002, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 1.5971647962197282, |
|
"grad_norm": 3.6653171076626205, |
|
"learning_rate": 5.0257688974306436e-08, |
|
"logits/chosen": -2.8738627433776855, |
|
"logits/rejected": -3.015981435775757, |
|
"logps/chosen": -381.5254211425781, |
|
"logps/rejected": -457.140869140625, |
|
"loss": 0.4883, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.607730507850647, |
|
"rewards/margins": 1.3426533937454224, |
|
"rewards/rejected": -2.9503836631774902, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 1.6018901358535145, |
|
"grad_norm": 3.837456752855929, |
|
"learning_rate": 4.910878494925008e-08, |
|
"logits/chosen": -2.6002864837646484, |
|
"logits/rejected": -2.6144702434539795, |
|
"logps/chosen": -402.1292724609375, |
|
"logps/rejected": -593.8958740234375, |
|
"loss": 0.4436, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.5465564727783203, |
|
"rewards/margins": 1.739166498184204, |
|
"rewards/rejected": -3.2857229709625244, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 1.6066154754873008, |
|
"grad_norm": 3.9821598438730565, |
|
"learning_rate": 4.7971733478111094e-08, |
|
"logits/chosen": -2.5669634342193604, |
|
"logits/rejected": -2.5983939170837402, |
|
"logps/chosen": -384.938720703125, |
|
"logps/rejected": -557.4461669921875, |
|
"loss": 0.471, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.5989502668380737, |
|
"rewards/margins": 1.8718353509902954, |
|
"rewards/rejected": -3.4707858562469482, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.6113408151210868, |
|
"grad_norm": 4.1505854551229735, |
|
"learning_rate": 4.684660164676896e-08, |
|
"logits/chosen": -2.4149627685546875, |
|
"logits/rejected": -2.3984737396240234, |
|
"logps/chosen": -367.2955322265625, |
|
"logps/rejected": -546.0213623046875, |
|
"loss": 0.4849, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5166963338851929, |
|
"rewards/margins": 1.747159719467163, |
|
"rewards/rejected": -3.2638559341430664, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 1.616066154754873, |
|
"grad_norm": 4.195319716535671, |
|
"learning_rate": 4.5733455837846325e-08, |
|
"logits/chosen": -2.6863296031951904, |
|
"logits/rejected": -2.740741014480591, |
|
"logps/chosen": -388.5705261230469, |
|
"logps/rejected": -527.72265625, |
|
"loss": 0.4803, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.659536600112915, |
|
"rewards/margins": 1.501134991645813, |
|
"rewards/rejected": -3.1606712341308594, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 1.6207914943886592, |
|
"grad_norm": 4.414273407573815, |
|
"learning_rate": 4.4632361726791914e-08, |
|
"logits/chosen": -2.6036033630371094, |
|
"logits/rejected": -2.6751937866210938, |
|
"logps/chosen": -401.4815979003906, |
|
"logps/rejected": -471.8222961425781, |
|
"loss": 0.5018, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.435232162475586, |
|
"rewards/margins": 1.3340187072753906, |
|
"rewards/rejected": -2.7692506313323975, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 1.6255168340224455, |
|
"grad_norm": 5.041321789664628, |
|
"learning_rate": 4.354338427800619e-08, |
|
"logits/chosen": -2.6457765102386475, |
|
"logits/rejected": -2.5664923191070557, |
|
"logps/chosen": -325.753662109375, |
|
"logps/rejected": -515.0615844726562, |
|
"loss": 0.4724, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.4971141815185547, |
|
"rewards/margins": 1.5831940174102783, |
|
"rewards/rejected": -3.080307960510254, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 1.6255168340224455, |
|
"eval_logits/chosen": -2.623720645904541, |
|
"eval_logits/rejected": -2.6295571327209473, |
|
"eval_logps/chosen": -379.5492858886719, |
|
"eval_logps/rejected": -517.3630981445312, |
|
"eval_loss": 0.47301965951919556, |
|
"eval_rewards/accuracies": 0.6382575631141663, |
|
"eval_rewards/chosen": -1.683185338973999, |
|
"eval_rewards/margins": 1.4909465312957764, |
|
"eval_rewards/rejected": -3.1741318702697754, |
|
"eval_runtime": 225.158, |
|
"eval_samples_per_second": 16.237, |
|
"eval_steps_per_second": 0.293, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 1.6302421736562316, |
|
"grad_norm": 3.9177181536561396, |
|
"learning_rate": 4.246658774100803e-08, |
|
"logits/chosen": -2.5313777923583984, |
|
"logits/rejected": -2.6277084350585938, |
|
"logps/chosen": -407.18621826171875, |
|
"logps/rejected": -496.5007629394531, |
|
"loss": 0.4807, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -1.6058672666549683, |
|
"rewards/margins": 1.1191009283065796, |
|
"rewards/rejected": -2.724968194961548, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 1.6349675132900177, |
|
"grad_norm": 3.841031231021936, |
|
"learning_rate": 4.140203564664421e-08, |
|
"logits/chosen": -2.6209938526153564, |
|
"logits/rejected": -2.650219678878784, |
|
"logps/chosen": -361.2805480957031, |
|
"logps/rejected": -478.2210998535156, |
|
"loss": 0.4595, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.4036047458648682, |
|
"rewards/margins": 1.4318450689315796, |
|
"rewards/rejected": -2.8354499340057373, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 1.6396928529238037, |
|
"grad_norm": 3.522512280432432, |
|
"learning_rate": 4.0349790803341274e-08, |
|
"logits/chosen": -2.8207521438598633, |
|
"logits/rejected": -2.7012057304382324, |
|
"logps/chosen": -348.5565490722656, |
|
"logps/rejected": -503.2954406738281, |
|
"loss": 0.4711, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -1.5178475379943848, |
|
"rewards/margins": 1.5804749727249146, |
|
"rewards/rejected": -3.0983223915100098, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 1.64441819255759, |
|
"grad_norm": 4.0111825885134245, |
|
"learning_rate": 3.930991529339936e-08, |
|
"logits/chosen": -2.629011631011963, |
|
"logits/rejected": -2.583953380584717, |
|
"logps/chosen": -376.8917236328125, |
|
"logps/rejected": -623.347412109375, |
|
"loss": 0.4686, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -1.57673978805542, |
|
"rewards/margins": 2.396878719329834, |
|
"rewards/rejected": -3.973618268966675, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 1.6491435321913763, |
|
"grad_norm": 4.471413252688273, |
|
"learning_rate": 3.828247046932992e-08, |
|
"logits/chosen": -2.6523804664611816, |
|
"logits/rejected": -2.6967380046844482, |
|
"logps/chosen": -339.6617126464844, |
|
"logps/rejected": -441.6883239746094, |
|
"loss": 0.4632, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.344817876815796, |
|
"rewards/margins": 1.1860837936401367, |
|
"rewards/rejected": -2.5309014320373535, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 1.6538688718251624, |
|
"grad_norm": 3.9015948159102902, |
|
"learning_rate": 3.7267516950235525e-08, |
|
"logits/chosen": -2.590344190597534, |
|
"logits/rejected": -2.6862494945526123, |
|
"logps/chosen": -347.7746887207031, |
|
"logps/rejected": -500.335693359375, |
|
"loss": 0.4407, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -1.4618185758590698, |
|
"rewards/margins": 1.6277704238891602, |
|
"rewards/rejected": -3.0895891189575195, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.6585942114589485, |
|
"grad_norm": 4.642097450354513, |
|
"learning_rate": 3.62651146182334e-08, |
|
"logits/chosen": -2.656710624694824, |
|
"logits/rejected": -2.673964738845825, |
|
"logps/chosen": -364.386962890625, |
|
"logps/rejected": -458.63348388671875, |
|
"loss": 0.5201, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.533918023109436, |
|
"rewards/margins": 1.1320774555206299, |
|
"rewards/rejected": -2.6659955978393555, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 1.6633195510927348, |
|
"grad_norm": 4.219548915754237, |
|
"learning_rate": 3.527532261492272e-08, |
|
"logits/chosen": -2.572221517562866, |
|
"logits/rejected": -2.526576042175293, |
|
"logps/chosen": -370.37396240234375, |
|
"logps/rejected": -487.1659240722656, |
|
"loss": 0.4737, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -1.4427790641784668, |
|
"rewards/margins": 1.223225712776184, |
|
"rewards/rejected": -2.6660046577453613, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 1.668044890726521, |
|
"grad_norm": 4.167493075518676, |
|
"learning_rate": 3.4298199337894685e-08, |
|
"logits/chosen": -2.6304097175598145, |
|
"logits/rejected": -2.6287808418273926, |
|
"logps/chosen": -387.1392822265625, |
|
"logps/rejected": -581.6126708984375, |
|
"loss": 0.4896, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -1.8162624835968018, |
|
"rewards/margins": 2.150383234024048, |
|
"rewards/rejected": -3.9666457176208496, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 1.6727702303603071, |
|
"grad_norm": 5.897960463157494, |
|
"learning_rate": 3.333380243728773e-08, |
|
"logits/chosen": -2.4372665882110596, |
|
"logits/rejected": -2.5481488704681396, |
|
"logps/chosen": -380.5074157714844, |
|
"logps/rejected": -474.3066711425781, |
|
"loss": 0.4799, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.3929729461669922, |
|
"rewards/margins": 1.5269252061843872, |
|
"rewards/rejected": -2.919898271560669, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 1.6774955699940932, |
|
"grad_norm": 4.6734891602402, |
|
"learning_rate": 3.238218881238558e-08, |
|
"logits/chosen": -2.68146014213562, |
|
"logits/rejected": -2.764531135559082, |
|
"logps/chosen": -385.89788818359375, |
|
"logps/rejected": -453.99371337890625, |
|
"loss": 0.4799, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -1.5357441902160645, |
|
"rewards/margins": 1.310141921043396, |
|
"rewards/rejected": -2.845885992050171, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 1.6822209096278795, |
|
"grad_norm": 3.9989351330319503, |
|
"learning_rate": 3.1443414608260526e-08, |
|
"logits/chosen": -2.7942676544189453, |
|
"logits/rejected": -2.7728328704833984, |
|
"logps/chosen": -417.6398010253906, |
|
"logps/rejected": -534.0235595703125, |
|
"loss": 0.4725, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -1.648948073387146, |
|
"rewards/margins": 1.4638010263442993, |
|
"rewards/rejected": -3.1127490997314453, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 1.6869462492616658, |
|
"grad_norm": 4.114134264165304, |
|
"learning_rate": 3.0517535212460946e-08, |
|
"logits/chosen": -2.6477482318878174, |
|
"logits/rejected": -2.7860238552093506, |
|
"logps/chosen": -470.87103271484375, |
|
"logps/rejected": -513.6318969726562, |
|
"loss": 0.469, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -1.568166732788086, |
|
"rewards/margins": 0.9764127731323242, |
|
"rewards/rejected": -2.5445797443389893, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 1.6916715888954519, |
|
"grad_norm": 4.425834666691968, |
|
"learning_rate": 2.960460525174313e-08, |
|
"logits/chosen": -2.8916306495666504, |
|
"logits/rejected": -2.853088855743408, |
|
"logps/chosen": -339.81610107421875, |
|
"logps/rejected": -483.9287109375, |
|
"loss": 0.472, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -1.479756236076355, |
|
"rewards/margins": 1.5092005729675293, |
|
"rewards/rejected": -2.9889566898345947, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 1.696396928529238, |
|
"grad_norm": 4.184257952905371, |
|
"learning_rate": 2.8704678588848535e-08, |
|
"logits/chosen": -2.52712345123291, |
|
"logits/rejected": -2.5050594806671143, |
|
"logps/chosen": -368.9576416015625, |
|
"logps/rejected": -528.0908813476562, |
|
"loss": 0.4573, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.523732304573059, |
|
"rewards/margins": 1.6147029399871826, |
|
"rewards/rejected": -3.1384353637695312, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 1.7011222681630243, |
|
"grad_norm": 4.172727890906571, |
|
"learning_rate": 2.781780831932595e-08, |
|
"logits/chosen": -2.6111361980438232, |
|
"logits/rejected": -2.634065628051758, |
|
"logps/chosen": -437.898681640625, |
|
"logps/rejected": -492.3134460449219, |
|
"loss": 0.4917, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.5102522373199463, |
|
"rewards/margins": 0.8861820697784424, |
|
"rewards/rejected": -2.3964343070983887, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.7058476077968105, |
|
"grad_norm": 3.6954976996048376, |
|
"learning_rate": 2.6944046768398565e-08, |
|
"logits/chosen": -2.5292959213256836, |
|
"logits/rejected": -2.5336508750915527, |
|
"logps/chosen": -348.8612060546875, |
|
"logps/rejected": -474.258056640625, |
|
"loss": 0.4838, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -1.3767406940460205, |
|
"rewards/margins": 1.3499197959899902, |
|
"rewards/rejected": -2.7266602516174316, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 1.7105729474305966, |
|
"grad_norm": 4.557110158231638, |
|
"learning_rate": 2.608344548787722e-08, |
|
"logits/chosen": -2.493603229522705, |
|
"logits/rejected": -2.6656622886657715, |
|
"logps/chosen": -443.8504638671875, |
|
"logps/rejected": -544.0814819335938, |
|
"loss": 0.4946, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.7300480604171753, |
|
"rewards/margins": 1.5911731719970703, |
|
"rewards/rejected": -3.321221113204956, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 1.7152982870643827, |
|
"grad_norm": 4.456300726365723, |
|
"learning_rate": 2.523605525311842e-08, |
|
"logits/chosen": -2.5634899139404297, |
|
"logits/rejected": -2.5102734565734863, |
|
"logps/chosen": -380.3689270019531, |
|
"logps/rejected": -480.8353271484375, |
|
"loss": 0.457, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -1.3825726509094238, |
|
"rewards/margins": 0.8929464221000671, |
|
"rewards/rejected": -2.2755191326141357, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 1.7200236266981688, |
|
"grad_norm": 4.384534987558066, |
|
"learning_rate": 2.440192606002889e-08, |
|
"logits/chosen": -2.7241060733795166, |
|
"logits/rejected": -2.700328826904297, |
|
"logps/chosen": -400.6265869140625, |
|
"logps/rejected": -509.753173828125, |
|
"loss": 0.4544, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -1.817798137664795, |
|
"rewards/margins": 1.4839262962341309, |
|
"rewards/rejected": -3.3017241954803467, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 1.724748966331955, |
|
"grad_norm": 4.665836153801188, |
|
"learning_rate": 2.3581107122115723e-08, |
|
"logits/chosen": -2.7754364013671875, |
|
"logits/rejected": -2.7966537475585938, |
|
"logps/chosen": -398.6808166503906, |
|
"logps/rejected": -486.95263671875, |
|
"loss": 0.4908, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -1.9782556295394897, |
|
"rewards/margins": 1.371333360671997, |
|
"rewards/rejected": -3.3495888710021973, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 1.7294743059657414, |
|
"grad_norm": 4.147614147053133, |
|
"learning_rate": 2.2773646867582763e-08, |
|
"logits/chosen": -2.626425266265869, |
|
"logits/rejected": -2.57529354095459, |
|
"logps/chosen": -398.737548828125, |
|
"logps/rejected": -586.5382690429688, |
|
"loss": 0.488, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -1.5953752994537354, |
|
"rewards/margins": 1.6417709589004517, |
|
"rewards/rejected": -3.2371463775634766, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 1.7341996455995274, |
|
"grad_norm": 4.0997514353208775, |
|
"learning_rate": 2.19795929364735e-08, |
|
"logits/chosen": -2.473259449005127, |
|
"logits/rejected": -2.6184892654418945, |
|
"logps/chosen": -344.3788757324219, |
|
"logps/rejected": -420.8124694824219, |
|
"loss": 0.4917, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.3588237762451172, |
|
"rewards/margins": 1.1373924016952515, |
|
"rewards/rejected": -2.496216058731079, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 1.7389249852333135, |
|
"grad_norm": 4.374774389574994, |
|
"learning_rate": 2.119899217785995e-08, |
|
"logits/chosen": -2.467965841293335, |
|
"logits/rejected": -2.5698554515838623, |
|
"logps/chosen": -387.9084777832031, |
|
"logps/rejected": -457.1981201171875, |
|
"loss": 0.4716, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.393808364868164, |
|
"rewards/margins": 1.2519505023956299, |
|
"rewards/rejected": -2.645759105682373, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 1.7436503248670998, |
|
"grad_norm": 4.049649530225524, |
|
"learning_rate": 2.0431890647079093e-08, |
|
"logits/chosen": -2.407700538635254, |
|
"logits/rejected": -2.458270788192749, |
|
"logps/chosen": -414.94561767578125, |
|
"logps/rejected": -544.7018432617188, |
|
"loss": 0.4652, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.8057122230529785, |
|
"rewards/margins": 1.333924412727356, |
|
"rewards/rejected": -3.139636754989624, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 1.748375664500886, |
|
"grad_norm": 4.671069391094125, |
|
"learning_rate": 1.967833360301513e-08, |
|
"logits/chosen": -2.637674331665039, |
|
"logits/rejected": -2.722353458404541, |
|
"logps/chosen": -357.71142578125, |
|
"logps/rejected": -436.8525390625, |
|
"loss": 0.4661, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -1.5406451225280762, |
|
"rewards/margins": 1.1408922672271729, |
|
"rewards/rejected": -2.681537389755249, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.7531010041346722, |
|
"grad_norm": 4.49257028234618, |
|
"learning_rate": 1.8938365505429544e-08, |
|
"logits/chosen": -2.710331678390503, |
|
"logits/rejected": -2.7880280017852783, |
|
"logps/chosen": -386.74945068359375, |
|
"logps/rejected": -489.5121154785156, |
|
"loss": 0.4942, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.6727674007415771, |
|
"rewards/margins": 1.33446204662323, |
|
"rewards/rejected": -3.0072293281555176, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 1.7578263437684583, |
|
"grad_norm": 3.5465325285307228, |
|
"learning_rate": 1.8212030012337704e-08, |
|
"logits/chosen": -2.757737874984741, |
|
"logits/rejected": -2.687244415283203, |
|
"logps/chosen": -360.2991943359375, |
|
"logps/rejected": -512.8646240234375, |
|
"loss": 0.4577, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -1.4896515607833862, |
|
"rewards/margins": 1.5841938257217407, |
|
"rewards/rejected": -3.0738449096679688, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 1.7625516834022446, |
|
"grad_norm": 4.417035193749527, |
|
"learning_rate": 1.7499369977433453e-08, |
|
"logits/chosen": -2.6367974281311035, |
|
"logits/rejected": -2.677651882171631, |
|
"logps/chosen": -362.3944396972656, |
|
"logps/rejected": -460.9113464355469, |
|
"loss": 0.4586, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.5859841108322144, |
|
"rewards/margins": 1.0494407415390015, |
|
"rewards/rejected": -2.6354243755340576, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 1.7672770230360308, |
|
"grad_norm": 4.392962342030237, |
|
"learning_rate": 1.680042744756016e-08, |
|
"logits/chosen": -2.9928336143493652, |
|
"logits/rejected": -2.9165008068084717, |
|
"logps/chosen": -366.8034973144531, |
|
"logps/rejected": -550.0221557617188, |
|
"loss": 0.4769, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.5658485889434814, |
|
"rewards/margins": 1.7793083190917969, |
|
"rewards/rejected": -3.3451569080352783, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 1.772002362669817, |
|
"grad_norm": 4.856077218434121, |
|
"learning_rate": 1.611524366023062e-08, |
|
"logits/chosen": -2.722025156021118, |
|
"logits/rejected": -2.8585572242736816, |
|
"logps/chosen": -347.9723815917969, |
|
"logps/rejected": -446.63604736328125, |
|
"loss": 0.4742, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.4827804565429688, |
|
"rewards/margins": 1.0786786079406738, |
|
"rewards/rejected": -2.5614588260650635, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.776727702303603, |
|
"grad_norm": 3.752916160722341, |
|
"learning_rate": 1.544385904119344e-08, |
|
"logits/chosen": -2.913771867752075, |
|
"logits/rejected": -2.971179962158203, |
|
"logps/chosen": -360.3616638183594, |
|
"logps/rejected": -423.8080749511719, |
|
"loss": 0.4586, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.482804298400879, |
|
"rewards/margins": 1.2081830501556396, |
|
"rewards/rejected": -2.6909875869750977, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 1.7814530419373893, |
|
"grad_norm": 4.12751400895733, |
|
"learning_rate": 1.4786313202048456e-08, |
|
"logits/chosen": -2.6907248497009277, |
|
"logits/rejected": -2.816317319869995, |
|
"logps/chosen": -403.627685546875, |
|
"logps/rejected": -487.9366760253906, |
|
"loss": 0.4929, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -1.6239163875579834, |
|
"rewards/margins": 1.5215915441513062, |
|
"rewards/rejected": -3.1455078125, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 1.7861783815711756, |
|
"grad_norm": 4.590826584263257, |
|
"learning_rate": 1.4142644937909203e-08, |
|
"logits/chosen": -2.6118569374084473, |
|
"logits/rejected": -2.6019818782806396, |
|
"logps/chosen": -341.8358154296875, |
|
"logps/rejected": -419.81829833984375, |
|
"loss": 0.484, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.3214576244354248, |
|
"rewards/margins": 0.8901782631874084, |
|
"rewards/rejected": -2.2116360664367676, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 1.7909037212049617, |
|
"grad_norm": 4.146292704965862, |
|
"learning_rate": 1.351289222511426e-08, |
|
"logits/chosen": -2.610994577407837, |
|
"logits/rejected": -2.78933048248291, |
|
"logps/chosen": -369.8974609375, |
|
"logps/rejected": -480.7877197265625, |
|
"loss": 0.4694, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5018396377563477, |
|
"rewards/margins": 1.5635069608688354, |
|
"rewards/rejected": -3.0653464794158936, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 1.7956290608387477, |
|
"grad_norm": 5.1734179405150895, |
|
"learning_rate": 1.2897092218986716e-08, |
|
"logits/chosen": -2.523732900619507, |
|
"logits/rejected": -2.6424248218536377, |
|
"logps/chosen": -425.2537536621094, |
|
"logps/rejected": -560.3030395507812, |
|
"loss": 0.4854, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -1.5777177810668945, |
|
"rewards/margins": 1.5381261110305786, |
|
"rewards/rejected": -3.1158437728881836, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.8003544004725338, |
|
"grad_norm": 3.865564251130276, |
|
"learning_rate": 1.2295281251641698e-08, |
|
"logits/chosen": -2.653510570526123, |
|
"logits/rejected": -2.6763200759887695, |
|
"logps/chosen": -392.48577880859375, |
|
"logps/rejected": -508.2447509765625, |
|
"loss": 0.4676, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -1.5738812685012817, |
|
"rewards/margins": 1.344365119934082, |
|
"rewards/rejected": -2.918246269226074, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 1.80507974010632, |
|
"grad_norm": 3.9007660136930062, |
|
"learning_rate": 1.1707494829843207e-08, |
|
"logits/chosen": -2.4932150840759277, |
|
"logits/rejected": -2.525545835494995, |
|
"logps/chosen": -393.6082763671875, |
|
"logps/rejected": -494.412109375, |
|
"loss": 0.4637, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -1.6010535955429077, |
|
"rewards/margins": 1.5001673698425293, |
|
"rewards/rejected": -3.1012210845947266, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 1.8098050797401064, |
|
"grad_norm": 3.789687566531843, |
|
"learning_rate": 1.1133767632908798e-08, |
|
"logits/chosen": -2.773787021636963, |
|
"logits/rejected": -2.7511181831359863, |
|
"logps/chosen": -374.2195739746094, |
|
"logps/rejected": -590.141357421875, |
|
"loss": 0.5006, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -1.784470558166504, |
|
"rewards/margins": 1.8250298500061035, |
|
"rewards/rejected": -3.6095001697540283, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 1.8145304193738925, |
|
"grad_norm": 4.27781176287506, |
|
"learning_rate": 1.0574133510663747e-08, |
|
"logits/chosen": -2.4717490673065186, |
|
"logits/rejected": -2.580211639404297, |
|
"logps/chosen": -404.38250732421875, |
|
"logps/rejected": -501.7881774902344, |
|
"loss": 0.4858, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4478615522384644, |
|
"rewards/margins": 1.3333170413970947, |
|
"rewards/rejected": -2.7811787128448486, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.8192557590076786, |
|
"grad_norm": 4.660154426544148, |
|
"learning_rate": 1.0028625481443981e-08, |
|
"logits/chosen": -2.6154394149780273, |
|
"logits/rejected": -2.6079351902008057, |
|
"logps/chosen": -332.8676452636719, |
|
"logps/rejected": -475.0517272949219, |
|
"loss": 0.4503, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -1.4882500171661377, |
|
"rewards/margins": 1.4799097776412964, |
|
"rewards/rejected": -2.9681599140167236, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 1.8239810986414648, |
|
"grad_norm": 4.32107981908328, |
|
"learning_rate": 9.497275730147774e-09, |
|
"logits/chosen": -2.57356333732605, |
|
"logits/rejected": -2.566416025161743, |
|
"logps/chosen": -400.1445007324219, |
|
"logps/rejected": -566.9680786132812, |
|
"loss": 0.4524, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -1.724095344543457, |
|
"rewards/margins": 1.8485801219940186, |
|
"rewards/rejected": -3.5726757049560547, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 1.8287064382752511, |
|
"grad_norm": 4.382882347588404, |
|
"learning_rate": 8.980115606337046e-09, |
|
"logits/chosen": -2.744180202484131, |
|
"logits/rejected": -2.6595263481140137, |
|
"logps/chosen": -318.4762268066406, |
|
"logps/rejected": -484.2186584472656, |
|
"loss": 0.4836, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.4589486122131348, |
|
"rewards/margins": 1.453284502029419, |
|
"rewards/rejected": -2.912233352661133, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 1.8287064382752511, |
|
"eval_logits/chosen": -2.637449264526367, |
|
"eval_logits/rejected": -2.6433615684509277, |
|
"eval_logps/chosen": -379.1832275390625, |
|
"eval_logps/rejected": -518.951416015625, |
|
"eval_loss": 0.4717705249786377, |
|
"eval_rewards/accuracies": 0.6420454382896423, |
|
"eval_rewards/chosen": -1.679525375366211, |
|
"eval_rewards/margins": 1.5104897022247314, |
|
"eval_rewards/rejected": -3.1900153160095215, |
|
"eval_runtime": 226.5578, |
|
"eval_samples_per_second": 16.137, |
|
"eval_steps_per_second": 0.291, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 1.8334317779090372, |
|
"grad_norm": 4.767778942870907, |
|
"learning_rate": 8.47717562238756e-09, |
|
"logits/chosen": -2.504225254058838, |
|
"logits/rejected": -2.5545616149902344, |
|
"logps/chosen": -355.3597717285156, |
|
"logps/rejected": -519.2000732421875, |
|
"loss": 0.4663, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -1.5064764022827148, |
|
"rewards/margins": 1.655312418937683, |
|
"rewards/rejected": -3.1617889404296875, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 1.8381571175428233, |
|
"grad_norm": 4.59007333084006, |
|
"learning_rate": 7.988485451688815e-09, |
|
"logits/chosen": -2.8325204849243164, |
|
"logits/rejected": -2.8123779296875, |
|
"logps/chosen": -341.4407043457031, |
|
"logps/rejected": -501.8046875, |
|
"loss": 0.4958, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -1.6452869176864624, |
|
"rewards/margins": 1.5238325595855713, |
|
"rewards/rejected": -3.1691195964813232, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 1.8428824571766096, |
|
"grad_norm": 4.836517410103638, |
|
"learning_rate": 7.514073926893432e-09, |
|
"logits/chosen": -2.441648006439209, |
|
"logits/rejected": -2.528268814086914, |
|
"logps/chosen": -386.49493408203125, |
|
"logps/rejected": -454.25616455078125, |
|
"loss": 0.5151, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -1.5899585485458374, |
|
"rewards/margins": 0.9383600950241089, |
|
"rewards/rejected": -2.5283186435699463, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.8476077968103959, |
|
"grad_norm": 4.204493754201481, |
|
"learning_rate": 7.053969038215674e-09, |
|
"logits/chosen": -2.7649574279785156, |
|
"logits/rejected": -2.6198995113372803, |
|
"logps/chosen": -405.90283203125, |
|
"logps/rejected": -581.5423583984375, |
|
"loss": 0.4766, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -1.7321686744689941, |
|
"rewards/margins": 1.7127902507781982, |
|
"rewards/rejected": -3.4449586868286133, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 1.852333136444182, |
|
"grad_norm": 3.805689590990249, |
|
"learning_rate": 6.608197931780496e-09, |
|
"logits/chosen": -2.5429623126983643, |
|
"logits/rejected": -2.4480888843536377, |
|
"logps/chosen": -373.41375732421875, |
|
"logps/rejected": -588.4783935546875, |
|
"loss": 0.4787, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.4610140323638916, |
|
"rewards/margins": 1.9005025625228882, |
|
"rewards/rejected": -3.3615164756774902, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 1.857058476077968, |
|
"grad_norm": 4.640569841501701, |
|
"learning_rate": 6.176786908021453e-09, |
|
"logits/chosen": -2.661363363265991, |
|
"logits/rejected": -2.734570026397705, |
|
"logps/chosen": -411.58306884765625, |
|
"logps/rejected": -510.5803527832031, |
|
"loss": 0.475, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.6014535427093506, |
|
"rewards/margins": 1.3125067949295044, |
|
"rewards/rejected": -2.9139604568481445, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 1.8617838157117543, |
|
"grad_norm": 4.140728148044369, |
|
"learning_rate": 5.759761420129322e-09, |
|
"logits/chosen": -2.901158332824707, |
|
"logits/rejected": -2.942783832550049, |
|
"logps/chosen": -331.9306945800781, |
|
"logps/rejected": -466.704833984375, |
|
"loss": 0.4703, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4073690176010132, |
|
"rewards/margins": 1.4787429571151733, |
|
"rewards/rejected": -2.8861119747161865, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 1.8665091553455406, |
|
"grad_norm": 3.736333802301315, |
|
"learning_rate": 5.357146072550278e-09, |
|
"logits/chosen": -2.4809322357177734, |
|
"logits/rejected": -2.499831438064575, |
|
"logps/chosen": -393.06109619140625, |
|
"logps/rejected": -467.2709655761719, |
|
"loss": 0.4632, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.567178726196289, |
|
"rewards/margins": 0.7932885885238647, |
|
"rewards/rejected": -2.3604674339294434, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.8712344949793267, |
|
"grad_norm": 4.263089946197189, |
|
"learning_rate": 4.968964619534138e-09, |
|
"logits/chosen": -2.6070809364318848, |
|
"logits/rejected": -2.5771572589874268, |
|
"logps/chosen": -346.75042724609375, |
|
"logps/rejected": -519.9202880859375, |
|
"loss": 0.443, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.525521993637085, |
|
"rewards/margins": 1.564245581626892, |
|
"rewards/rejected": -3.0897674560546875, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 1.8759598346131128, |
|
"grad_norm": 4.104385303806953, |
|
"learning_rate": 4.595239963733011e-09, |
|
"logits/chosen": -2.834834575653076, |
|
"logits/rejected": -2.8623130321502686, |
|
"logps/chosen": -387.9482727050781, |
|
"logps/rejected": -544.7717895507812, |
|
"loss": 0.427, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.8901216983795166, |
|
"rewards/margins": 1.8027396202087402, |
|
"rewards/rejected": -3.6928610801696777, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 1.8806851742468988, |
|
"grad_norm": 3.9166519315043407, |
|
"learning_rate": 4.2359941548499035e-09, |
|
"logits/chosen": -2.438992977142334, |
|
"logits/rejected": -2.4675240516662598, |
|
"logps/chosen": -377.0653076171875, |
|
"logps/rejected": -579.7406616210938, |
|
"loss": 0.4621, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -1.7994550466537476, |
|
"rewards/margins": 1.7710639238357544, |
|
"rewards/rejected": -3.570518970489502, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 1.8854105138806851, |
|
"grad_norm": 3.980027730791039, |
|
"learning_rate": 3.891248388337847e-09, |
|
"logits/chosen": -2.461378574371338, |
|
"logits/rejected": -2.3893392086029053, |
|
"logps/chosen": -391.1559753417969, |
|
"logps/rejected": -487.46112060546875, |
|
"loss": 0.4763, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5593876838684082, |
|
"rewards/margins": 1.065595030784607, |
|
"rewards/rejected": -2.6249828338623047, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 1.8901358535144714, |
|
"grad_norm": 4.768587791597909, |
|
"learning_rate": 3.5610230041494828e-09, |
|
"logits/chosen": -2.3206558227539062, |
|
"logits/rejected": -2.3519835472106934, |
|
"logps/chosen": -392.8856506347656, |
|
"logps/rejected": -517.7825317382812, |
|
"loss": 0.4687, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.5414066314697266, |
|
"rewards/margins": 1.4560898542404175, |
|
"rewards/rejected": -2.9974963665008545, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.8948611931482575, |
|
"grad_norm": 4.155264459662159, |
|
"learning_rate": 3.2453374855367366e-09, |
|
"logits/chosen": -2.830385684967041, |
|
"logits/rejected": -2.893541097640991, |
|
"logps/chosen": -356.2238464355469, |
|
"logps/rejected": -443.7274169921875, |
|
"loss": 0.4612, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.463138461112976, |
|
"rewards/margins": 1.1921138763427734, |
|
"rewards/rejected": -2.65525221824646, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 1.8995865327820436, |
|
"grad_norm": 4.0526022624158955, |
|
"learning_rate": 2.9442104579016356e-09, |
|
"logits/chosen": -2.313565731048584, |
|
"logits/rejected": -2.4396870136260986, |
|
"logps/chosen": -441.73736572265625, |
|
"logps/rejected": -458.2696533203125, |
|
"loss": 0.4724, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -1.3574057817459106, |
|
"rewards/margins": 0.8890299797058105, |
|
"rewards/rejected": -2.2464358806610107, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 1.9043118724158299, |
|
"grad_norm": 3.860540669652651, |
|
"learning_rate": 2.657659687697156e-09, |
|
"logits/chosen": -2.6873679161071777, |
|
"logits/rejected": -2.549762725830078, |
|
"logps/chosen": -317.7322082519531, |
|
"logps/rejected": -429.63787841796875, |
|
"loss": 0.4784, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -1.5817471742630005, |
|
"rewards/margins": 1.0039292573928833, |
|
"rewards/rejected": -2.5856761932373047, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 1.9090372120496162, |
|
"grad_norm": 4.53787076168368, |
|
"learning_rate": 2.385702081379143e-09, |
|
"logits/chosen": -2.3574860095977783, |
|
"logits/rejected": -2.427652359008789, |
|
"logps/chosen": -431.2988586425781, |
|
"logps/rejected": -567.0403442382812, |
|
"loss": 0.4604, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.6895103454589844, |
|
"rewards/margins": 1.765432357788086, |
|
"rewards/rejected": -3.4549427032470703, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 1.9137625516834023, |
|
"grad_norm": 3.735971197718604, |
|
"learning_rate": 2.1283536844087513e-09, |
|
"logits/chosen": -2.5645933151245117, |
|
"logits/rejected": -2.5676231384277344, |
|
"logps/chosen": -362.2431335449219, |
|
"logps/rejected": -531.4500122070312, |
|
"loss": 0.4731, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -1.5010064840316772, |
|
"rewards/margins": 1.6250488758087158, |
|
"rewards/rejected": -3.1260552406311035, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.9184878913171883, |
|
"grad_norm": 3.613782865337248, |
|
"learning_rate": 1.885629680305867e-09, |
|
"logits/chosen": -2.569301128387451, |
|
"logits/rejected": -2.568390130996704, |
|
"logps/chosen": -384.6716613769531, |
|
"logps/rejected": -521.9403076171875, |
|
"loss": 0.4467, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -1.6642777919769287, |
|
"rewards/margins": 1.619908332824707, |
|
"rewards/rejected": -3.284186363220215, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 1.9232132309509746, |
|
"grad_norm": 4.461343319483535, |
|
"learning_rate": 1.6575443897531294e-09, |
|
"logits/chosen": -2.5552725791931152, |
|
"logits/rejected": -2.436856985092163, |
|
"logps/chosen": -367.128662109375, |
|
"logps/rejected": -545.8114624023438, |
|
"loss": 0.4754, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.6492211818695068, |
|
"rewards/margins": 1.592346429824829, |
|
"rewards/rejected": -3.241567373275757, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 1.927938570584761, |
|
"grad_norm": 4.454271459686257, |
|
"learning_rate": 1.4441112697511638e-09, |
|
"logits/chosen": -2.6292550563812256, |
|
"logits/rejected": -2.6084656715393066, |
|
"logps/chosen": -421.9637756347656, |
|
"logps/rejected": -506.38897705078125, |
|
"loss": 0.484, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6983362436294556, |
|
"rewards/margins": 1.19191312789917, |
|
"rewards/rejected": -2.890249252319336, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 1.932663910218547, |
|
"grad_norm": 4.002197751374577, |
|
"learning_rate": 1.2453429128245762e-09, |
|
"logits/chosen": -2.5047454833984375, |
|
"logits/rejected": -2.451660633087158, |
|
"logps/chosen": -431.7708740234375, |
|
"logps/rejected": -614.2373657226562, |
|
"loss": 0.471, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9678634405136108, |
|
"rewards/margins": 1.840644359588623, |
|
"rewards/rejected": -3.8085079193115234, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 1.937389249852333, |
|
"grad_norm": 4.283657038062509, |
|
"learning_rate": 1.061251046278938e-09, |
|
"logits/chosen": -2.5947906970977783, |
|
"logits/rejected": -2.700439214706421, |
|
"logps/chosen": -392.77850341796875, |
|
"logps/rejected": -458.53570556640625, |
|
"loss": 0.4785, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -1.4087834358215332, |
|
"rewards/margins": 1.301395297050476, |
|
"rewards/rejected": -2.7101786136627197, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.9421145894861194, |
|
"grad_norm": 4.320389688019286, |
|
"learning_rate": 8.918465315088941e-10, |
|
"logits/chosen": -2.529167413711548, |
|
"logits/rejected": -2.595038652420044, |
|
"logps/chosen": -436.0292663574219, |
|
"logps/rejected": -522.2467041015625, |
|
"loss": 0.4691, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.7954521179199219, |
|
"rewards/margins": 1.5213781595230103, |
|
"rewards/rejected": -3.3168301582336426, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 1.9468399291199054, |
|
"grad_norm": 3.917488632599818, |
|
"learning_rate": 7.371393633574252e-10, |
|
"logits/chosen": -2.508873224258423, |
|
"logits/rejected": -2.5457823276519775, |
|
"logps/chosen": -384.64923095703125, |
|
"logps/rejected": -475.94134521484375, |
|
"loss": 0.469, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -1.4391266107559204, |
|
"rewards/margins": 1.211023211479187, |
|
"rewards/rejected": -2.6501498222351074, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 1.9515652687536917, |
|
"grad_norm": 3.9351124104785584, |
|
"learning_rate": 5.971386695260705e-10, |
|
"logits/chosen": -2.465684652328491, |
|
"logits/rejected": -2.5066022872924805, |
|
"logps/chosen": -385.0665283203125, |
|
"logps/rejected": -479.13214111328125, |
|
"loss": 0.4643, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -1.4245293140411377, |
|
"rewards/margins": 1.249526858329773, |
|
"rewards/rejected": -2.6740562915802, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 1.9562906083874778, |
|
"grad_norm": 3.704041812352823, |
|
"learning_rate": 4.718527100364134e-10, |
|
"logits/chosen": -2.6545798778533936, |
|
"logits/rejected": -2.6624419689178467, |
|
"logps/chosen": -366.58184814453125, |
|
"logps/rejected": -510.66436767578125, |
|
"loss": 0.4482, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.5549781322479248, |
|
"rewards/margins": 1.5164021253585815, |
|
"rewards/rejected": -3.071380376815796, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 1.9610159480212639, |
|
"grad_norm": 4.499954109459605, |
|
"learning_rate": 3.6128887674272133e-10, |
|
"logits/chosen": -2.5801494121551514, |
|
"logits/rejected": -2.551297187805176, |
|
"logps/chosen": -360.39935302734375, |
|
"logps/rejected": -539.8038330078125, |
|
"loss": 0.4529, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6478443145751953, |
|
"rewards/margins": 1.7761938571929932, |
|
"rewards/rejected": -3.4240384101867676, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.9657412876550502, |
|
"grad_norm": 4.539840176257443, |
|
"learning_rate": 2.6545369289587836e-10, |
|
"logits/chosen": -2.506206750869751, |
|
"logits/rejected": -2.729609489440918, |
|
"logps/chosen": -423.645751953125, |
|
"logps/rejected": -465.3453674316406, |
|
"loss": 0.5018, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.2937572002410889, |
|
"rewards/margins": 1.3675696849822998, |
|
"rewards/rejected": -2.6613268852233887, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.9704666272888365, |
|
"grad_norm": 4.977216825161169, |
|
"learning_rate": 1.843528127584981e-10, |
|
"logits/chosen": -2.5777127742767334, |
|
"logits/rejected": -2.611386775970459, |
|
"logps/chosen": -396.95892333984375, |
|
"logps/rejected": -500.09375, |
|
"loss": 0.4951, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -1.7503533363342285, |
|
"rewards/margins": 1.2793786525726318, |
|
"rewards/rejected": -3.0297319889068604, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 1.9751919669226226, |
|
"grad_norm": 4.452268262277202, |
|
"learning_rate": 1.17991021271302e-10, |
|
"logits/chosen": -2.4709465503692627, |
|
"logits/rejected": -2.399839401245117, |
|
"logps/chosen": -379.194091796875, |
|
"logps/rejected": -520.7940063476562, |
|
"loss": 0.4696, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.5808072090148926, |
|
"rewards/margins": 1.5406217575073242, |
|
"rewards/rejected": -3.121428966522217, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 1.9799173065564086, |
|
"grad_norm": 4.060313383389935, |
|
"learning_rate": 6.637223377078949e-11, |
|
"logits/chosen": -2.822049856185913, |
|
"logits/rejected": -2.800950765609741, |
|
"logps/chosen": -309.0566711425781, |
|
"logps/rejected": -446.6823425292969, |
|
"loss": 0.4561, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.39582097530365, |
|
"rewards/margins": 1.2124607563018799, |
|
"rewards/rejected": -2.6082818508148193, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 1.984642646190195, |
|
"grad_norm": 4.291839752698122, |
|
"learning_rate": 2.949949575833943e-11, |
|
"logits/chosen": -2.625284433364868, |
|
"logits/rejected": -2.6156272888183594, |
|
"logps/chosen": -342.9141845703125, |
|
"logps/rejected": -463.14312744140625, |
|
"loss": 0.5061, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.5005218982696533, |
|
"rewards/margins": 1.2724413871765137, |
|
"rewards/rejected": -2.772963047027588, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.9893679858239812, |
|
"grad_norm": 4.75166895203396, |
|
"learning_rate": 7.374982720326217e-12, |
|
"logits/chosen": -2.7700271606445312, |
|
"logits/rejected": -2.7563564777374268, |
|
"logps/chosen": -397.4788818359375, |
|
"logps/rejected": -582.6298828125, |
|
"loss": 0.4718, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -1.6806347370147705, |
|
"rewards/margins": 1.8375290632247925, |
|
"rewards/rejected": -3.5181639194488525, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 1.9940933254577673, |
|
"grad_norm": 4.42823795907204, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -2.568351984024048, |
|
"logits/rejected": -2.728876829147339, |
|
"logps/chosen": -369.7898254394531, |
|
"logps/rejected": -406.9199523925781, |
|
"loss": 0.4865, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.3815803527832031, |
|
"rewards/margins": 0.7950088977813721, |
|
"rewards/rejected": -2.176589250564575, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 1.9940933254577673, |
|
"step": 422, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5345145690638872, |
|
"train_runtime": 33183.7631, |
|
"train_samples_per_second": 5.711, |
|
"train_steps_per_second": 0.013 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 422, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 43, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|