|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.99562408835174, |
|
"eval_steps": 200, |
|
"global_step": 1797, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 26.02830696105957, |
|
"learning_rate": 9.999523086940423e-06, |
|
"logits/chosen": -1.1374095678329468, |
|
"logits/rejected": -1.1327173709869385, |
|
"logps/chosen": -142.34921264648438, |
|
"logps/rejected": -155.80406188964844, |
|
"loss": 1.5859, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 13.082440376281738, |
|
"rewards/margins": 0.9329651594161987, |
|
"rewards/rejected": 12.14947509765625, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 21.4633846282959, |
|
"learning_rate": 9.997817603030276e-06, |
|
"logits/chosen": -1.1412668228149414, |
|
"logits/rejected": -1.1413795948028564, |
|
"logps/chosen": -150.35586547851562, |
|
"logps/rejected": -164.76641845703125, |
|
"loss": 1.786, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 13.009861946105957, |
|
"rewards/margins": 0.8635275959968567, |
|
"rewards/rejected": 12.146333694458008, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 23.037744522094727, |
|
"learning_rate": 9.994471383754724e-06, |
|
"logits/chosen": -1.1316417455673218, |
|
"logits/rejected": -1.130651831626892, |
|
"logps/chosen": -151.8024444580078, |
|
"logps/rejected": -168.8718719482422, |
|
"loss": 1.4256, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 13.176553726196289, |
|
"rewards/margins": 1.613856554031372, |
|
"rewards/rejected": 11.562695503234863, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 14.40039348602295, |
|
"learning_rate": 9.990154489175436e-06, |
|
"logits/chosen": -1.1126848459243774, |
|
"logits/rejected": -1.1085669994354248, |
|
"logps/chosen": -149.564697265625, |
|
"logps/rejected": -167.00857543945312, |
|
"loss": 1.5826, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 12.614360809326172, |
|
"rewards/margins": 1.426288366317749, |
|
"rewards/rejected": 11.18807315826416, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 30.87997817993164, |
|
"learning_rate": 9.983908955774398e-06, |
|
"logits/chosen": -1.121843934059143, |
|
"logits/rejected": -1.124288558959961, |
|
"logps/chosen": -148.73220825195312, |
|
"logps/rejected": -175.08212280273438, |
|
"loss": 1.4624, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 12.753301620483398, |
|
"rewards/margins": 1.8802299499511719, |
|
"rewards/rejected": 10.873071670532227, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 23.71029281616211, |
|
"learning_rate": 9.976140032846158e-06, |
|
"logits/chosen": -1.154342532157898, |
|
"logits/rejected": -1.1551573276519775, |
|
"logps/chosen": -148.70213317871094, |
|
"logps/rejected": -169.8200225830078, |
|
"loss": 1.5662, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 12.446992874145508, |
|
"rewards/margins": 1.6254791021347046, |
|
"rewards/rejected": 10.821515083312988, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 27.385414123535156, |
|
"learning_rate": 9.966850095052043e-06, |
|
"logits/chosen": -1.1726210117340088, |
|
"logits/rejected": -1.1690549850463867, |
|
"logps/chosen": -150.37200927734375, |
|
"logps/rejected": -183.83358764648438, |
|
"loss": 1.3566, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 12.22251033782959, |
|
"rewards/margins": 2.0067410469055176, |
|
"rewards/rejected": 10.21576976776123, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 23.488914489746094, |
|
"learning_rate": 9.956041981969192e-06, |
|
"logits/chosen": -1.2129347324371338, |
|
"logits/rejected": -1.2055721282958984, |
|
"logps/chosen": -139.1160125732422, |
|
"logps/rejected": -180.86448669433594, |
|
"loss": 1.3657, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 12.533506393432617, |
|
"rewards/margins": 2.3238863945007324, |
|
"rewards/rejected": 10.209619522094727, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 27.516918182373047, |
|
"learning_rate": 9.943718997222616e-06, |
|
"logits/chosen": -1.2162883281707764, |
|
"logits/rejected": -1.2150709629058838, |
|
"logps/chosen": -153.258544921875, |
|
"logps/rejected": -169.685302734375, |
|
"loss": 1.3773, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 12.494550704956055, |
|
"rewards/margins": 2.459848403930664, |
|
"rewards/rejected": 10.034701347351074, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 30.299776077270508, |
|
"learning_rate": 9.929884907475405e-06, |
|
"logits/chosen": -1.2348403930664062, |
|
"logits/rejected": -1.2337309122085571, |
|
"logps/chosen": -141.0696563720703, |
|
"logps/rejected": -172.27981567382812, |
|
"loss": 1.3082, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 12.868219375610352, |
|
"rewards/margins": 2.447009325027466, |
|
"rewards/rejected": 10.421208381652832, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 10.773354530334473, |
|
"learning_rate": 9.914543941277401e-06, |
|
"logits/chosen": -1.2390023469924927, |
|
"logits/rejected": -1.2259633541107178, |
|
"logps/chosen": -151.1343536376953, |
|
"logps/rejected": -166.46087646484375, |
|
"loss": 1.2816, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 12.653604507446289, |
|
"rewards/margins": 2.4828662872314453, |
|
"rewards/rejected": 10.170738220214844, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 18.021488189697266, |
|
"learning_rate": 9.897700787772703e-06, |
|
"logits/chosen": -1.2141873836517334, |
|
"logits/rejected": -1.212436556816101, |
|
"logps/chosen": -145.79354858398438, |
|
"logps/rejected": -174.75421142578125, |
|
"loss": 1.4305, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 12.540283203125, |
|
"rewards/margins": 2.2728052139282227, |
|
"rewards/rejected": 10.26747989654541, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 14.722694396972656, |
|
"learning_rate": 9.879360595266359e-06, |
|
"logits/chosen": -1.2301462888717651, |
|
"logits/rejected": -1.2223972082138062, |
|
"logps/chosen": -141.97836303710938, |
|
"logps/rejected": -190.95040893554688, |
|
"loss": 1.1746, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 12.985305786132812, |
|
"rewards/margins": 3.339768886566162, |
|
"rewards/rejected": 9.645538330078125, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 19.521358489990234, |
|
"learning_rate": 9.861579077506591e-06, |
|
"logits/chosen": -1.2298915386199951, |
|
"logits/rejected": -1.221884846687317, |
|
"logps/chosen": -148.5527801513672, |
|
"logps/rejected": -180.9561004638672, |
|
"loss": 1.3081, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 12.90058422088623, |
|
"rewards/margins": 2.527609348297119, |
|
"rewards/rejected": 10.372976303100586, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 21.75788688659668, |
|
"learning_rate": 9.84041033194796e-06, |
|
"logits/chosen": -1.1978212594985962, |
|
"logits/rejected": -1.2021763324737549, |
|
"logps/chosen": -149.8433837890625, |
|
"logps/rejected": -183.22714233398438, |
|
"loss": 1.3711, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 12.765899658203125, |
|
"rewards/margins": 2.4878742694854736, |
|
"rewards/rejected": 10.27802562713623, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 31.972171783447266, |
|
"learning_rate": 9.817762058879405e-06, |
|
"logits/chosen": -1.1968965530395508, |
|
"logits/rejected": -1.2041929960250854, |
|
"logps/chosen": -141.44479370117188, |
|
"logps/rejected": -172.67782592773438, |
|
"loss": 1.4901, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 12.295498847961426, |
|
"rewards/margins": 1.9750818014144897, |
|
"rewards/rejected": 10.320415496826172, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 6.441612243652344, |
|
"learning_rate": 9.793641181008042e-06, |
|
"logits/chosen": -1.1921595335006714, |
|
"logits/rejected": -1.1772682666778564, |
|
"logps/chosen": -158.01004028320312, |
|
"logps/rejected": -174.64419555664062, |
|
"loss": 1.3182, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 11.994952201843262, |
|
"rewards/margins": 3.0716311931610107, |
|
"rewards/rejected": 8.923321723937988, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 23.175006866455078, |
|
"learning_rate": 9.76805507115971e-06, |
|
"logits/chosen": -1.1862366199493408, |
|
"logits/rejected": -1.1827598810195923, |
|
"logps/chosen": -150.92054748535156, |
|
"logps/rejected": -191.04061889648438, |
|
"loss": 1.3317, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 12.52110767364502, |
|
"rewards/margins": 2.7398293018341064, |
|
"rewards/rejected": 9.781278610229492, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 20.24986457824707, |
|
"learning_rate": 9.741011550025385e-06, |
|
"logits/chosen": -1.1844813823699951, |
|
"logits/rejected": -1.1813442707061768, |
|
"logps/chosen": -146.86256408691406, |
|
"logps/rejected": -182.008544921875, |
|
"loss": 1.3589, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 12.942377090454102, |
|
"rewards/margins": 2.665095329284668, |
|
"rewards/rejected": 10.27728271484375, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 31.18330955505371, |
|
"learning_rate": 9.71251888377069e-06, |
|
"logits/chosen": -1.198872685432434, |
|
"logits/rejected": -1.195305347442627, |
|
"logps/chosen": -140.82937622070312, |
|
"logps/rejected": -173.56301879882812, |
|
"loss": 1.3287, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 12.571944236755371, |
|
"rewards/margins": 2.418260335922241, |
|
"rewards/rejected": 10.153684616088867, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_logits/chosen": -1.186182975769043, |
|
"eval_logits/rejected": -1.1862589120864868, |
|
"eval_logps/chosen": -142.31016540527344, |
|
"eval_logps/rejected": -167.46771240234375, |
|
"eval_loss": 1.8699389696121216, |
|
"eval_rewards/accuracies": 0.631205677986145, |
|
"eval_rewards/chosen": 9.93428897857666, |
|
"eval_rewards/margins": 0.8060780763626099, |
|
"eval_rewards/rejected": 9.12821102142334, |
|
"eval_runtime": 280.3161, |
|
"eval_samples_per_second": 2.515, |
|
"eval_steps_per_second": 2.515, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 23.425613403320312, |
|
"learning_rate": 9.682585781509243e-06, |
|
"logits/chosen": -1.2091820240020752, |
|
"logits/rejected": -1.2033774852752686, |
|
"logps/chosen": -147.17410278320312, |
|
"logps/rejected": -185.01951599121094, |
|
"loss": 1.1424, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 13.107488632202148, |
|
"rewards/margins": 3.5036494731903076, |
|
"rewards/rejected": 9.603840827941895, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 24.199848175048828, |
|
"learning_rate": 9.651221392640626e-06, |
|
"logits/chosen": -1.2453477382659912, |
|
"logits/rejected": -1.2410730123519897, |
|
"logps/chosen": -142.30599975585938, |
|
"logps/rejected": -187.09365844726562, |
|
"loss": 1.3393, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 12.409756660461426, |
|
"rewards/margins": 2.767655849456787, |
|
"rewards/rejected": 9.642101287841797, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 5.732828140258789, |
|
"learning_rate": 9.618435304053756e-06, |
|
"logits/chosen": -1.2778840065002441, |
|
"logits/rejected": -1.2746034860610962, |
|
"logps/chosen": -135.27261352539062, |
|
"logps/rejected": -201.36862182617188, |
|
"loss": 1.0207, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 13.215780258178711, |
|
"rewards/margins": 4.094855308532715, |
|
"rewards/rejected": 9.120925903320312, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 33.938697814941406, |
|
"learning_rate": 9.584237537196539e-06, |
|
"logits/chosen": -1.264520287513733, |
|
"logits/rejected": -1.2698160409927368, |
|
"logps/chosen": -139.2551727294922, |
|
"logps/rejected": -185.9132537841797, |
|
"loss": 1.3149, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 12.885518074035645, |
|
"rewards/margins": 3.4518959522247314, |
|
"rewards/rejected": 9.433621406555176, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 19.174827575683594, |
|
"learning_rate": 9.548638545012714e-06, |
|
"logits/chosen": -1.2648355960845947, |
|
"logits/rejected": -1.2563896179199219, |
|
"logps/chosen": -143.2262725830078, |
|
"logps/rejected": -177.64321899414062, |
|
"loss": 1.2689, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 12.918134689331055, |
|
"rewards/margins": 3.194645881652832, |
|
"rewards/rejected": 9.723487854003906, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 12.325043678283691, |
|
"learning_rate": 9.511649208746768e-06, |
|
"logits/chosen": -1.2492735385894775, |
|
"logits/rejected": -1.2509915828704834, |
|
"logps/chosen": -140.66622924804688, |
|
"logps/rejected": -186.30052185058594, |
|
"loss": 1.2776, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 13.131103515625, |
|
"rewards/margins": 2.9642741680145264, |
|
"rewards/rejected": 10.166828155517578, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 29.295560836791992, |
|
"learning_rate": 9.473280834617975e-06, |
|
"logits/chosen": -1.2667304277420044, |
|
"logits/rejected": -1.2650549411773682, |
|
"logps/chosen": -143.45948791503906, |
|
"logps/rejected": -180.34130859375, |
|
"loss": 1.3095, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 12.880398750305176, |
|
"rewards/margins": 2.869231939315796, |
|
"rewards/rejected": 10.011167526245117, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 24.3317928314209, |
|
"learning_rate": 9.43354515036451e-06, |
|
"logits/chosen": -1.2491796016693115, |
|
"logits/rejected": -1.2445125579833984, |
|
"logps/chosen": -140.8796844482422, |
|
"logps/rejected": -178.88314819335938, |
|
"loss": 1.3453, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 12.978363037109375, |
|
"rewards/margins": 3.1743595600128174, |
|
"rewards/rejected": 9.80400276184082, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 31.15982437133789, |
|
"learning_rate": 9.392454301658734e-06, |
|
"logits/chosen": -1.2521940469741821, |
|
"logits/rejected": -1.2475926876068115, |
|
"logps/chosen": -150.509765625, |
|
"logps/rejected": -201.6109161376953, |
|
"loss": 1.3024, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 13.243515014648438, |
|
"rewards/margins": 3.826166868209839, |
|
"rewards/rejected": 9.417348861694336, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 21.953458786010742, |
|
"learning_rate": 9.350020848394722e-06, |
|
"logits/chosen": -1.233689308166504, |
|
"logits/rejected": -1.2306249141693115, |
|
"logps/chosen": -142.88519287109375, |
|
"logps/rejected": -176.23739624023438, |
|
"loss": 1.3095, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 12.999292373657227, |
|
"rewards/margins": 3.4122116565704346, |
|
"rewards/rejected": 9.587080955505371, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 5.4461750984191895, |
|
"learning_rate": 9.306257760849198e-06, |
|
"logits/chosen": -1.243290662765503, |
|
"logits/rejected": -1.238360047340393, |
|
"logps/chosen": -135.43121337890625, |
|
"logps/rejected": -184.3162078857422, |
|
"loss": 1.1376, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 13.632291793823242, |
|
"rewards/margins": 3.4248099327087402, |
|
"rewards/rejected": 10.207484245300293, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 5.681567668914795, |
|
"learning_rate": 9.261178415717006e-06, |
|
"logits/chosen": -1.2789077758789062, |
|
"logits/rejected": -1.2725191116333008, |
|
"logps/chosen": -145.27374267578125, |
|
"logps/rejected": -171.4091339111328, |
|
"loss": 1.1728, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 13.61553955078125, |
|
"rewards/margins": 3.5283050537109375, |
|
"rewards/rejected": 10.087234497070312, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 10.140364646911621, |
|
"learning_rate": 9.214796592022378e-06, |
|
"logits/chosen": -1.322906732559204, |
|
"logits/rejected": -1.308996558189392, |
|
"logps/chosen": -141.2882537841797, |
|
"logps/rejected": -186.32119750976562, |
|
"loss": 1.1413, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 13.753789901733398, |
|
"rewards/margins": 3.8648266792297363, |
|
"rewards/rejected": 9.88896369934082, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 29.466781616210938, |
|
"learning_rate": 9.167126466907215e-06, |
|
"logits/chosen": -1.333762288093567, |
|
"logits/rejected": -1.33591628074646, |
|
"logps/chosen": -135.62503051757812, |
|
"logps/rejected": -185.78758239746094, |
|
"loss": 1.2402, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 13.778650283813477, |
|
"rewards/margins": 3.8923568725585938, |
|
"rewards/rejected": 9.886293411254883, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 24.269454956054688, |
|
"learning_rate": 9.118182611297665e-06, |
|
"logits/chosen": -1.309342622756958, |
|
"logits/rejected": -1.3070811033248901, |
|
"logps/chosen": -136.98138427734375, |
|
"logps/rejected": -191.55300903320312, |
|
"loss": 1.327, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 13.252286911010742, |
|
"rewards/margins": 3.4166321754455566, |
|
"rewards/rejected": 9.835655212402344, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 45.82386779785156, |
|
"learning_rate": 9.067979985450377e-06, |
|
"logits/chosen": -1.3228009939193726, |
|
"logits/rejected": -1.3171498775482178, |
|
"logps/chosen": -146.18157958984375, |
|
"logps/rejected": -188.90127563476562, |
|
"loss": 1.1041, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 13.495672225952148, |
|
"rewards/margins": 3.9597671031951904, |
|
"rewards/rejected": 9.535904884338379, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 30.16645622253418, |
|
"learning_rate": 9.016533934379697e-06, |
|
"logits/chosen": -1.33194100856781, |
|
"logits/rejected": -1.3219845294952393, |
|
"logps/chosen": -138.00164794921875, |
|
"logps/rejected": -186.89862060546875, |
|
"loss": 1.0528, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 13.405718803405762, |
|
"rewards/margins": 4.230135440826416, |
|
"rewards/rejected": 9.17558479309082, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 23.7780704498291, |
|
"learning_rate": 8.96386018316731e-06, |
|
"logits/chosen": -1.332335114479065, |
|
"logits/rejected": -1.3253917694091797, |
|
"logps/chosen": -135.97877502441406, |
|
"logps/rejected": -190.83480834960938, |
|
"loss": 1.0856, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 14.198583602905273, |
|
"rewards/margins": 4.640327453613281, |
|
"rewards/rejected": 9.558258056640625, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 37.86054229736328, |
|
"learning_rate": 8.909974832155667e-06, |
|
"logits/chosen": -1.3102099895477295, |
|
"logits/rejected": -1.3026459217071533, |
|
"logps/chosen": -145.63626098632812, |
|
"logps/rejected": -176.83547973632812, |
|
"loss": 1.2607, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 13.206341743469238, |
|
"rewards/margins": 3.159071683883667, |
|
"rewards/rejected": 10.047269821166992, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 21.949071884155273, |
|
"learning_rate": 8.854894352026746e-06, |
|
"logits/chosen": -1.2982990741729736, |
|
"logits/rejected": -1.294737696647644, |
|
"logps/chosen": -135.0550537109375, |
|
"logps/rejected": -179.46371459960938, |
|
"loss": 1.1821, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 13.508148193359375, |
|
"rewards/margins": 3.7253730297088623, |
|
"rewards/rejected": 9.78277587890625, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_logits/chosen": -1.2718416452407837, |
|
"eval_logits/rejected": -1.2731894254684448, |
|
"eval_logps/chosen": -142.27450561523438, |
|
"eval_logps/rejected": -166.72564697265625, |
|
"eval_loss": 1.9728976488113403, |
|
"eval_rewards/accuracies": 0.611347496509552, |
|
"eval_rewards/chosen": 9.937856674194336, |
|
"eval_rewards/margins": 0.7354397177696228, |
|
"eval_rewards/rejected": 9.20241641998291, |
|
"eval_runtime": 280.6001, |
|
"eval_samples_per_second": 2.512, |
|
"eval_steps_per_second": 2.512, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 16.105052947998047, |
|
"learning_rate": 8.798635578767584e-06, |
|
"logits/chosen": -1.2835286855697632, |
|
"logits/rejected": -1.2852187156677246, |
|
"logps/chosen": -128.66891479492188, |
|
"logps/rejected": -180.98855590820312, |
|
"loss": 1.2384, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 13.414710998535156, |
|
"rewards/margins": 3.5980567932128906, |
|
"rewards/rejected": 9.81665325164795, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 18.655576705932617, |
|
"learning_rate": 8.74121570852417e-06, |
|
"logits/chosen": -1.304517388343811, |
|
"logits/rejected": -1.295888900756836, |
|
"logps/chosen": -129.7939910888672, |
|
"logps/rejected": -182.28158569335938, |
|
"loss": 1.1389, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 13.784006118774414, |
|
"rewards/margins": 4.219157695770264, |
|
"rewards/rejected": 9.564847946166992, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 21.71282386779785, |
|
"learning_rate": 8.682652292345239e-06, |
|
"logits/chosen": -1.2803471088409424, |
|
"logits/rejected": -1.281240701675415, |
|
"logps/chosen": -126.82594299316406, |
|
"logps/rejected": -187.58602905273438, |
|
"loss": 1.0862, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 13.763422966003418, |
|
"rewards/margins": 4.294766426086426, |
|
"rewards/rejected": 9.468656539916992, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 23.261680603027344, |
|
"learning_rate": 8.622963230817599e-06, |
|
"logits/chosen": -1.3054393529891968, |
|
"logits/rejected": -1.294926404953003, |
|
"logps/chosen": -137.8651580810547, |
|
"logps/rejected": -187.91754150390625, |
|
"loss": 1.0189, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 13.93773078918457, |
|
"rewards/margins": 4.54481840133667, |
|
"rewards/rejected": 9.392911911010742, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 22.61610221862793, |
|
"learning_rate": 8.562166768594592e-06, |
|
"logits/chosen": -1.3260384798049927, |
|
"logits/rejected": -1.313039779663086, |
|
"logps/chosen": -136.37155151367188, |
|
"logps/rejected": -189.9808807373047, |
|
"loss": 1.0986, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 14.760931015014648, |
|
"rewards/margins": 5.300175666809082, |
|
"rewards/rejected": 9.460756301879883, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 44.37038803100586, |
|
"learning_rate": 8.500281488819426e-06, |
|
"logits/chosen": -1.335376501083374, |
|
"logits/rejected": -1.32558012008667, |
|
"logps/chosen": -137.15939331054688, |
|
"logps/rejected": -178.85699462890625, |
|
"loss": 1.25, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 14.302162170410156, |
|
"rewards/margins": 4.735361099243164, |
|
"rewards/rejected": 9.566801071166992, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 15.156902313232422, |
|
"learning_rate": 8.43732630744501e-06, |
|
"logits/chosen": -1.3467152118682861, |
|
"logits/rejected": -1.3411352634429932, |
|
"logps/chosen": -131.86680603027344, |
|
"logps/rejected": -184.54978942871094, |
|
"loss": 1.067, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 14.075284004211426, |
|
"rewards/margins": 4.149069309234619, |
|
"rewards/rejected": 9.926214218139648, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 8.137179374694824, |
|
"learning_rate": 8.373320467452069e-06, |
|
"logits/chosen": -1.3710681200027466, |
|
"logits/rejected": -1.360769271850586, |
|
"logps/chosen": -129.4867706298828, |
|
"logps/rejected": -187.3372802734375, |
|
"loss": 1.0214, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 14.554537773132324, |
|
"rewards/margins": 5.148342132568359, |
|
"rewards/rejected": 9.406194686889648, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 5.059852600097656, |
|
"learning_rate": 8.308283532967311e-06, |
|
"logits/chosen": -1.3810697793960571, |
|
"logits/rejected": -1.372183918952942, |
|
"logps/chosen": -128.07522583007812, |
|
"logps/rejected": -187.84347534179688, |
|
"loss": 1.0341, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 14.386204719543457, |
|
"rewards/margins": 4.947279930114746, |
|
"rewards/rejected": 9.438923835754395, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 15.475478172302246, |
|
"learning_rate": 8.242235383283433e-06, |
|
"logits/chosen": -1.3918366432189941, |
|
"logits/rejected": -1.379677176475525, |
|
"logps/chosen": -133.6950225830078, |
|
"logps/rejected": -194.3822479248047, |
|
"loss": 0.9832, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 15.278470039367676, |
|
"rewards/margins": 5.975428104400635, |
|
"rewards/rejected": 9.303044319152832, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 17.410987854003906, |
|
"learning_rate": 8.175196206782765e-06, |
|
"logits/chosen": -1.4215304851531982, |
|
"logits/rejected": -1.4245882034301758, |
|
"logps/chosen": -135.43157958984375, |
|
"logps/rejected": -195.17837524414062, |
|
"loss": 1.1563, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 13.64989948272705, |
|
"rewards/margins": 4.778893947601318, |
|
"rewards/rejected": 8.87100601196289, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 10.288193702697754, |
|
"learning_rate": 8.107186494766475e-06, |
|
"logits/chosen": -1.3871448040008545, |
|
"logits/rejected": -1.3849140405654907, |
|
"logps/chosen": -138.03524780273438, |
|
"logps/rejected": -192.06105041503906, |
|
"loss": 1.0753, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 14.112817764282227, |
|
"rewards/margins": 5.05327844619751, |
|
"rewards/rejected": 9.059538841247559, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 9.09673023223877, |
|
"learning_rate": 8.038227035191152e-06, |
|
"logits/chosen": -1.369144320487976, |
|
"logits/rejected": -1.3611090183258057, |
|
"logps/chosen": -129.99301147460938, |
|
"logps/rejected": -184.04013061523438, |
|
"loss": 0.9557, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 14.619100570678711, |
|
"rewards/margins": 5.349046230316162, |
|
"rewards/rejected": 9.270054817199707, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 10.032170295715332, |
|
"learning_rate": 7.968338906314739e-06, |
|
"logits/chosen": -1.384235143661499, |
|
"logits/rejected": -1.3696062564849854, |
|
"logps/chosen": -135.14321899414062, |
|
"logps/rejected": -188.68896484375, |
|
"loss": 0.9715, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 15.478785514831543, |
|
"rewards/margins": 6.076346397399902, |
|
"rewards/rejected": 9.40243911743164, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 19.764158248901367, |
|
"learning_rate": 7.897543470253708e-06, |
|
"logits/chosen": -1.4087002277374268, |
|
"logits/rejected": -1.4047653675079346, |
|
"logps/chosen": -123.32807922363281, |
|
"logps/rejected": -196.0301971435547, |
|
"loss": 0.987, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 14.471944808959961, |
|
"rewards/margins": 5.295290470123291, |
|
"rewards/rejected": 9.176654815673828, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 40.5106315612793, |
|
"learning_rate": 7.825862366453487e-06, |
|
"logits/chosen": -1.4345886707305908, |
|
"logits/rejected": -1.4347158670425415, |
|
"logps/chosen": -123.09619140625, |
|
"logps/rejected": -191.9667205810547, |
|
"loss": 1.0165, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 14.416943550109863, |
|
"rewards/margins": 5.778631210327148, |
|
"rewards/rejected": 8.638311386108398, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 11.272564888000488, |
|
"learning_rate": 7.753317505074114e-06, |
|
"logits/chosen": -1.3969998359680176, |
|
"logits/rejected": -1.3935317993164062, |
|
"logps/chosen": -136.36032104492188, |
|
"logps/rejected": -193.97418212890625, |
|
"loss": 1.088, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 14.054471969604492, |
|
"rewards/margins": 5.512935161590576, |
|
"rewards/rejected": 8.541539192199707, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 32.77339172363281, |
|
"learning_rate": 7.679931060293137e-06, |
|
"logits/chosen": -1.3968368768692017, |
|
"logits/rejected": -1.3951635360717773, |
|
"logps/chosen": -127.2842025756836, |
|
"logps/rejected": -192.80587768554688, |
|
"loss": 1.0683, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 14.630516052246094, |
|
"rewards/margins": 5.419614315032959, |
|
"rewards/rejected": 9.210902214050293, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 9.357986450195312, |
|
"learning_rate": 7.605725463527825e-06, |
|
"logits/chosen": -1.3885825872421265, |
|
"logits/rejected": -1.3792840242385864, |
|
"logps/chosen": -129.8568115234375, |
|
"logps/rejected": -175.72964477539062, |
|
"loss": 1.0435, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 14.813947677612305, |
|
"rewards/margins": 5.173638343811035, |
|
"rewards/rejected": 9.640308380126953, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 14.639739036560059, |
|
"learning_rate": 7.530723396578745e-06, |
|
"logits/chosen": -1.3973591327667236, |
|
"logits/rejected": -1.3902390003204346, |
|
"logps/chosen": -123.96485900878906, |
|
"logps/rejected": -186.21829223632812, |
|
"loss": 0.9116, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 15.26634693145752, |
|
"rewards/margins": 6.136044025421143, |
|
"rewards/rejected": 9.130304336547852, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -1.351010799407959, |
|
"eval_logits/rejected": -1.3527151346206665, |
|
"eval_logps/chosen": -143.65618896484375, |
|
"eval_logps/rejected": -169.28347778320312, |
|
"eval_loss": 1.9454625844955444, |
|
"eval_rewards/accuracies": 0.6482269763946533, |
|
"eval_rewards/chosen": 9.799687385559082, |
|
"eval_rewards/margins": 0.8530532121658325, |
|
"eval_rewards/rejected": 8.946634292602539, |
|
"eval_runtime": 280.7129, |
|
"eval_samples_per_second": 2.511, |
|
"eval_steps_per_second": 2.511, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 4.632735729217529, |
|
"learning_rate": 7.454947784696804e-06, |
|
"logits/chosen": -1.4173977375030518, |
|
"logits/rejected": -1.4061188697814941, |
|
"logps/chosen": -127.29417419433594, |
|
"logps/rejected": -196.90383911132812, |
|
"loss": 0.8744, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 14.873614311218262, |
|
"rewards/margins": 6.611442565917969, |
|
"rewards/rejected": 8.262171745300293, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 9.438977241516113, |
|
"learning_rate": 7.3784217895758804e-06, |
|
"logits/chosen": -1.4528166055679321, |
|
"logits/rejected": -1.4417811632156372, |
|
"logps/chosen": -134.85516357421875, |
|
"logps/rejected": -205.706298828125, |
|
"loss": 0.8906, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 15.327418327331543, |
|
"rewards/margins": 6.7848310470581055, |
|
"rewards/rejected": 8.542585372924805, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 8.289262771606445, |
|
"learning_rate": 7.3011688022731865e-06, |
|
"logits/chosen": -1.4324336051940918, |
|
"logits/rejected": -1.420256495475769, |
|
"logps/chosen": -129.654052734375, |
|
"logps/rejected": -184.8654327392578, |
|
"loss": 0.8893, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 14.612588882446289, |
|
"rewards/margins": 5.854708671569824, |
|
"rewards/rejected": 8.757880210876465, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 8.663153648376465, |
|
"learning_rate": 7.2232124360595205e-06, |
|
"logits/chosen": -1.455397605895996, |
|
"logits/rejected": -1.4463832378387451, |
|
"logps/chosen": -127.48155212402344, |
|
"logps/rejected": -205.6735382080078, |
|
"loss": 0.7873, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 15.274075508117676, |
|
"rewards/margins": 6.838622093200684, |
|
"rewards/rejected": 8.435453414916992, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 6.434426307678223, |
|
"learning_rate": 7.144576519201595e-06, |
|
"logits/chosen": -1.4524507522583008, |
|
"logits/rejected": -1.437585711479187, |
|
"logps/chosen": -122.5464859008789, |
|
"logps/rejected": -193.6874542236328, |
|
"loss": 0.8148, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 15.566122055053711, |
|
"rewards/margins": 7.372010231018066, |
|
"rewards/rejected": 8.194112777709961, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 4.037100791931152, |
|
"learning_rate": 7.0652850876786485e-06, |
|
"logits/chosen": -1.4568301439285278, |
|
"logits/rejected": -1.4512048959732056, |
|
"logps/chosen": -109.03157043457031, |
|
"logps/rejected": -203.418212890625, |
|
"loss": 0.7832, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 15.715448379516602, |
|
"rewards/margins": 7.003268241882324, |
|
"rewards/rejected": 8.712181091308594, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 19.28900909423828, |
|
"learning_rate": 6.9853623778355805e-06, |
|
"logits/chosen": -1.4528229236602783, |
|
"logits/rejected": -1.441767692565918, |
|
"logps/chosen": -124.41324615478516, |
|
"logps/rejected": -183.64224243164062, |
|
"loss": 0.8512, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 15.778039932250977, |
|
"rewards/margins": 6.944033622741699, |
|
"rewards/rejected": 8.834006309509277, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 5.148800373077393, |
|
"learning_rate": 6.904832818974818e-06, |
|
"logits/chosen": -1.461948037147522, |
|
"logits/rejected": -1.4578710794448853, |
|
"logps/chosen": -112.7221450805664, |
|
"logps/rejected": -195.2073211669922, |
|
"loss": 0.8077, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 15.100164413452148, |
|
"rewards/margins": 6.574339866638184, |
|
"rewards/rejected": 8.525824546813965, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 8.881467819213867, |
|
"learning_rate": 6.823721025889227e-06, |
|
"logits/chosen": -1.4419883489608765, |
|
"logits/rejected": -1.4434632062911987, |
|
"logps/chosen": -120.42142486572266, |
|
"logps/rejected": -187.1098175048828, |
|
"loss": 0.8814, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 15.091572761535645, |
|
"rewards/margins": 6.394684791564941, |
|
"rewards/rejected": 8.696887016296387, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 4.702162265777588, |
|
"learning_rate": 6.742051791338305e-06, |
|
"logits/chosen": -1.4430066347122192, |
|
"logits/rejected": -1.4336270093917847, |
|
"logps/chosen": -124.91837310791016, |
|
"logps/rejected": -190.82229614257812, |
|
"loss": 0.8541, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 16.073802947998047, |
|
"rewards/margins": 7.100152015686035, |
|
"rewards/rejected": 8.973649978637695, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 16.544490814208984, |
|
"learning_rate": 6.6598500784700016e-06, |
|
"logits/chosen": -1.437310814857483, |
|
"logits/rejected": -1.439613938331604, |
|
"logps/chosen": -116.21219635009766, |
|
"logps/rejected": -190.1880340576172, |
|
"loss": 0.8385, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 14.853405952453613, |
|
"rewards/margins": 6.005455493927002, |
|
"rewards/rejected": 8.84795093536377, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 4.642127513885498, |
|
"learning_rate": 6.577141013190428e-06, |
|
"logits/chosen": -1.474867582321167, |
|
"logits/rejected": -1.4651817083358765, |
|
"logps/chosen": -114.55826568603516, |
|
"logps/rejected": -191.57504272460938, |
|
"loss": 0.7852, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 15.868762016296387, |
|
"rewards/margins": 7.344477653503418, |
|
"rewards/rejected": 8.524284362792969, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 5.04547119140625, |
|
"learning_rate": 6.493949876483841e-06, |
|
"logits/chosen": -1.4716556072235107, |
|
"logits/rejected": -1.4789526462554932, |
|
"logps/chosen": -114.3637466430664, |
|
"logps/rejected": -195.8820037841797, |
|
"loss": 0.8683, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 15.210566520690918, |
|
"rewards/margins": 7.015016078948975, |
|
"rewards/rejected": 8.195551872253418, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 8.906220436096191, |
|
"learning_rate": 6.410302096685219e-06, |
|
"logits/chosen": -1.467878818511963, |
|
"logits/rejected": -1.4584242105484009, |
|
"logps/chosen": -113.1087875366211, |
|
"logps/rejected": -196.9852752685547, |
|
"loss": 0.7898, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 15.73388671875, |
|
"rewards/margins": 7.293121337890625, |
|
"rewards/rejected": 8.440766334533691, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 4.600486755371094, |
|
"learning_rate": 6.326223241707787e-06, |
|
"logits/chosen": -1.4887049198150635, |
|
"logits/rejected": -1.4763177633285522, |
|
"logps/chosen": -114.50920104980469, |
|
"logps/rejected": -191.11123657226562, |
|
"loss": 0.7898, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 16.316104888916016, |
|
"rewards/margins": 7.5335493087768555, |
|
"rewards/rejected": 8.782556533813477, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 28.16501235961914, |
|
"learning_rate": 6.241739011227899e-06, |
|
"logits/chosen": -1.4948675632476807, |
|
"logits/rejected": -1.4884848594665527, |
|
"logps/chosen": -110.34342956542969, |
|
"logps/rejected": -183.22744750976562, |
|
"loss": 0.8029, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 15.722465515136719, |
|
"rewards/margins": 7.071497917175293, |
|
"rewards/rejected": 8.650967597961426, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 8.12157917022705, |
|
"learning_rate": 6.156875228829627e-06, |
|
"logits/chosen": -1.4990284442901611, |
|
"logits/rejected": -1.502752661705017, |
|
"logps/chosen": -124.48286437988281, |
|
"logps/rejected": -205.2565155029297, |
|
"loss": 0.9354, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 15.559919357299805, |
|
"rewards/margins": 7.290531158447266, |
|
"rewards/rejected": 8.269388198852539, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 5.052426338195801, |
|
"learning_rate": 6.071657834111483e-06, |
|
"logits/chosen": -1.4951364994049072, |
|
"logits/rejected": -1.4810194969177246, |
|
"logps/chosen": -115.41800689697266, |
|
"logps/rejected": -189.0443878173828, |
|
"loss": 0.8551, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 16.50579833984375, |
|
"rewards/margins": 7.6818413734436035, |
|
"rewards/rejected": 8.823959350585938, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 20.577198028564453, |
|
"learning_rate": 5.986112874757688e-06, |
|
"logits/chosen": -1.4854376316070557, |
|
"logits/rejected": -1.4871946573257446, |
|
"logps/chosen": -112.67398834228516, |
|
"logps/rejected": -196.98049926757812, |
|
"loss": 0.8269, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 16.0164852142334, |
|
"rewards/margins": 7.493149757385254, |
|
"rewards/rejected": 8.523335456848145, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 14.56539249420166, |
|
"learning_rate": 5.900266498576383e-06, |
|
"logits/chosen": -1.5002418756484985, |
|
"logits/rejected": -1.4961646795272827, |
|
"logps/chosen": -119.12144470214844, |
|
"logps/rejected": -195.09054565429688, |
|
"loss": 0.8412, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 15.817484855651855, |
|
"rewards/margins": 7.278079986572266, |
|
"rewards/rejected": 8.539405822753906, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_logits/chosen": -1.417891502380371, |
|
"eval_logits/rejected": -1.4206050634384155, |
|
"eval_logps/chosen": -146.20431518554688, |
|
"eval_logps/rejected": -173.58306884765625, |
|
"eval_loss": 2.0040743350982666, |
|
"eval_rewards/accuracies": 0.6397163271903992, |
|
"eval_rewards/chosen": 9.544875144958496, |
|
"eval_rewards/margins": 1.0281997919082642, |
|
"eval_rewards/rejected": 8.516674995422363, |
|
"eval_runtime": 280.445, |
|
"eval_samples_per_second": 2.514, |
|
"eval_steps_per_second": 2.514, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 8.800375938415527, |
|
"learning_rate": 5.81414494550726e-06, |
|
"logits/chosen": -1.4927313327789307, |
|
"logits/rejected": -1.486893653869629, |
|
"logps/chosen": -108.54096984863281, |
|
"logps/rejected": -194.3487548828125, |
|
"loss": 0.7899, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 16.199222564697266, |
|
"rewards/margins": 7.905210018157959, |
|
"rewards/rejected": 8.294013977050781, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 12.154976844787598, |
|
"learning_rate": 5.727774539601015e-06, |
|
"logits/chosen": -1.4953235387802124, |
|
"logits/rejected": -1.49058198928833, |
|
"logps/chosen": -105.4122543334961, |
|
"logps/rejected": -205.03518676757812, |
|
"loss": 0.7704, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 15.790075302124023, |
|
"rewards/margins": 7.537489414215088, |
|
"rewards/rejected": 8.252584457397461, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 18.195478439331055, |
|
"learning_rate": 5.641181680973094e-06, |
|
"logits/chosen": -1.5069096088409424, |
|
"logits/rejected": -1.5045548677444458, |
|
"logps/chosen": -107.76765441894531, |
|
"logps/rejected": -185.20993041992188, |
|
"loss": 0.7649, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 15.898587226867676, |
|
"rewards/margins": 7.165400505065918, |
|
"rewards/rejected": 8.733189582824707, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 6.522106647491455, |
|
"learning_rate": 5.554392837734201e-06, |
|
"logits/chosen": -1.4592971801757812, |
|
"logits/rejected": -1.4653236865997314, |
|
"logps/chosen": -126.52349853515625, |
|
"logps/rejected": -200.0361328125, |
|
"loss": 0.9745, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 14.701069831848145, |
|
"rewards/margins": 6.266867160797119, |
|
"rewards/rejected": 8.434202194213867, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 4.361470699310303, |
|
"learning_rate": 5.467434537900002e-06, |
|
"logits/chosen": -1.4889204502105713, |
|
"logits/rejected": -1.4831851720809937, |
|
"logps/chosen": -117.97469329833984, |
|
"logps/rejected": -199.29603576660156, |
|
"loss": 0.8372, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 15.988435745239258, |
|
"rewards/margins": 7.939582824707031, |
|
"rewards/rejected": 8.048852920532227, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 30.419536590576172, |
|
"learning_rate": 5.380333361282537e-06, |
|
"logits/chosen": -1.4820563793182373, |
|
"logits/rejected": -1.4779971837997437, |
|
"logps/chosen": -115.8757553100586, |
|
"logps/rejected": -182.51400756835938, |
|
"loss": 0.9258, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 15.708460807800293, |
|
"rewards/margins": 6.669085502624512, |
|
"rewards/rejected": 9.039376258850098, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 9.19847583770752, |
|
"learning_rate": 5.293115931365793e-06, |
|
"logits/chosen": -1.5117051601409912, |
|
"logits/rejected": -1.5152397155761719, |
|
"logps/chosen": -111.7296371459961, |
|
"logps/rejected": -208.90414428710938, |
|
"loss": 0.8764, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 16.2409725189209, |
|
"rewards/margins": 8.053875923156738, |
|
"rewards/rejected": 8.187097549438477, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 5.0174384117126465, |
|
"learning_rate": 5.20580890716792e-06, |
|
"logits/chosen": -1.5034881830215454, |
|
"logits/rejected": -1.500460147857666, |
|
"logps/chosen": -108.12443542480469, |
|
"logps/rejected": -194.9343719482422, |
|
"loss": 0.7908, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 16.14035415649414, |
|
"rewards/margins": 7.5548906326293945, |
|
"rewards/rejected": 8.585463523864746, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 51.05670928955078, |
|
"learning_rate": 5.118438975092605e-06, |
|
"logits/chosen": -1.503549575805664, |
|
"logits/rejected": -1.5023475885391235, |
|
"logps/chosen": -113.84260559082031, |
|
"logps/rejected": -193.15135192871094, |
|
"loss": 0.8541, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 15.795028686523438, |
|
"rewards/margins": 7.298943519592285, |
|
"rewards/rejected": 8.496085166931152, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 10.384552955627441, |
|
"learning_rate": 5.031032840772048e-06, |
|
"logits/chosen": -1.49759840965271, |
|
"logits/rejected": -1.491120457649231, |
|
"logps/chosen": -110.45916748046875, |
|
"logps/rejected": -189.4463653564453, |
|
"loss": 0.8419, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 16.17422866821289, |
|
"rewards/margins": 7.4651031494140625, |
|
"rewards/rejected": 8.709126472473145, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 21.21319007873535, |
|
"learning_rate": 4.943617220904091e-06, |
|
"logits/chosen": -1.5416353940963745, |
|
"logits/rejected": -1.5274879932403564, |
|
"logps/chosen": -113.6543197631836, |
|
"logps/rejected": -192.6654815673828, |
|
"loss": 0.8271, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 17.103158950805664, |
|
"rewards/margins": 9.091134071350098, |
|
"rewards/rejected": 8.01202392578125, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"grad_norm": 4.474107265472412, |
|
"learning_rate": 4.856218835085946e-06, |
|
"logits/chosen": -1.5196397304534912, |
|
"logits/rejected": -1.5235029458999634, |
|
"logps/chosen": -102.72175598144531, |
|
"logps/rejected": -212.8190155029297, |
|
"loss": 0.7193, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 16.488685607910156, |
|
"rewards/margins": 8.548524856567383, |
|
"rewards/rejected": 7.940161228179932, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 4.788906097412109, |
|
"learning_rate": 4.768864397647031e-06, |
|
"logits/chosen": -1.5152068138122559, |
|
"logits/rejected": -1.5065648555755615, |
|
"logps/chosen": -101.84674072265625, |
|
"logps/rejected": -219.8702392578125, |
|
"loss": 0.6819, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 17.328767776489258, |
|
"rewards/margins": 10.332796096801758, |
|
"rewards/rejected": 6.995970726013184, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 5.230409622192383, |
|
"learning_rate": 4.681580609483436e-06, |
|
"logits/chosen": -1.5237815380096436, |
|
"logits/rejected": -1.5128507614135742, |
|
"logps/chosen": -107.20805358886719, |
|
"logps/rejected": -202.7063751220703, |
|
"loss": 0.7475, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 17.05699920654297, |
|
"rewards/margins": 9.087102890014648, |
|
"rewards/rejected": 7.969895362854004, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 4.067636013031006, |
|
"learning_rate": 4.594394149896481e-06, |
|
"logits/chosen": -1.5245317220687866, |
|
"logits/rejected": -1.5309604406356812, |
|
"logps/chosen": -104.8866958618164, |
|
"logps/rejected": -214.6997528076172, |
|
"loss": 0.7442, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 17.146936416625977, |
|
"rewards/margins": 9.150843620300293, |
|
"rewards/rejected": 7.996092319488525, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 5.677704811096191, |
|
"learning_rate": 4.507331668437878e-06, |
|
"logits/chosen": -1.524597406387329, |
|
"logits/rejected": -1.5158151388168335, |
|
"logps/chosen": -99.23400115966797, |
|
"logps/rejected": -204.5542755126953, |
|
"loss": 0.7509, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 17.156509399414062, |
|
"rewards/margins": 9.190254211425781, |
|
"rewards/rejected": 7.966255187988281, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 18.698612213134766, |
|
"learning_rate": 4.42041977676399e-06, |
|
"logits/chosen": -1.5082147121429443, |
|
"logits/rejected": -1.51360285282135, |
|
"logps/chosen": -101.81925964355469, |
|
"logps/rejected": -208.90847778320312, |
|
"loss": 0.7403, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 17.100299835205078, |
|
"rewards/margins": 9.655840873718262, |
|
"rewards/rejected": 7.444457054138184, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 8.17164421081543, |
|
"learning_rate": 4.333685040501664e-06, |
|
"logits/chosen": -1.5298702716827393, |
|
"logits/rejected": -1.5242555141448975, |
|
"logps/chosen": -103.73741149902344, |
|
"logps/rejected": -200.9259796142578, |
|
"loss": 0.7967, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 16.915372848510742, |
|
"rewards/margins": 8.870743751525879, |
|
"rewards/rejected": 8.04463005065918, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 11.650801658630371, |
|
"learning_rate": 4.247153971128145e-06, |
|
"logits/chosen": -1.518059492111206, |
|
"logits/rejected": -1.512229323387146, |
|
"logps/chosen": -100.54881286621094, |
|
"logps/rejected": -197.3603515625, |
|
"loss": 0.7513, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 17.221500396728516, |
|
"rewards/margins": 9.602907180786133, |
|
"rewards/rejected": 7.618594169616699, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 43.56822967529297, |
|
"learning_rate": 4.160853017867531e-06, |
|
"logits/chosen": -1.5179004669189453, |
|
"logits/rejected": -1.5160772800445557, |
|
"logps/chosen": -103.8607177734375, |
|
"logps/rejected": -198.41555786132812, |
|
"loss": 0.7345, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 16.935503005981445, |
|
"rewards/margins": 8.681713104248047, |
|
"rewards/rejected": 8.253790855407715, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_logits/chosen": -1.4289880990982056, |
|
"eval_logits/rejected": -1.4324686527252197, |
|
"eval_logps/chosen": -150.1593017578125, |
|
"eval_logps/rejected": -177.23570251464844, |
|
"eval_loss": 2.0659372806549072, |
|
"eval_rewards/accuracies": 0.6425532102584839, |
|
"eval_rewards/chosen": 9.149377822875977, |
|
"eval_rewards/margins": 0.9979680180549622, |
|
"eval_rewards/rejected": 8.151410102844238, |
|
"eval_runtime": 280.4165, |
|
"eval_samples_per_second": 2.514, |
|
"eval_steps_per_second": 2.514, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 18.621665954589844, |
|
"learning_rate": 4.074808559606264e-06, |
|
"logits/chosen": -1.5129244327545166, |
|
"logits/rejected": -1.5049433708190918, |
|
"logps/chosen": -100.34537506103516, |
|
"logps/rejected": -216.16506958007812, |
|
"loss": 0.6972, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 17.9000301361084, |
|
"rewards/margins": 10.329971313476562, |
|
"rewards/rejected": 7.570061683654785, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 38.73393630981445, |
|
"learning_rate": 3.989046896830119e-06, |
|
"logits/chosen": -1.5301315784454346, |
|
"logits/rejected": -1.5370006561279297, |
|
"logps/chosen": -108.65140533447266, |
|
"logps/rejected": -213.85879516601562, |
|
"loss": 0.8484, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 16.508888244628906, |
|
"rewards/margins": 9.253557205200195, |
|
"rewards/rejected": 7.255330562591553, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 5.319835662841797, |
|
"learning_rate": 3.9035942435851504e-06, |
|
"logits/chosen": -1.5252224206924438, |
|
"logits/rejected": -1.5232148170471191, |
|
"logps/chosen": -102.89615631103516, |
|
"logps/rejected": -206.59750366210938, |
|
"loss": 0.7447, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 17.126842498779297, |
|
"rewards/margins": 9.563276290893555, |
|
"rewards/rejected": 7.563568115234375, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 19.18516731262207, |
|
"learning_rate": 3.818476719465073e-06, |
|
"logits/chosen": -1.5133918523788452, |
|
"logits/rejected": -1.503049612045288, |
|
"logps/chosen": -111.7210693359375, |
|
"logps/rejected": -180.65403747558594, |
|
"loss": 0.9205, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 15.973672866821289, |
|
"rewards/margins": 7.1840667724609375, |
|
"rewards/rejected": 8.789606094360352, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 20.20930290222168, |
|
"learning_rate": 3.7337203416274993e-06, |
|
"logits/chosen": -1.5373504161834717, |
|
"logits/rejected": -1.5284373760223389, |
|
"logps/chosen": -98.73445129394531, |
|
"logps/rejected": -208.073486328125, |
|
"loss": 0.7114, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 17.71356773376465, |
|
"rewards/margins": 10.003168106079102, |
|
"rewards/rejected": 7.710400581359863, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 26.072620391845703, |
|
"learning_rate": 3.6493510168414924e-06, |
|
"logits/chosen": -1.5504237413406372, |
|
"logits/rejected": -1.5454633235931396, |
|
"logps/chosen": -104.91600036621094, |
|
"logps/rejected": -212.7382354736328, |
|
"loss": 0.8006, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 16.929195404052734, |
|
"rewards/margins": 10.16772174835205, |
|
"rewards/rejected": 6.761473178863525, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 4.6664628982543945, |
|
"learning_rate": 3.5653945335688688e-06, |
|
"logits/chosen": -1.520021677017212, |
|
"logits/rejected": -1.5187304019927979, |
|
"logps/chosen": -99.18524932861328, |
|
"logps/rejected": -208.15414428710938, |
|
"loss": 0.7484, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 17.673595428466797, |
|
"rewards/margins": 9.892778396606445, |
|
"rewards/rejected": 7.780816555023193, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 17.390003204345703, |
|
"learning_rate": 3.4818765540816505e-06, |
|
"logits/chosen": -1.5037453174591064, |
|
"logits/rejected": -1.4973242282867432, |
|
"logps/chosen": -120.0849609375, |
|
"logps/rejected": -196.7596893310547, |
|
"loss": 0.8644, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 15.961163520812988, |
|
"rewards/margins": 8.226602554321289, |
|
"rewards/rejected": 7.734560489654541, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 29.988948822021484, |
|
"learning_rate": 3.398822606618095e-06, |
|
"logits/chosen": -1.507930040359497, |
|
"logits/rejected": -1.5118629932403564, |
|
"logps/chosen": -95.96809387207031, |
|
"logps/rejected": -216.37130737304688, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 17.203866958618164, |
|
"rewards/margins": 9.483266830444336, |
|
"rewards/rejected": 7.7205986976623535, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 23.51713752746582, |
|
"learning_rate": 3.3162580775796994e-06, |
|
"logits/chosen": -1.4951313734054565, |
|
"logits/rejected": -1.48732590675354, |
|
"logps/chosen": -102.76708984375, |
|
"logps/rejected": -198.36593627929688, |
|
"loss": 0.8403, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 15.965538024902344, |
|
"rewards/margins": 7.880660057067871, |
|
"rewards/rejected": 8.084877967834473, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 5.080748558044434, |
|
"learning_rate": 3.2342082037715404e-06, |
|
"logits/chosen": -1.4765777587890625, |
|
"logits/rejected": -1.4732264280319214, |
|
"logps/chosen": -101.1002426147461, |
|
"logps/rejected": -198.5289306640625, |
|
"loss": 0.7783, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 16.523101806640625, |
|
"rewards/margins": 8.514945983886719, |
|
"rewards/rejected": 8.008153915405273, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 15.195828437805176, |
|
"learning_rate": 3.1526980646883664e-06, |
|
"logits/chosen": -1.49948251247406, |
|
"logits/rejected": -1.49336838722229, |
|
"logps/chosen": -113.28621673583984, |
|
"logps/rejected": -198.71463012695312, |
|
"loss": 0.8398, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 16.502878189086914, |
|
"rewards/margins": 8.838693618774414, |
|
"rewards/rejected": 7.664183139801025, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 8.84524154663086, |
|
"learning_rate": 3.071752574848747e-06, |
|
"logits/chosen": -1.5089815855026245, |
|
"logits/rejected": -1.5023810863494873, |
|
"logps/chosen": -109.5863265991211, |
|
"logps/rejected": -200.3076934814453, |
|
"loss": 0.7723, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 16.556867599487305, |
|
"rewards/margins": 8.802556037902832, |
|
"rewards/rejected": 7.754312992095947, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 5.309729099273682, |
|
"learning_rate": 2.991396476179671e-06, |
|
"logits/chosen": -1.5207172632217407, |
|
"logits/rejected": -1.5075544118881226, |
|
"logps/chosen": -104.1460189819336, |
|
"logps/rejected": -200.79122924804688, |
|
"loss": 0.7185, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 18.383495330810547, |
|
"rewards/margins": 9.844137191772461, |
|
"rewards/rejected": 8.539360046386719, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 17.16521644592285, |
|
"learning_rate": 2.911654330453882e-06, |
|
"logits/chosen": -1.5120269060134888, |
|
"logits/rejected": -1.5061429738998413, |
|
"logps/chosen": -104.67869567871094, |
|
"logps/rejected": -189.43106079101562, |
|
"loss": 0.8524, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 17.262327194213867, |
|
"rewards/margins": 8.857732772827148, |
|
"rewards/rejected": 8.404593467712402, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 6.592312812805176, |
|
"learning_rate": 2.8325505117822984e-06, |
|
"logits/chosen": -1.51890230178833, |
|
"logits/rejected": -1.5155048370361328, |
|
"logps/chosen": -101.92094421386719, |
|
"logps/rejected": -207.8151092529297, |
|
"loss": 0.6951, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 16.95406723022461, |
|
"rewards/margins": 9.166045188903809, |
|
"rewards/rejected": 7.788022518157959, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 14.916762351989746, |
|
"learning_rate": 2.754109199163771e-06, |
|
"logits/chosen": -1.5025979280471802, |
|
"logits/rejected": -1.4987188577651978, |
|
"logps/chosen": -101.9366226196289, |
|
"logps/rejected": -201.5316162109375, |
|
"loss": 0.7587, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 17.186330795288086, |
|
"rewards/margins": 8.913457870483398, |
|
"rewards/rejected": 8.272873878479004, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 39.37679672241211, |
|
"learning_rate": 2.6763543690945004e-06, |
|
"logits/chosen": -1.5020328760147095, |
|
"logits/rejected": -1.4887430667877197, |
|
"logps/chosen": -109.6100845336914, |
|
"logps/rejected": -188.98495483398438, |
|
"loss": 0.913, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 17.317249298095703, |
|
"rewards/margins": 8.822264671325684, |
|
"rewards/rejected": 8.49498462677002, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 3.733832597732544, |
|
"learning_rate": 2.599309788239339e-06, |
|
"logits/chosen": -1.5142863988876343, |
|
"logits/rejected": -1.5111477375030518, |
|
"logps/chosen": -106.55147552490234, |
|
"logps/rejected": -215.3072967529297, |
|
"loss": 0.7224, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 17.323034286499023, |
|
"rewards/margins": 9.6803560256958, |
|
"rewards/rejected": 7.642678260803223, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 15.505064010620117, |
|
"learning_rate": 2.5229990061672414e-06, |
|
"logits/chosen": -1.5279539823532104, |
|
"logits/rejected": -1.5259180068969727, |
|
"logps/chosen": -97.4522476196289, |
|
"logps/rejected": -206.6697235107422, |
|
"loss": 0.6609, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 18.010801315307617, |
|
"rewards/margins": 10.6015043258667, |
|
"rewards/rejected": 7.409295558929443, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_logits/chosen": -1.4321023225784302, |
|
"eval_logits/rejected": -1.4358972311019897, |
|
"eval_logps/chosen": -151.3264923095703, |
|
"eval_logps/rejected": -180.62367248535156, |
|
"eval_loss": 2.032116174697876, |
|
"eval_rewards/accuracies": 0.6680850982666016, |
|
"eval_rewards/chosen": 9.032657623291016, |
|
"eval_rewards/margins": 1.22004234790802, |
|
"eval_rewards/rejected": 7.812614917755127, |
|
"eval_runtime": 280.3663, |
|
"eval_samples_per_second": 2.515, |
|
"eval_steps_per_second": 2.515, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 7.859879493713379, |
|
"learning_rate": 2.4474453481530587e-06, |
|
"logits/chosen": -1.5223504304885864, |
|
"logits/rejected": -1.5135400295257568, |
|
"logps/chosen": -93.24683380126953, |
|
"logps/rejected": -219.54849243164062, |
|
"loss": 0.6373, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 18.513376235961914, |
|
"rewards/margins": 11.950658798217773, |
|
"rewards/rejected": 6.562716484069824, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 27.701181411743164, |
|
"learning_rate": 2.3726719080478962e-06, |
|
"logits/chosen": -1.509887933731079, |
|
"logits/rejected": -1.5058742761611938, |
|
"logps/chosen": -103.5932388305664, |
|
"logps/rejected": -203.78501892089844, |
|
"loss": 0.7128, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 17.814315795898438, |
|
"rewards/margins": 9.831649780273438, |
|
"rewards/rejected": 7.982666969299316, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 8.064666748046875, |
|
"learning_rate": 2.298701541220218e-06, |
|
"logits/chosen": -1.5236984491348267, |
|
"logits/rejected": -1.5181363821029663, |
|
"logps/chosen": -100.51972961425781, |
|
"logps/rejected": -202.49575805664062, |
|
"loss": 0.6432, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 18.57242202758789, |
|
"rewards/margins": 11.497468948364258, |
|
"rewards/rejected": 7.074953556060791, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 5.775174140930176, |
|
"learning_rate": 2.22555685756983e-06, |
|
"logits/chosen": -1.5132160186767578, |
|
"logits/rejected": -1.503983974456787, |
|
"logps/chosen": -96.10552978515625, |
|
"logps/rejected": -201.612060546875, |
|
"loss": 0.6538, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 18.1705322265625, |
|
"rewards/margins": 10.621967315673828, |
|
"rewards/rejected": 7.548564910888672, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 7.816349983215332, |
|
"learning_rate": 2.153260214616915e-06, |
|
"logits/chosen": -1.5382534265518188, |
|
"logits/rejected": -1.5415149927139282, |
|
"logps/chosen": -95.2313461303711, |
|
"logps/rejected": -223.0685577392578, |
|
"loss": 0.5986, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 18.566387176513672, |
|
"rewards/margins": 11.582368850708008, |
|
"rewards/rejected": 6.9840192794799805, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 5.086969375610352, |
|
"learning_rate": 2.081833710668181e-06, |
|
"logits/chosen": -1.494619607925415, |
|
"logits/rejected": -1.4864647388458252, |
|
"logps/chosen": -93.64717864990234, |
|
"logps/rejected": -195.05477905273438, |
|
"loss": 0.6944, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 17.304344177246094, |
|
"rewards/margins": 10.235904693603516, |
|
"rewards/rejected": 7.068438529968262, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"grad_norm": 3.984151840209961, |
|
"learning_rate": 2.0112991780622725e-06, |
|
"logits/chosen": -1.4988569021224976, |
|
"logits/rejected": -1.4943821430206299, |
|
"logps/chosen": -102.37433624267578, |
|
"logps/rejected": -204.2311553955078, |
|
"loss": 0.6708, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 18.241689682006836, |
|
"rewards/margins": 10.953558921813965, |
|
"rewards/rejected": 7.288130283355713, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"grad_norm": 39.707828521728516, |
|
"learning_rate": 1.9416781764964486e-06, |
|
"logits/chosen": -1.5009758472442627, |
|
"logits/rejected": -1.5007375478744507, |
|
"logps/chosen": -95.72146606445312, |
|
"logps/rejected": -212.9442901611328, |
|
"loss": 0.654, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 17.809816360473633, |
|
"rewards/margins": 10.415254592895508, |
|
"rewards/rejected": 7.394561767578125, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"grad_norm": 6.5094475746154785, |
|
"learning_rate": 1.8729919864366292e-06, |
|
"logits/chosen": -1.5389680862426758, |
|
"logits/rejected": -1.5245224237442017, |
|
"logps/chosen": -93.90787506103516, |
|
"logps/rejected": -200.7926788330078, |
|
"loss": 0.6446, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 18.495624542236328, |
|
"rewards/margins": 10.695076942443848, |
|
"rewards/rejected": 7.800548553466797, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"grad_norm": 11.99113655090332, |
|
"learning_rate": 1.8052616026127563e-06, |
|
"logits/chosen": -1.5282859802246094, |
|
"logits/rejected": -1.5253300666809082, |
|
"logps/chosen": -98.3044204711914, |
|
"logps/rejected": -214.7481689453125, |
|
"loss": 0.6627, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 17.510263442993164, |
|
"rewards/margins": 10.662416458129883, |
|
"rewards/rejected": 6.847846984863281, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"grad_norm": 14.8443021774292, |
|
"learning_rate": 1.7385077276015267e-06, |
|
"logits/chosen": -1.5107916593551636, |
|
"logits/rejected": -1.513163685798645, |
|
"logps/chosen": -92.03627014160156, |
|
"logps/rejected": -213.4009552001953, |
|
"loss": 0.6345, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 18.635957717895508, |
|
"rewards/margins": 11.570890426635742, |
|
"rewards/rejected": 7.065066337585449, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 12.072715759277344, |
|
"learning_rate": 1.6727507654983977e-06, |
|
"logits/chosen": -1.5300309658050537, |
|
"logits/rejected": -1.523341417312622, |
|
"logps/chosen": -107.01918029785156, |
|
"logps/rejected": -197.11643981933594, |
|
"loss": 0.7373, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 17.78557014465332, |
|
"rewards/margins": 10.176875114440918, |
|
"rewards/rejected": 7.608694553375244, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"grad_norm": 8.86849594116211, |
|
"learning_rate": 1.6080108156808439e-06, |
|
"logits/chosen": -1.5328854322433472, |
|
"logits/rejected": -1.5194957256317139, |
|
"logps/chosen": -100.20330810546875, |
|
"logps/rejected": -195.58236694335938, |
|
"loss": 0.6678, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 18.137895584106445, |
|
"rewards/margins": 10.396581649780273, |
|
"rewards/rejected": 7.7413129806518555, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"grad_norm": 4.92030143737793, |
|
"learning_rate": 1.5443076666647545e-06, |
|
"logits/chosen": -1.5401328802108765, |
|
"logits/rejected": -1.5302283763885498, |
|
"logps/chosen": -87.4715805053711, |
|
"logps/rejected": -214.174560546875, |
|
"loss": 0.5832, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 19.330814361572266, |
|
"rewards/margins": 11.920788764953613, |
|
"rewards/rejected": 7.410025596618652, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 5.7385783195495605, |
|
"learning_rate": 1.4816607900558311e-06, |
|
"logits/chosen": -1.5227100849151611, |
|
"logits/rejected": -1.5200421810150146, |
|
"logps/chosen": -87.50262451171875, |
|
"logps/rejected": -214.90176391601562, |
|
"loss": 0.6009, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 18.722919464111328, |
|
"rewards/margins": 11.610313415527344, |
|
"rewards/rejected": 7.112607002258301, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"grad_norm": 4.156861305236816, |
|
"learning_rate": 1.4200893345978816e-06, |
|
"logits/chosen": -1.5246403217315674, |
|
"logits/rejected": -1.5162007808685303, |
|
"logps/chosen": -95.39898681640625, |
|
"logps/rejected": -208.87509155273438, |
|
"loss": 0.6347, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 18.26802635192871, |
|
"rewards/margins": 11.145674705505371, |
|
"rewards/rejected": 7.122353553771973, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 3.7285964488983154, |
|
"learning_rate": 1.3596121203197715e-06, |
|
"logits/chosen": -1.5043359994888306, |
|
"logits/rejected": -1.5017638206481934, |
|
"logps/chosen": -101.5743637084961, |
|
"logps/rejected": -205.45144653320312, |
|
"loss": 0.7345, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 17.542848587036133, |
|
"rewards/margins": 10.09937858581543, |
|
"rewards/rejected": 7.443469047546387, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"grad_norm": 5.035600662231445, |
|
"learning_rate": 1.3002476327828717e-06, |
|
"logits/chosen": -1.5371487140655518, |
|
"logits/rejected": -1.5391342639923096, |
|
"logps/chosen": -102.25202941894531, |
|
"logps/rejected": -217.117919921875, |
|
"loss": 0.6705, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 18.24382972717285, |
|
"rewards/margins": 10.82688045501709, |
|
"rewards/rejected": 7.416950225830078, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"grad_norm": 10.119084358215332, |
|
"learning_rate": 1.2420140174307267e-06, |
|
"logits/chosen": -1.5054762363433838, |
|
"logits/rejected": -1.5047041177749634, |
|
"logps/chosen": -90.96085357666016, |
|
"logps/rejected": -203.27731323242188, |
|
"loss": 0.6701, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 16.905033111572266, |
|
"rewards/margins": 9.766073226928711, |
|
"rewards/rejected": 7.1389594078063965, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"grad_norm": 4.378362655639648, |
|
"learning_rate": 1.1849290740426994e-06, |
|
"logits/chosen": -1.52398681640625, |
|
"logits/rejected": -1.5253530740737915, |
|
"logps/chosen": -100.94436645507812, |
|
"logps/rejected": -208.01913452148438, |
|
"loss": 0.6768, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 17.257160186767578, |
|
"rewards/margins": 10.379103660583496, |
|
"rewards/rejected": 6.878057956695557, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_logits/chosen": -1.4431970119476318, |
|
"eval_logits/rejected": -1.447227120399475, |
|
"eval_logps/chosen": -150.6456756591797, |
|
"eval_logps/rejected": -179.82106018066406, |
|
"eval_loss": 2.0312814712524414, |
|
"eval_rewards/accuracies": 0.6709219813346863, |
|
"eval_rewards/chosen": 9.100737571716309, |
|
"eval_rewards/margins": 1.207862377166748, |
|
"eval_rewards/rejected": 7.892876148223877, |
|
"eval_runtime": 280.087, |
|
"eval_samples_per_second": 2.517, |
|
"eval_steps_per_second": 2.517, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"grad_norm": 4.4983320236206055, |
|
"learning_rate": 1.1290102512932482e-06, |
|
"logits/chosen": -1.5456353425979614, |
|
"logits/rejected": -1.5437848567962646, |
|
"logps/chosen": -90.5999984741211, |
|
"logps/rejected": -215.61239624023438, |
|
"loss": 0.6084, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 18.774730682373047, |
|
"rewards/margins": 11.802639961242676, |
|
"rewards/rejected": 6.972087860107422, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"grad_norm": 8.901042938232422, |
|
"learning_rate": 1.074274641418554e-06, |
|
"logits/chosen": -1.5062071084976196, |
|
"logits/rejected": -1.502605676651001, |
|
"logps/chosen": -93.99595642089844, |
|
"logps/rejected": -201.00819396972656, |
|
"loss": 0.6596, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 17.505390167236328, |
|
"rewards/margins": 10.57873249053955, |
|
"rewards/rejected": 6.926657199859619, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"grad_norm": 5.139017581939697, |
|
"learning_rate": 1.0207389749920593e-06, |
|
"logits/chosen": -1.509242057800293, |
|
"logits/rejected": -1.511796474456787, |
|
"logps/chosen": -96.8970718383789, |
|
"logps/rejected": -215.5059051513672, |
|
"loss": 0.6782, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 17.298181533813477, |
|
"rewards/margins": 10.531153678894043, |
|
"rewards/rejected": 6.767026424407959, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 4.371284484863281, |
|
"learning_rate": 9.68419615810598e-07, |
|
"logits/chosen": -1.5192222595214844, |
|
"logits/rejected": -1.5184710025787354, |
|
"logps/chosen": -92.4468002319336, |
|
"logps/rejected": -208.1323699951172, |
|
"loss": 0.6454, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 17.749366760253906, |
|
"rewards/margins": 10.604511260986328, |
|
"rewards/rejected": 7.144855499267578, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"grad_norm": 3.970381259918213, |
|
"learning_rate": 9.173325558925905e-07, |
|
"logits/chosen": -1.5145829916000366, |
|
"logits/rejected": -1.51982843875885, |
|
"logps/chosen": -90.59550476074219, |
|
"logps/rejected": -224.2189483642578, |
|
"loss": 0.6051, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 17.675228118896484, |
|
"rewards/margins": 11.378229141235352, |
|
"rewards/rejected": 6.296999931335449, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"grad_norm": 4.847020626068115, |
|
"learning_rate": 8.674934105899152e-07, |
|
"logits/chosen": -1.4975147247314453, |
|
"logits/rejected": -1.4984054565429688, |
|
"logps/chosen": -95.9659423828125, |
|
"logps/rejected": -205.8800048828125, |
|
"loss": 0.6823, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 17.054203033447266, |
|
"rewards/margins": 9.49101448059082, |
|
"rewards/rejected": 7.563189506530762, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"grad_norm": 4.591710090637207, |
|
"learning_rate": 8.189174138148814e-07, |
|
"logits/chosen": -1.5397193431854248, |
|
"logits/rejected": -1.5424444675445557, |
|
"logps/chosen": -83.99203491210938, |
|
"logps/rejected": -215.84378051757812, |
|
"loss": 0.5823, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 18.956871032714844, |
|
"rewards/margins": 11.750204086303711, |
|
"rewards/rejected": 7.206667900085449, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"grad_norm": 5.379372596740723, |
|
"learning_rate": 7.716194133838135e-07, |
|
"logits/chosen": -1.5043582916259766, |
|
"logits/rejected": -1.507912278175354, |
|
"logps/chosen": -94.89381408691406, |
|
"logps/rejected": -209.4828338623047, |
|
"loss": 0.654, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 17.687734603881836, |
|
"rewards/margins": 10.847681045532227, |
|
"rewards/rejected": 6.840054988861084, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"grad_norm": 7.855030536651611, |
|
"learning_rate": 7.256138664786477e-07, |
|
"logits/chosen": -1.5224730968475342, |
|
"logits/rejected": -1.5275871753692627, |
|
"logps/chosen": -83.72583770751953, |
|
"logps/rejected": -211.76937866210938, |
|
"loss": 0.5732, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 19.119752883911133, |
|
"rewards/margins": 12.638054847717285, |
|
"rewards/rejected": 6.481698513031006, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 3.8725578784942627, |
|
"learning_rate": 6.809148352279182e-07, |
|
"logits/chosen": -1.5395710468292236, |
|
"logits/rejected": -1.5471025705337524, |
|
"logps/chosen": -90.97738647460938, |
|
"logps/rejected": -220.6120147705078, |
|
"loss": 0.6148, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 18.08513641357422, |
|
"rewards/margins": 11.457734107971191, |
|
"rewards/rejected": 6.627403259277344, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"grad_norm": 8.233463287353516, |
|
"learning_rate": 6.375359824085126e-07, |
|
"logits/chosen": -1.5111545324325562, |
|
"logits/rejected": -1.5080124139785767, |
|
"logps/chosen": -100.68155670166016, |
|
"logps/rejected": -211.43820190429688, |
|
"loss": 0.7931, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 17.69210433959961, |
|
"rewards/margins": 10.682944297790527, |
|
"rewards/rejected": 7.009159088134766, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"grad_norm": 5.737654209136963, |
|
"learning_rate": 5.954905672694805e-07, |
|
"logits/chosen": -1.5383660793304443, |
|
"logits/rejected": -1.5310518741607666, |
|
"logps/chosen": -91.32364654541016, |
|
"logps/rejected": -210.65103149414062, |
|
"loss": 0.5817, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 19.20272445678711, |
|
"rewards/margins": 11.968230247497559, |
|
"rewards/rejected": 7.23449182510376, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"grad_norm": 5.602839469909668, |
|
"learning_rate": 5.547914414791922e-07, |
|
"logits/chosen": -1.523057222366333, |
|
"logits/rejected": -1.526564121246338, |
|
"logps/chosen": -92.25067901611328, |
|
"logps/rejected": -210.3281707763672, |
|
"loss": 0.6644, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 17.999813079833984, |
|
"rewards/margins": 10.866498947143555, |
|
"rewards/rejected": 7.133312225341797, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"grad_norm": 80.35784912109375, |
|
"learning_rate": 5.154510451970762e-07, |
|
"logits/chosen": -1.5100964307785034, |
|
"logits/rejected": -1.5057765245437622, |
|
"logps/chosen": -91.3646469116211, |
|
"logps/rejected": -208.1036834716797, |
|
"loss": 0.698, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 17.777923583984375, |
|
"rewards/margins": 10.551263809204102, |
|
"rewards/rejected": 7.22666072845459, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"grad_norm": 11.723702430725098, |
|
"learning_rate": 4.774814032711461e-07, |
|
"logits/chosen": -1.5164598226547241, |
|
"logits/rejected": -1.5183497667312622, |
|
"logps/chosen": -99.03367614746094, |
|
"logps/rejected": -213.4344024658203, |
|
"loss": 0.6773, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 17.00816535949707, |
|
"rewards/margins": 10.20258903503418, |
|
"rewards/rejected": 6.805574893951416, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"grad_norm": 10.103087425231934, |
|
"learning_rate": 4.4089412156245793e-07, |
|
"logits/chosen": -1.5356425046920776, |
|
"logits/rejected": -1.5423994064331055, |
|
"logps/chosen": -83.90791320800781, |
|
"logps/rejected": -210.4249267578125, |
|
"loss": 0.5745, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 18.552326202392578, |
|
"rewards/margins": 11.089494705200195, |
|
"rewards/rejected": 7.462831020355225, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"grad_norm": 8.58123779296875, |
|
"learning_rate": 4.0570038339764803e-07, |
|
"logits/chosen": -1.5257699489593506, |
|
"logits/rejected": -1.521463394165039, |
|
"logps/chosen": -99.25550842285156, |
|
"logps/rejected": -212.4494171142578, |
|
"loss": 0.6679, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 18.084367752075195, |
|
"rewards/margins": 11.122998237609863, |
|
"rewards/rejected": 6.961369514465332, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"grad_norm": 5.0036845207214355, |
|
"learning_rate": 3.719109461506215e-07, |
|
"logits/chosen": -1.4980933666229248, |
|
"logits/rejected": -1.5047228336334229, |
|
"logps/chosen": -89.56025695800781, |
|
"logps/rejected": -215.61279296875, |
|
"loss": 0.6389, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 17.403789520263672, |
|
"rewards/margins": 10.387609481811523, |
|
"rewards/rejected": 7.016180515289307, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"grad_norm": 5.2077412605285645, |
|
"learning_rate": 3.3953613795443376e-07, |
|
"logits/chosen": -1.5230729579925537, |
|
"logits/rejected": -1.5227991342544556, |
|
"logps/chosen": -90.34446716308594, |
|
"logps/rejected": -208.3804931640625, |
|
"loss": 0.7039, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 18.70303726196289, |
|
"rewards/margins": 11.128946304321289, |
|
"rewards/rejected": 7.574089050292969, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"grad_norm": 5.617701530456543, |
|
"learning_rate": 3.0858585454437927e-07, |
|
"logits/chosen": -1.508111596107483, |
|
"logits/rejected": -1.5131912231445312, |
|
"logps/chosen": -89.39806365966797, |
|
"logps/rejected": -204.76675415039062, |
|
"loss": 0.615, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 17.604631423950195, |
|
"rewards/margins": 10.715816497802734, |
|
"rewards/rejected": 6.8888139724731445, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_logits/chosen": -1.4370256662368774, |
|
"eval_logits/rejected": -1.4413024187088013, |
|
"eval_logps/chosen": -150.68116760253906, |
|
"eval_logps/rejected": -179.16799926757812, |
|
"eval_loss": 2.051481008529663, |
|
"eval_rewards/accuracies": 0.6624113321304321, |
|
"eval_rewards/chosen": 9.097188949584961, |
|
"eval_rewards/margins": 1.1390060186386108, |
|
"eval_rewards/rejected": 7.958182334899902, |
|
"eval_runtime": 280.5593, |
|
"eval_samples_per_second": 2.513, |
|
"eval_steps_per_second": 2.513, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"grad_norm": 18.668004989624023, |
|
"learning_rate": 2.7906955623324074e-07, |
|
"logits/chosen": -1.519481897354126, |
|
"logits/rejected": -1.5189173221588135, |
|
"logps/chosen": -93.53435516357422, |
|
"logps/rejected": -198.51348876953125, |
|
"loss": 0.6712, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 17.499284744262695, |
|
"rewards/margins": 10.426948547363281, |
|
"rewards/rejected": 7.072335720062256, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"grad_norm": 4.54674768447876, |
|
"learning_rate": 2.509962650196407e-07, |
|
"logits/chosen": -1.5232311487197876, |
|
"logits/rejected": -1.5235909223556519, |
|
"logps/chosen": -89.19126892089844, |
|
"logps/rejected": -211.9619140625, |
|
"loss": 0.6203, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 18.46454620361328, |
|
"rewards/margins": 11.837278366088867, |
|
"rewards/rejected": 6.627265930175781, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"grad_norm": 13.877330780029297, |
|
"learning_rate": 2.2437456183035833e-07, |
|
"logits/chosen": -1.5302555561065674, |
|
"logits/rejected": -1.5268919467926025, |
|
"logps/chosen": -90.18998718261719, |
|
"logps/rejected": -205.0181121826172, |
|
"loss": 0.6213, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 18.42779541015625, |
|
"rewards/margins": 11.21821403503418, |
|
"rewards/rejected": 7.209580898284912, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"grad_norm": 7.050842761993408, |
|
"learning_rate": 1.99212583897474e-07, |
|
"logits/chosen": -1.5237982273101807, |
|
"logits/rejected": -1.5170161724090576, |
|
"logps/chosen": -96.49137115478516, |
|
"logps/rejected": -208.6808319091797, |
|
"loss": 0.6311, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 17.960681915283203, |
|
"rewards/margins": 11.038247108459473, |
|
"rewards/rejected": 6.9224348068237305, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"grad_norm": 9.275837898254395, |
|
"learning_rate": 1.7551802227112558e-07, |
|
"logits/chosen": -1.5040963888168335, |
|
"logits/rejected": -1.5078271627426147, |
|
"logps/chosen": -91.1369857788086, |
|
"logps/rejected": -209.1532745361328, |
|
"loss": 0.622, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 17.7973690032959, |
|
"rewards/margins": 10.730772972106934, |
|
"rewards/rejected": 7.066598415374756, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"grad_norm": 5.387999534606934, |
|
"learning_rate": 1.5329811946865392e-07, |
|
"logits/chosen": -1.5108160972595215, |
|
"logits/rejected": -1.5107080936431885, |
|
"logps/chosen": -98.21044921875, |
|
"logps/rejected": -212.44580078125, |
|
"loss": 0.6516, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 17.72305679321289, |
|
"rewards/margins": 10.394246101379395, |
|
"rewards/rejected": 7.328810214996338, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"grad_norm": 5.4950714111328125, |
|
"learning_rate": 1.3255966726084036e-07, |
|
"logits/chosen": -1.504311442375183, |
|
"logits/rejected": -1.5167784690856934, |
|
"logps/chosen": -87.20539093017578, |
|
"logps/rejected": -200.42111206054688, |
|
"loss": 0.6145, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 17.13315200805664, |
|
"rewards/margins": 10.226241111755371, |
|
"rewards/rejected": 6.9069085121154785, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 6.581058979034424, |
|
"learning_rate": 1.1330900459592564e-07, |
|
"logits/chosen": -1.5215296745300293, |
|
"logits/rejected": -1.5147764682769775, |
|
"logps/chosen": -92.18501281738281, |
|
"logps/rejected": -193.42245483398438, |
|
"loss": 0.6203, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 17.840869903564453, |
|
"rewards/margins": 10.311830520629883, |
|
"rewards/rejected": 7.529041290283203, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"grad_norm": 29.634891510009766, |
|
"learning_rate": 9.55520156620332e-08, |
|
"logits/chosen": -1.5149075984954834, |
|
"logits/rejected": -1.5113269090652466, |
|
"logps/chosen": -92.65069580078125, |
|
"logps/rejected": -197.09310913085938, |
|
"loss": 0.6481, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 18.486942291259766, |
|
"rewards/margins": 11.153399467468262, |
|
"rewards/rejected": 7.333543300628662, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"grad_norm": 7.716902256011963, |
|
"learning_rate": 7.92941280886056e-08, |
|
"logits/chosen": -1.5236696004867554, |
|
"logits/rejected": -1.5350602865219116, |
|
"logps/chosen": -83.11317443847656, |
|
"logps/rejected": -225.55429077148438, |
|
"loss": 0.619, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 18.241207122802734, |
|
"rewards/margins": 11.599997520446777, |
|
"rewards/rejected": 6.641209602355957, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"grad_norm": 9.752326011657715, |
|
"learning_rate": 6.454031128737881e-08, |
|
"logits/chosen": -1.5312079191207886, |
|
"logits/rejected": -1.5227811336517334, |
|
"logps/chosen": -93.35438537597656, |
|
"logps/rejected": -215.9623260498047, |
|
"loss": 0.6241, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 19.262916564941406, |
|
"rewards/margins": 12.349664688110352, |
|
"rewards/rejected": 6.9132513999938965, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"grad_norm": 9.797955513000488, |
|
"learning_rate": 5.129507493343011e-08, |
|
"logits/chosen": -1.511307716369629, |
|
"logits/rejected": -1.5151628255844116, |
|
"logps/chosen": -94.07261657714844, |
|
"logps/rejected": -221.2146453857422, |
|
"loss": 0.6836, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 18.428720474243164, |
|
"rewards/margins": 11.63404369354248, |
|
"rewards/rejected": 6.794674873352051, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"grad_norm": 4.7570719718933105, |
|
"learning_rate": 3.956246758674065e-08, |
|
"logits/chosen": -1.5266796350479126, |
|
"logits/rejected": -1.5276660919189453, |
|
"logps/chosen": -96.55259704589844, |
|
"logps/rejected": -218.0394744873047, |
|
"loss": 0.6813, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 18.319250106811523, |
|
"rewards/margins": 11.030166625976562, |
|
"rewards/rejected": 7.289083003997803, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"grad_norm": 5.806319713592529, |
|
"learning_rate": 2.934607545470536e-08, |
|
"logits/chosen": -1.5195215940475464, |
|
"logits/rejected": -1.5297635793685913, |
|
"logps/chosen": -87.46583557128906, |
|
"logps/rejected": -229.15591430664062, |
|
"loss": 0.6653, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 18.350963592529297, |
|
"rewards/margins": 11.915371894836426, |
|
"rewards/rejected": 6.4355902671813965, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"grad_norm": 4.364658832550049, |
|
"learning_rate": 2.0649021295970906e-08, |
|
"logits/chosen": -1.5325102806091309, |
|
"logits/rejected": -1.5392825603485107, |
|
"logps/chosen": -89.66578674316406, |
|
"logps/rejected": -212.051513671875, |
|
"loss": 0.6212, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 18.12158966064453, |
|
"rewards/margins": 11.411016464233398, |
|
"rewards/rejected": 6.710572719573975, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"grad_norm": 7.178764343261719, |
|
"learning_rate": 1.3473963465924222e-08, |
|
"logits/chosen": -1.5103847980499268, |
|
"logits/rejected": -1.510608434677124, |
|
"logps/chosen": -90.99683380126953, |
|
"logps/rejected": -202.84072875976562, |
|
"loss": 0.6385, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 17.887941360473633, |
|
"rewards/margins": 11.204258918762207, |
|
"rewards/rejected": 6.683682441711426, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"grad_norm": 9.613419532775879, |
|
"learning_rate": 7.823095104137479e-09, |
|
"logits/chosen": -1.5148423910140991, |
|
"logits/rejected": -1.5170824527740479, |
|
"logps/chosen": -101.44172668457031, |
|
"logps/rejected": -203.09060668945312, |
|
"loss": 0.7437, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 17.126873016357422, |
|
"rewards/margins": 9.506840705871582, |
|
"rewards/rejected": 7.620034694671631, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"grad_norm": 3.819849729537964, |
|
"learning_rate": 3.6981434640093183e-09, |
|
"logits/chosen": -1.5141820907592773, |
|
"logits/rejected": -1.5163359642028809, |
|
"logps/chosen": -91.76555633544922, |
|
"logps/rejected": -208.36703491210938, |
|
"loss": 0.6192, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 18.568470001220703, |
|
"rewards/margins": 11.578906059265137, |
|
"rewards/rejected": 6.989563941955566, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"grad_norm": 25.042579650878906, |
|
"learning_rate": 1.1003693848093965e-09, |
|
"logits/chosen": -1.5274379253387451, |
|
"logits/rejected": -1.5291308164596558, |
|
"logps/chosen": -102.40494537353516, |
|
"logps/rejected": -220.06063842773438, |
|
"loss": 0.7154, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 17.37801742553711, |
|
"rewards/margins": 10.208986282348633, |
|
"rewards/rejected": 7.169030666351318, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 1797, |
|
"total_flos": 8.333096122255933e+17, |
|
"train_loss": 0.8950637202828078, |
|
"train_runtime": 14615.0375, |
|
"train_samples_per_second": 0.985, |
|
"train_steps_per_second": 0.123 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1797, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 200, |
|
"total_flos": 8.333096122255933e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|