|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 800000000, |
|
"global_step": 383, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 1.6875, |
|
"learning_rate": 1.282051282051282e-07, |
|
"logits/chosen": -3.3797154426574707, |
|
"logits/rejected": -3.440782070159912, |
|
"logps/chosen": -244.57943725585938, |
|
"logps/rejected": -168.14312744140625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/diff": -0.5416666865348816, |
|
"rewards/diff_abs": 0.5416666865348816, |
|
"rewards/rejected": 0.0, |
|
"rewards/student_margin": 0.0, |
|
"rewards/teacher_margin": 0.5416666865348816, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.7421875, |
|
"learning_rate": 1.282051282051282e-06, |
|
"logits/chosen": -3.3581883907318115, |
|
"logits/rejected": -3.306663990020752, |
|
"logps/chosen": -323.6011657714844, |
|
"logps/rejected": -269.5755615234375, |
|
"loss": 0.6946, |
|
"rewards/accuracies": 0.48148155212402344, |
|
"rewards/chosen": -0.002131123561412096, |
|
"rewards/diff": -2.200160026550293, |
|
"rewards/diff_abs": 2.200160026550293, |
|
"rewards/rejected": -0.0022028146777302027, |
|
"rewards/student_margin": 7.169279706431553e-05, |
|
"rewards/teacher_margin": 2.2002317905426025, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.6875, |
|
"learning_rate": 2.564102564102564e-06, |
|
"logits/chosen": -3.5238196849823, |
|
"logits/rejected": -3.590470552444458, |
|
"logps/chosen": -277.163818359375, |
|
"logps/rejected": -192.54022216796875, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.5333333015441895, |
|
"rewards/chosen": -0.003379967762157321, |
|
"rewards/diff": -2.1003687381744385, |
|
"rewards/diff_abs": 2.1011030673980713, |
|
"rewards/rejected": -0.007698670029640198, |
|
"rewards/student_margin": 0.004318701568990946, |
|
"rewards/teacher_margin": 2.1046876907348633, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.6875, |
|
"learning_rate": 3.846153846153847e-06, |
|
"logits/chosen": -3.4147961139678955, |
|
"logits/rejected": -3.542013168334961, |
|
"logps/chosen": -301.5926208496094, |
|
"logps/rejected": -231.64608764648438, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.5666667222976685, |
|
"rewards/chosen": 0.017984800040721893, |
|
"rewards/diff": -2.444106101989746, |
|
"rewards/diff_abs": 2.444106101989746, |
|
"rewards/rejected": 0.006361725740134716, |
|
"rewards/student_margin": 0.011623072437942028, |
|
"rewards/teacher_margin": 2.4557292461395264, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.640625, |
|
"learning_rate": 4.99989574668946e-06, |
|
"logits/chosen": -3.4892425537109375, |
|
"logits/rejected": -3.546952486038208, |
|
"logps/chosen": -249.1132049560547, |
|
"logps/rejected": -179.81195068359375, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.36666667461395264, |
|
"rewards/chosen": 0.002312846016138792, |
|
"rewards/diff": -2.8649048805236816, |
|
"rewards/diff_abs": 2.8649048805236816, |
|
"rewards/rejected": 0.019301289692521095, |
|
"rewards/student_margin": -0.016988443210721016, |
|
"rewards/teacher_margin": 2.847916841506958, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.609375, |
|
"learning_rate": 4.987395866955716e-06, |
|
"logits/chosen": -3.387133836746216, |
|
"logits/rejected": -3.533109664916992, |
|
"logps/chosen": -331.1688232421875, |
|
"logps/rejected": -186.29550170898438, |
|
"loss": 0.6808, |
|
"rewards/accuracies": 0.6666666269302368, |
|
"rewards/chosen": 0.03838258236646652, |
|
"rewards/diff": -1.9975881576538086, |
|
"rewards/diff_abs": 2.001307487487793, |
|
"rewards/rejected": -0.008821181952953339, |
|
"rewards/student_margin": 0.04720376059412956, |
|
"rewards/teacher_margin": 2.0447916984558105, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.6875, |
|
"learning_rate": 4.954164717534748e-06, |
|
"logits/chosen": -3.346745729446411, |
|
"logits/rejected": -3.3796913623809814, |
|
"logps/chosen": -327.2835388183594, |
|
"logps/rejected": -350.4725036621094, |
|
"loss": 0.679, |
|
"rewards/accuracies": 0.46666669845581055, |
|
"rewards/chosen": 0.01055043376982212, |
|
"rewards/diff": -1.2435307502746582, |
|
"rewards/diff_abs": 1.2447913885116577, |
|
"rewards/rejected": 0.049393653869628906, |
|
"rewards/student_margin": -0.038843221962451935, |
|
"rewards/teacher_margin": 1.2046875953674316, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.6171875, |
|
"learning_rate": 4.900479264361017e-06, |
|
"logits/chosen": -3.3996708393096924, |
|
"logits/rejected": -3.4439964294433594, |
|
"logps/chosen": -308.00958251953125, |
|
"logps/rejected": -278.58026123046875, |
|
"loss": 0.6708, |
|
"rewards/accuracies": 0.5666666626930237, |
|
"rewards/chosen": 0.0636972039937973, |
|
"rewards/diff": -1.3036770820617676, |
|
"rewards/diff_abs": 1.306718349456787, |
|
"rewards/rejected": 0.04393672198057175, |
|
"rewards/student_margin": 0.019760485738515854, |
|
"rewards/teacher_margin": 1.3234374523162842, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1.6328125, |
|
"learning_rate": 4.826786950329646e-06, |
|
"logits/chosen": -3.520059108734131, |
|
"logits/rejected": -3.576214551925659, |
|
"logps/chosen": -283.0509338378906, |
|
"logps/rejected": -180.75180053710938, |
|
"loss": 0.6653, |
|
"rewards/accuracies": 0.6999999284744263, |
|
"rewards/chosen": 0.07656367868185043, |
|
"rewards/diff": -1.3558346033096313, |
|
"rewards/diff_abs": 1.3813632726669312, |
|
"rewards/rejected": 0.01052325963973999, |
|
"rewards/student_margin": 0.06604041904211044, |
|
"rewards/teacher_margin": 1.421875, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 1.6640625, |
|
"learning_rate": 4.733701966071226e-06, |
|
"logits/chosen": -3.4589409828186035, |
|
"logits/rejected": -3.511751890182495, |
|
"logps/chosen": -335.0478820800781, |
|
"logps/rejected": -170.95372009277344, |
|
"loss": 0.665, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": 0.02129988744854927, |
|
"rewards/diff": -2.89522647857666, |
|
"rewards/diff_abs": 2.9056954383850098, |
|
"rewards/rejected": 0.027463769540190697, |
|
"rewards/student_margin": -0.006163885351270437, |
|
"rewards/teacher_margin": 2.8890626430511475, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.59375, |
|
"learning_rate": 4.622000130963015e-06, |
|
"logits/chosen": -3.4993369579315186, |
|
"logits/rejected": -3.5635037422180176, |
|
"logps/chosen": -305.15899658203125, |
|
"logps/rejected": -202.56192016601562, |
|
"loss": 0.6583, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.018651207908988, |
|
"rewards/diff": -2.507476329803467, |
|
"rewards/diff_abs": 2.507476329803467, |
|
"rewards/rejected": -0.02908078208565712, |
|
"rewards/student_margin": 0.04773198813199997, |
|
"rewards/teacher_margin": 2.555208444595337, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 1.5546875, |
|
"learning_rate": 4.492612427040864e-06, |
|
"logits/chosen": -3.5523293018341064, |
|
"logits/rejected": -3.6302642822265625, |
|
"logps/chosen": -277.225830078125, |
|
"logps/rejected": -200.26490783691406, |
|
"loss": 0.6502, |
|
"rewards/accuracies": 0.5999999642372131, |
|
"rewards/chosen": 0.024044061079621315, |
|
"rewards/diff": -1.571934461593628, |
|
"rewards/diff_abs": 1.571934461593628, |
|
"rewards/rejected": -0.08318804949522018, |
|
"rewards/student_margin": 0.10723210871219635, |
|
"rewards/teacher_margin": 1.6791667938232422, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 1.578125, |
|
"learning_rate": 4.346617239703676e-06, |
|
"logits/chosen": -3.480700969696045, |
|
"logits/rejected": -3.604377031326294, |
|
"logps/chosen": -304.3082580566406, |
|
"logps/rejected": -239.17782592773438, |
|
"loss": 0.6465, |
|
"rewards/accuracies": 0.7333333492279053, |
|
"rewards/chosen": 0.10159333795309067, |
|
"rewards/diff": -1.6648391485214233, |
|
"rewards/diff_abs": 1.7264082431793213, |
|
"rewards/rejected": 0.0065366788767278194, |
|
"rewards/student_margin": 0.09505666792392731, |
|
"rewards/teacher_margin": 1.7598956823349, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 1.6171875, |
|
"learning_rate": 4.185231369880461e-06, |
|
"logits/chosen": -3.216306209564209, |
|
"logits/rejected": -3.4205565452575684, |
|
"logps/chosen": -324.16461181640625, |
|
"logps/rejected": -221.4116668701172, |
|
"loss": 0.6427, |
|
"rewards/accuracies": 0.6333333253860474, |
|
"rewards/chosen": 0.020539376884698868, |
|
"rewards/diff": -2.5135083198547363, |
|
"rewards/diff_abs": 2.52120041847229, |
|
"rewards/rejected": -0.060743771493434906, |
|
"rewards/student_margin": 0.08128315210342407, |
|
"rewards/teacher_margin": 2.594791889190674, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.640625, |
|
"learning_rate": 4.009799892569317e-06, |
|
"logits/chosen": -3.4796624183654785, |
|
"logits/rejected": -3.4889540672302246, |
|
"logps/chosen": -294.43646240234375, |
|
"logps/rejected": -235.06918334960938, |
|
"loss": 0.637, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": 0.09381090849637985, |
|
"rewards/diff": -1.9453909397125244, |
|
"rewards/diff_abs": 1.9707868099212646, |
|
"rewards/rejected": -0.13163167238235474, |
|
"rewards/student_margin": 0.225442573428154, |
|
"rewards/teacher_margin": 2.1708333492279053, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.546875, |
|
"learning_rate": 3.8217849462726334e-06, |
|
"logits/chosen": -3.6116485595703125, |
|
"logits/rejected": -3.564044237136841, |
|
"logps/chosen": -246.2872314453125, |
|
"logps/rejected": -221.53182983398438, |
|
"loss": 0.6364, |
|
"rewards/accuracies": 0.5333333611488342, |
|
"rewards/chosen": -0.0010233506327494979, |
|
"rewards/diff": -1.8935142755508423, |
|
"rewards/diff_abs": 1.8935142755508423, |
|
"rewards/rejected": -0.08146756142377853, |
|
"rewards/student_margin": 0.08044421672821045, |
|
"rewards/teacher_margin": 1.9739586114883423, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.609375, |
|
"learning_rate": 3.6227535467632873e-06, |
|
"logits/chosen": -3.4925827980041504, |
|
"logits/rejected": -3.6650619506835938, |
|
"logps/chosen": -441.7289123535156, |
|
"logps/rejected": -258.9549255371094, |
|
"loss": 0.6285, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.09688085317611694, |
|
"rewards/diff": -1.887935996055603, |
|
"rewards/diff_abs": 1.9241327047348022, |
|
"rewards/rejected": -0.09226653724908829, |
|
"rewards/student_margin": 0.18914742767810822, |
|
"rewards/teacher_margin": 2.0770833492279053, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.609375, |
|
"learning_rate": 3.4143645267483144e-06, |
|
"logits/chosen": -3.485863208770752, |
|
"logits/rejected": -3.5399489402770996, |
|
"logps/chosen": -317.8275146484375, |
|
"logps/rejected": -262.54498291015625, |
|
"loss": 0.6253, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.009405359625816345, |
|
"rewards/diff": -2.4229490756988525, |
|
"rewards/diff_abs": 2.4852664470672607, |
|
"rewards/rejected": -0.1302066296339035, |
|
"rewards/student_margin": 0.12080129235982895, |
|
"rewards/teacher_margin": 2.543750047683716, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.5546875, |
|
"learning_rate": 3.1983547102818104e-06, |
|
"logits/chosen": -3.4576945304870605, |
|
"logits/rejected": -3.5367603302001953, |
|
"logps/chosen": -356.4649658203125, |
|
"logps/rejected": -292.81439208984375, |
|
"loss": 0.6201, |
|
"rewards/accuracies": 0.6333333253860474, |
|
"rewards/chosen": -0.02529655024409294, |
|
"rewards/diff": -1.6506948471069336, |
|
"rewards/diff_abs": 1.7114416360855103, |
|
"rewards/rejected": -0.43606019020080566, |
|
"rewards/student_margin": 0.410763680934906, |
|
"rewards/teacher_margin": 2.0614585876464844, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.5625, |
|
"learning_rate": 2.9765244371567873e-06, |
|
"logits/chosen": -3.4763588905334473, |
|
"logits/rejected": -3.562711715698242, |
|
"logps/chosen": -280.12835693359375, |
|
"logps/rejected": -208.5796661376953, |
|
"loss": 0.6191, |
|
"rewards/accuracies": 0.5666667222976685, |
|
"rewards/chosen": 0.045582111924886703, |
|
"rewards/diff": -2.346590280532837, |
|
"rewards/diff_abs": 2.3965134620666504, |
|
"rewards/rejected": -0.23230692744255066, |
|
"rewards/student_margin": 0.27788907289505005, |
|
"rewards/teacher_margin": 2.624479055404663, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.6484375, |
|
"learning_rate": 2.7507225579233487e-06, |
|
"logits/chosen": -3.7000794410705566, |
|
"logits/rejected": -3.884822368621826, |
|
"logps/chosen": -268.5820617675781, |
|
"logps/rejected": -196.71481323242188, |
|
"loss": 0.6147, |
|
"rewards/accuracies": 0.6666666269302368, |
|
"rewards/chosen": 0.0688471719622612, |
|
"rewards/diff": -2.06192946434021, |
|
"rewards/diff_abs": 2.06192946434021, |
|
"rewards/rejected": -0.0957857146859169, |
|
"rewards/student_margin": 0.1646328866481781, |
|
"rewards/teacher_margin": 2.2265625, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.734375, |
|
"learning_rate": 2.522831024592615e-06, |
|
"logits/chosen": -3.5710883140563965, |
|
"logits/rejected": -3.746605634689331, |
|
"logps/chosen": -306.7405700683594, |
|
"logps/rejected": -241.92324829101562, |
|
"loss": 0.6188, |
|
"rewards/accuracies": 0.6666666269302368, |
|
"rewards/chosen": -0.010941224172711372, |
|
"rewards/diff": -2.189682722091675, |
|
"rewards/diff_abs": 2.2137255668640137, |
|
"rewards/rejected": -0.17959186434745789, |
|
"rewards/student_margin": 0.16865065693855286, |
|
"rewards/teacher_margin": 2.3583333492279053, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.5859375, |
|
"learning_rate": 2.2947492054556075e-06, |
|
"logits/chosen": -3.5517051219940186, |
|
"logits/rejected": -3.7514452934265137, |
|
"logps/chosen": -323.0804138183594, |
|
"logps/rejected": -211.4063262939453, |
|
"loss": 0.6077, |
|
"rewards/accuracies": 0.6333333849906921, |
|
"rewards/chosen": -0.08047564327716827, |
|
"rewards/diff": -1.5008246898651123, |
|
"rewards/diff_abs": 1.5533117055892944, |
|
"rewards/rejected": -0.24319279193878174, |
|
"rewards/student_margin": 0.16271713376045227, |
|
"rewards/teacher_margin": 1.6635417938232422, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.59375, |
|
"learning_rate": 2.0683780547456666e-06, |
|
"logits/chosen": -3.480419635772705, |
|
"logits/rejected": -3.667999744415283, |
|
"logps/chosen": -314.51739501953125, |
|
"logps/rejected": -293.828125, |
|
"loss": 0.6222, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": 0.07014875113964081, |
|
"rewards/diff": -1.6531155109405518, |
|
"rewards/diff_abs": 1.6864144802093506, |
|
"rewards/rejected": -0.21058997511863708, |
|
"rewards/student_margin": 0.2807387411594391, |
|
"rewards/teacher_margin": 1.933854341506958, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.546875, |
|
"learning_rate": 1.845604269082787e-06, |
|
"logits/chosen": -3.5570831298828125, |
|
"logits/rejected": -3.799448013305664, |
|
"logps/chosen": -326.3376159667969, |
|
"logps/rejected": -229.31527709960938, |
|
"loss": 0.6077, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.007120040711015463, |
|
"rewards/diff": -2.2296009063720703, |
|
"rewards/diff_abs": 2.2296009063720703, |
|
"rewards/rejected": -0.10494570434093475, |
|
"rewards/student_margin": 0.11206575483083725, |
|
"rewards/teacher_margin": 2.3416669368743896, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.578125, |
|
"learning_rate": 1.628284562748429e-06, |
|
"logits/chosen": -3.538252592086792, |
|
"logits/rejected": -3.9445133209228516, |
|
"logps/chosen": -453.56585693359375, |
|
"logps/rejected": -191.9573516845703, |
|
"loss": 0.6036, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": 0.1685246229171753, |
|
"rewards/diff": -2.3100411891937256, |
|
"rewards/diff_abs": 2.372875690460205, |
|
"rewards/rejected": -0.21674680709838867, |
|
"rewards/student_margin": 0.38527145981788635, |
|
"rewards/teacher_margin": 2.695312738418579, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.5703125, |
|
"learning_rate": 1.4182301928489556e-06, |
|
"logits/chosen": -3.6895079612731934, |
|
"logits/rejected": -3.920624256134033, |
|
"logps/chosen": -319.21429443359375, |
|
"logps/rejected": -181.65463256835938, |
|
"loss": 0.6049, |
|
"rewards/accuracies": 0.6666666269302368, |
|
"rewards/chosen": 0.05849825218319893, |
|
"rewards/diff": -2.5624961853027344, |
|
"rewards/diff_abs": 2.587552547454834, |
|
"rewards/rejected": -0.21494324505329132, |
|
"rewards/student_margin": 0.27344149351119995, |
|
"rewards/teacher_margin": 2.835937738418579, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 1.5078125, |
|
"learning_rate": 1.2171918633431623e-06, |
|
"logits/chosen": -3.5318374633789062, |
|
"logits/rejected": -3.4252419471740723, |
|
"logps/chosen": -341.723388671875, |
|
"logps/rejected": -344.2121887207031, |
|
"loss": 0.6112, |
|
"rewards/accuracies": 0.6333333253860474, |
|
"rewards/chosen": -0.0180380679666996, |
|
"rewards/diff": -1.8148130178451538, |
|
"rewards/diff_abs": 1.9221045970916748, |
|
"rewards/rejected": -0.31676679849624634, |
|
"rewards/student_margin": 0.2987287640571594, |
|
"rewards/teacher_margin": 2.113541603088379, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 1.546875, |
|
"learning_rate": 1.0268451337516774e-06, |
|
"logits/chosen": -3.6931426525115967, |
|
"logits/rejected": -3.896336078643799, |
|
"logps/chosen": -308.007080078125, |
|
"logps/rejected": -167.8982696533203, |
|
"loss": 0.6079, |
|
"rewards/accuracies": 0.6333333253860474, |
|
"rewards/chosen": -0.07022675126791, |
|
"rewards/diff": -2.198145627975464, |
|
"rewards/diff_abs": 2.226590394973755, |
|
"rewards/rejected": -0.31322699785232544, |
|
"rewards/student_margin": 0.24300022423267365, |
|
"rewards/teacher_margin": 2.441145896911621, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 1.5, |
|
"learning_rate": 8.487764541597765e-07, |
|
"logits/chosen": -3.497781753540039, |
|
"logits/rejected": -3.8565516471862793, |
|
"logps/chosen": -267.2030944824219, |
|
"logps/rejected": -168.0758819580078, |
|
"loss": 0.6053, |
|
"rewards/accuracies": 0.6999999284744263, |
|
"rewards/chosen": -0.04164644330739975, |
|
"rewards/diff": -2.201241970062256, |
|
"rewards/diff_abs": 2.2781121730804443, |
|
"rewards/rejected": -0.2831125855445862, |
|
"rewards/student_margin": 0.24146613478660583, |
|
"rewards/teacher_margin": 2.4427084922790527, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 1.5625, |
|
"learning_rate": 6.844699429052377e-07, |
|
"logits/chosen": -3.385387897491455, |
|
"logits/rejected": -3.6356453895568848, |
|
"logps/chosen": -409.85205078125, |
|
"logps/rejected": -312.1107177734375, |
|
"loss": 0.6067, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.05188798904418945, |
|
"rewards/diff": -1.7707669734954834, |
|
"rewards/diff_abs": 1.8406012058258057, |
|
"rewards/rejected": -0.28216272592544556, |
|
"rewards/student_margin": 0.2302747219800949, |
|
"rewards/teacher_margin": 2.001041889190674, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 1.5703125, |
|
"learning_rate": 5.352950171529928e-07, |
|
"logits/chosen": -3.574982166290283, |
|
"logits/rejected": -3.6512961387634277, |
|
"logps/chosen": -244.470947265625, |
|
"logps/rejected": -193.90447998046875, |
|
"loss": 0.6061, |
|
"rewards/accuracies": 0.7333332896232605, |
|
"rewards/chosen": -0.07677438855171204, |
|
"rewards/diff": -2.3986549377441406, |
|
"rewards/diff_abs": 2.4272804260253906, |
|
"rewards/rejected": -0.37082797288894653, |
|
"rewards/student_margin": 0.2940535545349121, |
|
"rewards/teacher_margin": 2.6927084922790527, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 1.5703125, |
|
"learning_rate": 4.024949794498623e-07, |
|
"logits/chosen": -3.5370934009552, |
|
"logits/rejected": -3.8875668048858643, |
|
"logps/chosen": -254.0207977294922, |
|
"logps/rejected": -177.51011657714844, |
|
"loss": 0.6019, |
|
"rewards/accuracies": 0.73333340883255, |
|
"rewards/chosen": -0.05724753811955452, |
|
"rewards/diff": -2.3326706886291504, |
|
"rewards/diff_abs": 2.4501850605010986, |
|
"rewards/rejected": -0.37457695603370667, |
|
"rewards/student_margin": 0.31732940673828125, |
|
"rewards/teacher_margin": 2.6500000953674316, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 1.6171875, |
|
"learning_rate": 2.8717665538507965e-07, |
|
"logits/chosen": -3.554170608520508, |
|
"logits/rejected": -3.562087297439575, |
|
"logps/chosen": -279.2240905761719, |
|
"logps/rejected": -198.07553100585938, |
|
"loss": 0.6066, |
|
"rewards/accuracies": 0.6333333253860474, |
|
"rewards/chosen": -0.03678184002637863, |
|
"rewards/diff": -1.2939647436141968, |
|
"rewards/diff_abs": 1.5477110147476196, |
|
"rewards/rejected": -0.4365670084953308, |
|
"rewards/student_margin": 0.39978522062301636, |
|
"rewards/teacher_margin": 1.693750023841858, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 1.546875, |
|
"learning_rate": 1.9030116872178317e-07, |
|
"logits/chosen": -3.729553699493408, |
|
"logits/rejected": -3.6963329315185547, |
|
"logps/chosen": -294.4330139160156, |
|
"logps/rejected": -217.95785522460938, |
|
"loss": 0.6061, |
|
"rewards/accuracies": 0.7333333492279053, |
|
"rewards/chosen": 0.03005790151655674, |
|
"rewards/diff": -1.652592420578003, |
|
"rewards/diff_abs": 1.681958794593811, |
|
"rewards/rejected": -0.3183913826942444, |
|
"rewards/student_margin": 0.34844931960105896, |
|
"rewards/teacher_margin": 2.001041889190674, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 1.5234375, |
|
"learning_rate": 1.1267593088441886e-07, |
|
"logits/chosen": -3.5918819904327393, |
|
"logits/rejected": -3.487471103668213, |
|
"logps/chosen": -301.32330322265625, |
|
"logps/rejected": -268.36737060546875, |
|
"loss": 0.6086, |
|
"rewards/accuracies": 0.6333333253860474, |
|
"rewards/chosen": -0.02564469538629055, |
|
"rewards/diff": -1.5005762577056885, |
|
"rewards/diff_abs": 1.6122653484344482, |
|
"rewards/rejected": -0.3542352318763733, |
|
"rewards/student_margin": 0.3285905420780182, |
|
"rewards/teacher_margin": 1.8291666507720947, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 1.59375, |
|
"learning_rate": 5.494791156587686e-08, |
|
"logits/chosen": -3.7036406993865967, |
|
"logits/rejected": -3.693377733230591, |
|
"logps/chosen": -232.45254516601562, |
|
"logps/rejected": -235.75521850585938, |
|
"loss": 0.6033, |
|
"rewards/accuracies": 0.6999999284744263, |
|
"rewards/chosen": -0.03294830024242401, |
|
"rewards/diff": -1.6850173473358154, |
|
"rewards/diff_abs": 1.7215397357940674, |
|
"rewards/rejected": -0.19897261261940002, |
|
"rewards/student_margin": 0.166024312376976, |
|
"rewards/teacher_margin": 1.851041555404663, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 1.5625, |
|
"learning_rate": 1.7598246540683483e-08, |
|
"logits/chosen": -3.8011093139648438, |
|
"logits/rejected": -3.7970664501190186, |
|
"logps/chosen": -246.9715118408203, |
|
"logps/rejected": -200.6898956298828, |
|
"loss": 0.609, |
|
"rewards/accuracies": 0.7999999523162842, |
|
"rewards/chosen": -0.005081920884549618, |
|
"rewards/diff": -1.5501182079315186, |
|
"rewards/diff_abs": 1.625860571861267, |
|
"rewards/rejected": -0.3940262198448181, |
|
"rewards/student_margin": 0.3889443278312683, |
|
"rewards/teacher_margin": 1.9390627145767212, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 1.5625, |
|
"learning_rate": 9.382276255742729e-10, |
|
"logits/chosen": -3.510223388671875, |
|
"logits/rejected": -3.580479145050049, |
|
"logps/chosen": -379.2341003417969, |
|
"logps/rejected": -310.8593444824219, |
|
"loss": 0.6077, |
|
"rewards/accuracies": 0.6333333253860474, |
|
"rewards/chosen": 0.037930965423583984, |
|
"rewards/diff": -1.598463535308838, |
|
"rewards/diff_abs": 1.7268564701080322, |
|
"rewards/rejected": -0.2682930827140808, |
|
"rewards/student_margin": 0.3062240481376648, |
|
"rewards/teacher_margin": 1.904687523841858, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 383, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6328112833182432, |
|
"train_runtime": 3006.7888, |
|
"train_samples_per_second": 48.89, |
|
"train_steps_per_second": 0.127 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 383, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100000000000000000000000000000000, |
|
"total_flos": 0.0, |
|
"train_batch_size": 3, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|