{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 8.0,
  "eval_steps": 1,
  "global_step": 472,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01694915254237288,
      "grad_norm": 39.081620832935286,
      "learning_rate": 1.0416666666666666e-08,
      "logits/chosen": -0.04004784673452377,
      "logits/rejected": -0.012884330004453659,
      "logps/chosen": -24.14839744567871,
      "logps/rejected": -35.14466094970703,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.03389830508474576,
      "grad_norm": 41.95997929051618,
      "learning_rate": 2.083333333333333e-08,
      "logits/chosen": 0.18785351514816284,
      "logits/rejected": 0.21833035349845886,
      "logps/chosen": -31.55377197265625,
      "logps/rejected": -35.9189567565918,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 2
    },
    {
      "epoch": 0.05084745762711865,
      "grad_norm": 41.574477134990545,
      "learning_rate": 3.125e-08,
      "logits/chosen": -0.13298606872558594,
      "logits/rejected": -0.12034030258655548,
      "logps/chosen": -27.085824966430664,
      "logps/rejected": -44.451595306396484,
      "loss": 0.6789,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": 0.043108198791742325,
      "rewards/margins": 0.03870103508234024,
      "rewards/rejected": 0.004407165572047234,
      "step": 3
    },
    {
      "epoch": 0.06779661016949153,
      "grad_norm": 38.12229749762995,
      "learning_rate": 4.166666666666666e-08,
      "logits/chosen": -0.02340121753513813,
      "logits/rejected": 0.04097435995936394,
      "logps/chosen": -26.125139236450195,
      "logps/rejected": -34.786293029785156,
      "loss": 0.7018,
      "rewards/accuracies": 0.4375,
      "rewards/chosen": 0.005571034736931324,
      "rewards/margins": -0.0023282519541680813,
      "rewards/rejected": 0.007899284362792969,
      "step": 4
    },
    {
      "epoch": 0.0847457627118644,
      "grad_norm": 43.98516972909633,
      "learning_rate": 5.208333333333333e-08,
      "logits/chosen": -0.07847192883491516,
      "logits/rejected": -0.08863978832960129,
      "logps/chosen": -28.029014587402344,
      "logps/rejected": -24.517436981201172,
      "loss": 0.6959,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": 0.022370003163814545,
      "rewards/margins": 0.06014883145689964,
      "rewards/rejected": -0.0377788320183754,
      "step": 5
    },
    {
      "epoch": 0.1016949152542373,
      "grad_norm": 37.8616646433652,
      "learning_rate": 6.25e-08,
      "logits/chosen": 0.01001177728176117,
      "logits/rejected": 0.03767494484782219,
      "logps/chosen": -34.69060134887695,
      "logps/rejected": -34.56515884399414,
      "loss": 0.6964,
      "rewards/accuracies": 0.5,
      "rewards/chosen": -0.0483599528670311,
      "rewards/margins": -0.05064802244305611,
      "rewards/rejected": 0.002288064919412136,
      "step": 6
    },
    {
      "epoch": 0.11864406779661017,
      "grad_norm": 39.48592290044396,
      "learning_rate": 7.291666666666667e-08,
      "logits/chosen": 0.09730193018913269,
      "logits/rejected": 0.12533338367938995,
      "logps/chosen": -26.894184112548828,
      "logps/rejected": -29.685768127441406,
      "loss": 0.6871,
      "rewards/accuracies": 0.5,
      "rewards/chosen": -0.014862039126455784,
      "rewards/margins": 0.04402291774749756,
      "rewards/rejected": -0.05888495221734047,
      "step": 7
    },
    {
      "epoch": 0.13559322033898305,
      "grad_norm": 41.45250718986053,
      "learning_rate": 8.333333333333333e-08,
      "logits/chosen": -0.07943608611822128,
      "logits/rejected": -0.05526775121688843,
      "logps/chosen": -23.665637969970703,
      "logps/rejected": -35.581138610839844,
      "loss": 0.7069,
      "rewards/accuracies": 0.4375,
      "rewards/chosen": 0.0016717063263058662,
      "rewards/margins": -0.02722988836467266,
      "rewards/rejected": 0.028901590034365654,
      "step": 8
    },
    {
      "epoch": 0.15254237288135594,
      "grad_norm": 36.01634420144333,
      "learning_rate": 9.375e-08,
      "logits/chosen": -0.0029595959931612015,
      "logits/rejected": 0.01232635322958231,
      "logps/chosen": -30.279748916625977,
      "logps/rejected": -24.777137756347656,
      "loss": 0.6932,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": -0.024993158876895905,
      "rewards/margins": 0.01761629246175289,
      "rewards/rejected": -0.04260944947600365,
      "step": 9
    },
    {
      "epoch": 0.1694915254237288,
      "grad_norm": 39.56478667920128,
      "learning_rate": 1.0416666666666667e-07,
      "logits/chosen": 0.18664813041687012,
      "logits/rejected": 0.15227466821670532,
      "logps/chosen": -33.973602294921875,
      "logps/rejected": -33.727115631103516,
      "loss": 0.7043,
      "rewards/accuracies": 0.375,
      "rewards/chosen": 0.0049431659281253815,
      "rewards/margins": -0.026568636298179626,
      "rewards/rejected": 0.031511805951595306,
      "step": 10
    },
    {
      "epoch": 0.1864406779661017,
      "grad_norm": 39.96998647964932,
      "learning_rate": 1.1458333333333332e-07,
      "logits/chosen": 0.22770923376083374,
      "logits/rejected": 0.2530755400657654,
      "logps/chosen": -25.40655517578125,
      "logps/rejected": -39.74527359008789,
      "loss": 0.6944,
      "rewards/accuracies": 0.375,
      "rewards/chosen": -0.05294986814260483,
      "rewards/margins": -0.01789700984954834,
      "rewards/rejected": -0.035052862018346786,
      "step": 11
    },
    {
      "epoch": 0.2033898305084746,
      "grad_norm": 41.34319202972142,
      "learning_rate": 1.25e-07,
      "logits/chosen": 0.05755678564310074,
      "logits/rejected": 0.05909465625882149,
      "logps/chosen": -23.82120704650879,
      "logps/rejected": -29.727937698364258,
      "loss": 0.6877,
      "rewards/accuracies": 0.625,
      "rewards/chosen": 0.028387153521180153,
      "rewards/margins": 0.04089733213186264,
      "rewards/rejected": -0.012510182335972786,
      "step": 12
    },
    {
      "epoch": 0.22033898305084745,
      "grad_norm": 39.52994008664552,
      "learning_rate": 1.3541666666666666e-07,
      "logits/chosen": 0.010963734239339828,
      "logits/rejected": -0.006987990811467171,
      "logps/chosen": -23.91936683654785,
      "logps/rejected": -30.996225357055664,
      "loss": 0.6894,
      "rewards/accuracies": 0.5,
      "rewards/chosen": -0.013032305985689163,
      "rewards/margins": -0.01489502377808094,
      "rewards/rejected": 0.001862717792391777,
      "step": 13
    },
    {
      "epoch": 0.23728813559322035,
      "grad_norm": 43.22009535631131,
      "learning_rate": 1.4583333333333335e-07,
      "logits/chosen": 0.1792532503604889,
      "logits/rejected": 0.23038198053836823,
      "logps/chosen": -38.606624603271484,
      "logps/rejected": -52.0256462097168,
      "loss": 0.6851,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": 0.030375886708498,
      "rewards/margins": 0.07139457017183304,
      "rewards/rejected": -0.04101867973804474,
      "step": 14
    },
    {
      "epoch": 0.2542372881355932,
      "grad_norm": 38.55173749063397,
      "learning_rate": 1.5624999999999999e-07,
      "logits/chosen": -0.00039753085002303123,
      "logits/rejected": 0.006743618752807379,
      "logps/chosen": -20.85459327697754,
      "logps/rejected": -31.867145538330078,
      "loss": 0.6931,
      "rewards/accuracies": 0.5,
      "rewards/chosen": -0.015277398750185966,
      "rewards/margins": -0.01538792997598648,
      "rewards/rejected": 0.00011053076013922691,
      "step": 15
    },
    {
      "epoch": 0.2711864406779661,
      "grad_norm": 36.132422216008756,
      "learning_rate": 1.6666666666666665e-07,
      "logits/chosen": -0.036632318049669266,
      "logits/rejected": -0.05143912881612778,
      "logps/chosen": -25.975902557373047,
      "logps/rejected": -30.601673126220703,
      "loss": 0.6783,
      "rewards/accuracies": 0.4375,
      "rewards/chosen": -0.025256335735321045,
      "rewards/margins": -0.011055359616875648,
      "rewards/rejected": -0.014200975187122822,
      "step": 16
    },
    {
      "epoch": 0.288135593220339,
      "grad_norm": 38.93415568334601,
      "learning_rate": 1.7708333333333334e-07,
      "logits/chosen": -0.03795609995722771,
      "logits/rejected": -0.04627775773406029,
      "logps/chosen": -22.88838768005371,
      "logps/rejected": -28.53569984436035,
      "loss": 0.6799,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": 0.0017479183152318,
      "rewards/margins": 0.0676286369562149,
      "rewards/rejected": -0.06588071584701538,
      "step": 17
    },
    {
      "epoch": 0.3050847457627119,
      "grad_norm": 37.216443506833954,
      "learning_rate": 1.875e-07,
      "logits/chosen": 0.12987589836120605,
      "logits/rejected": 0.16591012477874756,
      "logps/chosen": -20.29220962524414,
      "logps/rejected": -27.848968505859375,
      "loss": 0.6856,
      "rewards/accuracies": 0.5,
      "rewards/chosen": -0.02715757116675377,
      "rewards/margins": 0.0035054399631917477,
      "rewards/rejected": -0.030663013458251953,
      "step": 18
    },
    {
      "epoch": 0.3220338983050847,
      "grad_norm": 36.09119961798322,
      "learning_rate": 1.9791666666666664e-07,
      "logits/chosen": 0.11148576438426971,
      "logits/rejected": 0.1186145693063736,
      "logps/chosen": -19.455955505371094,
      "logps/rejected": -30.798999786376953,
      "loss": 0.6764,
      "rewards/accuracies": 0.625,
      "rewards/chosen": 0.03620731830596924,
      "rewards/margins": 0.08103629946708679,
      "rewards/rejected": -0.044828981161117554,
      "step": 19
    },
    {
      "epoch": 0.3389830508474576,
      "grad_norm": 34.52699754862708,
      "learning_rate": 2.0833333333333333e-07,
      "logits/chosen": -0.040645819157361984,
      "logits/rejected": -0.09117074310779572,
      "logps/chosen": -30.70236587524414,
      "logps/rejected": -31.846435546875,
      "loss": 0.6725,
      "rewards/accuracies": 0.625,
      "rewards/chosen": 0.025728441774845123,
      "rewards/margins": 0.05640077590942383,
      "rewards/rejected": -0.030672335997223854,
      "step": 20
    },
    {
      "epoch": 0.3559322033898305,
      "grad_norm": 38.450864425486444,
      "learning_rate": 2.1875e-07,
      "logits/chosen": -0.04155284911394119,
      "logits/rejected": -0.08195465058088303,
      "logps/chosen": -24.620819091796875,
      "logps/rejected": -35.44722366333008,
      "loss": 0.6753,
      "rewards/accuracies": 0.5,
      "rewards/chosen": 0.007430051453411579,
      "rewards/margins": 0.049906615167856216,
      "rewards/rejected": -0.04247656092047691,
      "step": 21
    },
    {
      "epoch": 0.3728813559322034,
      "grad_norm": 38.60964633502043,
      "learning_rate": 2.2916666666666663e-07,
      "logits/chosen": 0.037601783871650696,
      "logits/rejected": 0.051545850932598114,
      "logps/chosen": -20.464923858642578,
      "logps/rejected": -25.813556671142578,
      "loss": 0.656,
      "rewards/accuracies": 0.625,
      "rewards/chosen": 0.0019244614522904158,
      "rewards/margins": 0.10572130233049393,
      "rewards/rejected": -0.10379683971405029,
      "step": 22
    },
    {
      "epoch": 0.3898305084745763,
      "grad_norm": 37.53125515825806,
      "learning_rate": 2.3958333333333335e-07,
      "logits/chosen": -0.04523741453886032,
      "logits/rejected": -0.08811002969741821,
      "logps/chosen": -26.055984497070312,
      "logps/rejected": -25.679134368896484,
      "loss": 0.6379,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": -0.054784782230854034,
      "rewards/margins": 0.07096240669488907,
      "rewards/rejected": -0.1257471889257431,
      "step": 23
    },
    {
      "epoch": 0.4067796610169492,
      "grad_norm": 40.26892670789944,
      "learning_rate": 2.5e-07,
      "logits/chosen": -0.08595943450927734,
      "logits/rejected": -0.09404819458723068,
      "logps/chosen": -30.186988830566406,
      "logps/rejected": -33.44403076171875,
      "loss": 0.6393,
      "rewards/accuracies": 0.5,
      "rewards/chosen": -0.04100564867258072,
      "rewards/margins": 0.07639746367931366,
      "rewards/rejected": -0.11740311980247498,
      "step": 24
    },
    {
      "epoch": 0.423728813559322,
      "grad_norm": 47.582895505174676,
      "learning_rate": 2.604166666666667e-07,
      "logits/chosen": 0.002766113728284836,
      "logits/rejected": 0.002811681479215622,
      "logps/chosen": -35.549591064453125,
      "logps/rejected": -32.83184051513672,
      "loss": 0.6482,
      "rewards/accuracies": 0.5,
      "rewards/chosen": -0.07030116766691208,
      "rewards/margins": 0.06389589607715607,
      "rewards/rejected": -0.13419707119464874,
      "step": 25
    },
    {
      "epoch": 0.4406779661016949,
      "grad_norm": 34.075640070522816,
      "learning_rate": 2.708333333333333e-07,
      "logits/chosen": -0.02534855529665947,
      "logits/rejected": -0.011001847684383392,
      "logps/chosen": -22.414587020874023,
      "logps/rejected": -28.95859146118164,
      "loss": 0.621,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.008834121748805046,
      "rewards/margins": 0.2195996344089508,
      "rewards/rejected": -0.228433758020401,
      "step": 26
    },
    {
      "epoch": 0.4576271186440678,
      "grad_norm": 34.47879927670914,
      "learning_rate": 2.8125e-07,
      "logits/chosen": 0.0005891900509595871,
      "logits/rejected": -0.04569123312830925,
      "logps/chosen": -27.095754623413086,
      "logps/rejected": -34.3789176940918,
      "loss": 0.622,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -0.00932928267866373,
      "rewards/margins": 0.28314852714538574,
      "rewards/rejected": -0.2924777865409851,
      "step": 27
    },
    {
      "epoch": 0.4745762711864407,
      "grad_norm": 33.381546864263576,
      "learning_rate": 2.916666666666667e-07,
      "logits/chosen": -0.03613307327032089,
      "logits/rejected": -0.07326073944568634,
      "logps/chosen": -20.990463256835938,
      "logps/rejected": -26.562923431396484,
      "loss": 0.6157,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.00729251466691494,
      "rewards/margins": 0.15824466943740845,
      "rewards/rejected": -0.16553716361522675,
      "step": 28
    },
    {
      "epoch": 0.4915254237288136,
      "grad_norm": 39.396295244537285,
      "learning_rate": 3.020833333333333e-07,
      "logits/chosen": 0.06360377371311188,
      "logits/rejected": 0.0748274177312851,
      "logps/chosen": -23.62378692626953,
      "logps/rejected": -31.0860595703125,
      "loss": 0.6277,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.017558498308062553,
      "rewards/margins": 0.13148798048496246,
      "rewards/rejected": -0.14904648065567017,
      "step": 29
    },
    {
      "epoch": 0.5084745762711864,
      "grad_norm": 35.102940131398256,
      "learning_rate": 3.1249999999999997e-07,
      "logits/chosen": 0.06532293558120728,
      "logits/rejected": 0.06247016414999962,
      "logps/chosen": -26.590116500854492,
      "logps/rejected": -34.515804290771484,
      "loss": 0.5964,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": -0.0014454489573836327,
      "rewards/margins": 0.5317557454109192,
      "rewards/rejected": -0.5332012176513672,
      "step": 30
    },
    {
      "epoch": 0.5254237288135594,
      "grad_norm": 34.11889459677525,
      "learning_rate": 3.2291666666666666e-07,
      "logits/chosen": 0.09973854571580887,
      "logits/rejected": 0.1072133332490921,
      "logps/chosen": -25.892887115478516,
      "logps/rejected": -32.363502502441406,
      "loss": 0.5721,
      "rewards/accuracies": 0.875,
      "rewards/chosen": 0.006193090230226517,
      "rewards/margins": 0.30112165212631226,
      "rewards/rejected": -0.29492852091789246,
      "step": 31
    },
    {
      "epoch": 0.5423728813559322,
      "grad_norm": 32.761918192518266,
      "learning_rate": 3.333333333333333e-07,
      "logits/chosen": 0.037455491721630096,
      "logits/rejected": -0.05081958696246147,
      "logps/chosen": -33.243309020996094,
      "logps/rejected": -35.219573974609375,
      "loss": 0.5398,
      "rewards/accuracies": 0.875,
      "rewards/chosen": -0.022554118186235428,
      "rewards/margins": 0.5276864767074585,
      "rewards/rejected": -0.5502405166625977,
      "step": 32
    },
    {
      "epoch": 0.559322033898305,
      "grad_norm": 30.482548567561853,
      "learning_rate": 3.4375e-07,
      "logits/chosen": 0.041740238666534424,
      "logits/rejected": 0.10962522029876709,
      "logps/chosen": -24.476438522338867,
      "logps/rejected": -38.58897399902344,
      "loss": 0.5268,
      "rewards/accuracies": 0.75,
      "rewards/chosen": -0.003558039665222168,
      "rewards/margins": 0.8583400249481201,
      "rewards/rejected": -0.8618981838226318,
      "step": 33
    },
    {
      "epoch": 0.576271186440678,
      "grad_norm": 31.30582576136025,
      "learning_rate": 3.541666666666667e-07,
      "logits/chosen": 0.002660442143678665,
      "logits/rejected": 0.017039887607097626,
      "logps/chosen": -27.219778060913086,
      "logps/rejected": -33.36122131347656,
      "loss": 0.5383,
      "rewards/accuracies": 0.875,
      "rewards/chosen": -0.02310222014784813,
      "rewards/margins": 0.5289267301559448,
      "rewards/rejected": -0.5520289540290833,
      "step": 34
    },
    {
      "epoch": 0.5932203389830508,
      "grad_norm": 32.89490941791439,
      "learning_rate": 3.645833333333333e-07,
      "logits/chosen": 0.03442692011594772,
      "logits/rejected": 0.06397214531898499,
      "logps/chosen": -20.274240493774414,
      "logps/rejected": -44.2073974609375,
      "loss": 0.5019,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": 0.0024875528179109097,
      "rewards/margins": 1.1822435855865479,
      "rewards/rejected": -1.1797560453414917,
      "step": 35
    },
    {
      "epoch": 0.6101694915254238,
      "grad_norm": 31.03945146034194,
      "learning_rate": 3.75e-07,
      "logits/chosen": -0.0311665628105402,
      "logits/rejected": -0.02556237392127514,
      "logps/chosen": -22.00820541381836,
      "logps/rejected": -27.99129295349121,
      "loss": 0.5159,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": -0.02089952491223812,
      "rewards/margins": 0.5325387716293335,
      "rewards/rejected": -0.5534383058547974,
      "step": 36
    },
    {
      "epoch": 0.6271186440677966,
      "grad_norm": 29.670396668547138,
      "learning_rate": 3.8541666666666665e-07,
      "logits/chosen": 0.0932985171675682,
      "logits/rejected": 0.08139631897211075,
      "logps/chosen": -26.00881576538086,
      "logps/rejected": -29.33023452758789,
      "loss": 0.4997,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -0.10078494995832443,
      "rewards/margins": 0.5040473341941833,
      "rewards/rejected": -0.6048322916030884,
      "step": 37
    },
    {
      "epoch": 0.6440677966101694,
      "grad_norm": 33.08140356711789,
      "learning_rate": 3.958333333333333e-07,
      "logits/chosen": -0.01641334407031536,
      "logits/rejected": -0.005850490182638168,
      "logps/chosen": -28.798660278320312,
      "logps/rejected": -50.10844421386719,
      "loss": 0.5076,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": -0.050310466438531876,
      "rewards/margins": 0.6731055974960327,
      "rewards/rejected": -0.7234160304069519,
      "step": 38
    },
    {
      "epoch": 0.6610169491525424,
      "grad_norm": 34.20951880392297,
      "learning_rate": 4.0625e-07,
      "logits/chosen": -0.1090591624379158,
      "logits/rejected": -0.12284770607948303,
      "logps/chosen": -33.75372314453125,
      "logps/rejected": -42.935585021972656,
      "loss": 0.5746,
      "rewards/accuracies": 0.75,
      "rewards/chosen": -0.08654585480690002,
      "rewards/margins": 1.2381523847579956,
      "rewards/rejected": -1.3246984481811523,
      "step": 39
    },
    {
      "epoch": 0.6779661016949152,
      "grad_norm": 30.690269873517938,
      "learning_rate": 4.1666666666666667e-07,
      "logits/chosen": -0.0014614351093769073,
      "logits/rejected": 0.08014758676290512,
      "logps/chosen": -25.105735778808594,
      "logps/rejected": -36.967323303222656,
      "loss": 0.5205,
      "rewards/accuracies": 0.875,
      "rewards/chosen": -0.09284328669309616,
      "rewards/margins": 0.7982729077339172,
      "rewards/rejected": -0.8911161422729492,
      "step": 40
    },
    {
      "epoch": 0.6949152542372882,
      "grad_norm": 30.04204632805336,
      "learning_rate": 4.270833333333333e-07,
      "logits/chosen": 0.053642358630895615,
      "logits/rejected": 0.044470448046922684,
      "logps/chosen": -24.64603042602539,
      "logps/rejected": -41.87240219116211,
      "loss": 0.4837,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": -0.10667266696691513,
      "rewards/margins": 1.2056063413619995,
      "rewards/rejected": -1.3122789859771729,
      "step": 41
    },
    {
      "epoch": 0.711864406779661,
      "grad_norm": 31.020878298393608,
      "learning_rate": 4.375e-07,
      "logits/chosen": 0.019134098663926125,
      "logits/rejected": 0.01840081252157688,
      "logps/chosen": -23.039093017578125,
      "logps/rejected": -33.015777587890625,
      "loss": 0.4991,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": -0.04813046008348465,
      "rewards/margins": 0.9387863874435425,
      "rewards/rejected": -0.9869168996810913,
      "step": 42
    },
    {
      "epoch": 0.7288135593220338,
      "grad_norm": 32.27135984427571,
      "learning_rate": 4.479166666666667e-07,
      "logits/chosen": 0.008926652371883392,
      "logits/rejected": -0.005259339697659016,
      "logps/chosen": -42.513465881347656,
      "logps/rejected": -36.392086029052734,
      "loss": 0.4953,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -0.16403140127658844,
      "rewards/margins": 0.2477284073829651,
      "rewards/rejected": -0.4117598235607147,
      "step": 43
    },
    {
      "epoch": 0.7457627118644068,
      "grad_norm": 31.70747032110601,
      "learning_rate": 4.5833333333333327e-07,
      "logits/chosen": 0.08293592184782028,
      "logits/rejected": 0.14042136073112488,
      "logps/chosen": -27.64384651184082,
      "logps/rejected": -43.646812438964844,
      "loss": 0.5279,
      "rewards/accuracies": 0.75,
      "rewards/chosen": 0.002585211768746376,
      "rewards/margins": 0.8831788897514343,
      "rewards/rejected": -0.8805936574935913,
      "step": 44
    },
    {
      "epoch": 0.7627118644067796,
      "grad_norm": 32.92194369706788,
      "learning_rate": 4.6874999999999996e-07,
      "logits/chosen": 0.0978875681757927,
      "logits/rejected": 0.07510063052177429,
      "logps/chosen": -25.6392822265625,
      "logps/rejected": -43.59218215942383,
      "loss": 0.4975,
      "rewards/accuracies": 0.875,
      "rewards/chosen": -0.05219133943319321,
      "rewards/margins": 1.5015443563461304,
      "rewards/rejected": -1.553735613822937,
      "step": 45
    },
    {
      "epoch": 0.7796610169491526,
      "grad_norm": 31.09847853088202,
      "learning_rate": 4.791666666666667e-07,
      "logits/chosen": 0.05425513535737991,
      "logits/rejected": 0.060507796704769135,
      "logps/chosen": -31.77846908569336,
      "logps/rejected": -39.067787170410156,
      "loss": 0.4798,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -0.18950122594833374,
      "rewards/margins": 0.7816174626350403,
      "rewards/rejected": -0.971118688583374,
      "step": 46
    },
    {
      "epoch": 0.7966101694915254,
      "grad_norm": 31.87817139649752,
      "learning_rate": 4.895833333333333e-07,
      "logits/chosen": 0.06690789759159088,
      "logits/rejected": 0.06767144054174423,
      "logps/chosen": -29.99129867553711,
      "logps/rejected": -34.969505310058594,
      "loss": 0.4447,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -0.09975402057170868,
      "rewards/margins": 0.3266308903694153,
      "rewards/rejected": -0.42638492584228516,
      "step": 47
    },
    {
      "epoch": 0.8135593220338984,
      "grad_norm": 26.865616424406536,
      "learning_rate": 5e-07,
      "logits/chosen": -0.09880068153142929,
      "logits/rejected": -0.10087430477142334,
      "logps/chosen": -28.3320369720459,
      "logps/rejected": -43.12381362915039,
      "loss": 0.3955,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": -0.10569320619106293,
      "rewards/margins": 1.8550941944122314,
      "rewards/rejected": -1.960787296295166,
      "step": 48
    },
    {
      "epoch": 0.8305084745762712,
      "grad_norm": 25.76308856645317,
      "learning_rate": 4.999931375995349e-07,
      "logits/chosen": -0.12734848260879517,
      "logits/rejected": -0.11239587515592575,
      "logps/chosen": -23.94550132751465,
      "logps/rejected": -32.49237823486328,
      "loss": 0.4445,
      "rewards/accuracies": 0.875,
      "rewards/chosen": -0.11973586678504944,
      "rewards/margins": 0.8172601461410522,
      "rewards/rejected": -0.9369959831237793,
      "step": 49
    },
    {
      "epoch": 0.847457627118644,
      "grad_norm": 30.25637466477477,
      "learning_rate": 4.999725507748798e-07,
      "logits/chosen": -0.015037477016448975,
      "logits/rejected": -0.009709347039461136,
      "logps/chosen": -25.780975341796875,
      "logps/rejected": -41.78852462768555,
      "loss": 0.4786,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": -0.08354266732931137,
      "rewards/margins": 1.2830588817596436,
      "rewards/rejected": -1.3666014671325684,
      "step": 50
    },
    {
      "epoch": 0.864406779661017,
      "grad_norm": 33.41966487787268,
      "learning_rate": 4.99938240656235e-07,
      "logits/chosen": 0.04738205671310425,
      "logits/rejected": 0.07401569187641144,
      "logps/chosen": -26.12303924560547,
      "logps/rejected": -49.93025207519531,
      "loss": 0.4347,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": -0.14142322540283203,
      "rewards/margins": 0.954620361328125,
      "rewards/rejected": -1.096043586730957,
      "step": 51
    },
    {
      "epoch": 0.8813559322033898,
      "grad_norm": 27.061236838799616,
      "learning_rate": 4.998902091271985e-07,
      "logits/chosen": -0.06941650807857513,
      "logits/rejected": -0.05763792619109154,
      "logps/chosen": -23.328826904296875,
      "logps/rejected": -35.76228713989258,
      "loss": 0.389,
      "rewards/accuracies": 0.875,
      "rewards/chosen": -0.26782581210136414,
      "rewards/margins": 0.8718900680541992,
      "rewards/rejected": -1.1397159099578857,
      "step": 52
    },
    {
      "epoch": 0.8983050847457628,
      "grad_norm": 27.553317644610285,
      "learning_rate": 4.998284588246634e-07,
      "logits/chosen": -0.03946888446807861,
      "logits/rejected": -0.03690715879201889,
      "logps/chosen": -28.930063247680664,
      "logps/rejected": -32.62754440307617,
      "loss": 0.4152,
      "rewards/accuracies": 0.75,
      "rewards/chosen": -0.32019758224487305,
      "rewards/margins": 1.2472233772277832,
      "rewards/rejected": -1.5674208402633667,
      "step": 53
    },
    {
      "epoch": 0.9152542372881356,
      "grad_norm": 29.763675864173276,
      "learning_rate": 4.997529931386719e-07,
      "logits/chosen": -0.17749209702014923,
      "logits/rejected": -0.16170337796211243,
      "logps/chosen": -30.868289947509766,
      "logps/rejected": -32.478729248046875,
      "loss": 0.4555,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -0.2578313648700714,
      "rewards/margins": 0.5673401355743408,
      "rewards/rejected": -0.8251715898513794,
      "step": 54
    },
    {
      "epoch": 0.9322033898305084,
      "grad_norm": 33.13736711358155,
      "learning_rate": 4.996638162122302e-07,
      "logits/chosen": -0.06908832490444183,
      "logits/rejected": -0.05076206475496292,
      "logps/chosen": -30.415069580078125,
      "logps/rejected": -35.18532180786133,
      "loss": 0.4454,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -0.15613248944282532,
      "rewards/margins": 1.203932523727417,
      "rewards/rejected": -1.36006498336792,
      "step": 55
    },
    {
      "epoch": 0.9491525423728814,
      "grad_norm": 43.11633871701129,
      "learning_rate": 4.995609329410804e-07,
      "logits/chosen": -0.008376002311706543,
      "logits/rejected": 0.001994941383600235,
      "logps/chosen": -20.613399505615234,
      "logps/rejected": -35.50030517578125,
      "loss": 0.4126,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": -0.1881721019744873,
      "rewards/margins": 1.925746202468872,
      "rewards/rejected": -2.1139183044433594,
      "step": 56
    },
    {
      "epoch": 0.9661016949152542,
      "grad_norm": 24.797060027751225,
      "learning_rate": 4.994443489734322e-07,
      "logits/chosen": -0.015878597274422646,
      "logits/rejected": 0.03222089633345604,
      "logps/chosen": -26.61467742919922,
      "logps/rejected": -43.46265411376953,
      "loss": 0.3777,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": -0.20103216171264648,
      "rewards/margins": 2.2574026584625244,
      "rewards/rejected": -2.458434820175171,
      "step": 57
    },
    {
      "epoch": 0.9830508474576272,
      "grad_norm": 31.271746187315504,
      "learning_rate": 4.993140707096525e-07,
      "logits/chosen": -0.010781673714518547,
      "logits/rejected": 0.019774336367845535,
      "logps/chosen": -32.57569885253906,
      "logps/rejected": -40.327457427978516,
      "loss": 0.3741,
      "rewards/accuracies": 0.875,
      "rewards/chosen": -0.2783823311328888,
      "rewards/margins": 1.6766613721847534,
      "rewards/rejected": -1.9550437927246094,
      "step": 58
    },
    {
      "epoch": 1.0,
      "grad_norm": 24.272642085140525,
      "learning_rate": 4.991701053019145e-07,
      "logits/chosen": -0.01512301154434681,
      "logits/rejected": -0.009732574224472046,
      "logps/chosen": -26.456878662109375,
      "logps/rejected": -43.373043060302734,
      "loss": 0.3705,
      "rewards/accuracies": 0.875,
      "rewards/chosen": -0.21655352413654327,
      "rewards/margins": 1.63704514503479,
      "rewards/rejected": -1.8535985946655273,
      "step": 59
    },
    {
      "epoch": 1.0169491525423728,
      "grad_norm": 21.176773022731307,
      "learning_rate": 4.990124606538042e-07,
      "logits/chosen": -0.06877182424068451,
      "logits/rejected": -0.03728486970067024,
      "logps/chosen": -18.644493103027344,
      "logps/rejected": -34.91282272338867,
      "loss": 0.2702,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.03750162199139595,
      "rewards/margins": 2.20272159576416,
      "rewards/rejected": -2.165220022201538,
      "step": 60
    },
    {
      "epoch": 1.0338983050847457,
      "grad_norm": 22.165507363954195,
      "learning_rate": 4.988411454198874e-07,
      "logits/chosen": 0.04961461201310158,
      "logits/rejected": 0.038518860936164856,
      "logps/chosen": -26.093852996826172,
      "logps/rejected": -32.088096618652344,
      "loss": 0.3406,
      "rewards/accuracies": 0.875,
      "rewards/chosen": 0.02193521521985531,
      "rewards/margins": 0.8063233494758606,
      "rewards/rejected": -0.7843881249427795,
      "step": 61
    },
    {
      "epoch": 1.0508474576271187,
      "grad_norm": 23.06392685939665,
      "learning_rate": 4.98656169005234e-07,
      "logits/chosen": 0.16032031178474426,
      "logits/rejected": 0.11802197992801666,
      "logps/chosen": -28.6109676361084,
      "logps/rejected": -37.80739974975586,
      "loss": 0.2784,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": 0.09771008789539337,
      "rewards/margins": 2.0157761573791504,
      "rewards/rejected": -1.9180662631988525,
      "step": 62
    },
    {
      "epoch": 1.0677966101694916,
      "grad_norm": 19.615263753046836,
      "learning_rate": 4.984575415649018e-07,
      "logits/chosen": -0.06321832537651062,
      "logits/rejected": -0.0122019462287426,
      "logps/chosen": -26.929264068603516,
      "logps/rejected": -45.03318405151367,
      "loss": 0.2581,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": -0.20472079515457153,
      "rewards/margins": 2.59661602973938,
      "rewards/rejected": -2.8013365268707275,
      "step": 63
    },
    {
      "epoch": 1.0847457627118644,
      "grad_norm": 18.50797643625125,
      "learning_rate": 4.982452740033792e-07,
      "logits/chosen": -0.06859354674816132,
      "logits/rejected": -0.07365603744983673,
      "logps/chosen": -26.131860733032227,
      "logps/rejected": -34.671546936035156,
      "loss": 0.2422,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": -0.026859302073717117,
      "rewards/margins": 2.035529375076294,
      "rewards/rejected": -2.0623886585235596,
      "step": 64
    },
    {
      "epoch": 1.1016949152542372,
      "grad_norm": 20.414520001604362,
      "learning_rate": 4.980193779739863e-07,
      "logits/chosen": 0.009079991839826107,
      "logits/rejected": -0.0031675295904278755,
      "logps/chosen": -29.644994735717773,
      "logps/rejected": -45.55342102050781,
      "loss": 0.2681,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": -0.13062885403633118,
      "rewards/margins": 2.6173148155212402,
      "rewards/rejected": -2.747943878173828,
      "step": 65
    },
    {
      "epoch": 1.11864406779661,
      "grad_norm": 20.9255946117037,
      "learning_rate": 4.977798658782351e-07,
      "logits/chosen": -0.08888844400644302,
      "logits/rejected": -0.0911368578672409,
      "logps/chosen": -26.463741302490234,
      "logps/rejected": -41.51061248779297,
      "loss": 0.2946,
      "rewards/accuracies": 0.875,
      "rewards/chosen": -0.03352803736925125,
      "rewards/margins": 1.772619605064392,
      "rewards/rejected": -1.806147575378418,
      "step": 66
    },
    {
      "epoch": 1.1355932203389831,
      "grad_norm": 21.302356946411365,
      "learning_rate": 4.975267508651491e-07,
      "logits/chosen": -0.028940977528691292,
      "logits/rejected": 0.0028336727991700172,
      "logps/chosen": -25.707382202148438,
      "logps/rejected": -30.72091293334961,
      "loss": 0.2749,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": -0.02922699600458145,
      "rewards/margins": 1.9206253290176392,
      "rewards/rejected": -1.9498521089553833,
      "step": 67
    },
    {
      "epoch": 1.152542372881356,
      "grad_norm": 19.180516279847765,
      "learning_rate": 4.97260046830541e-07,
      "logits/chosen": -0.1452866494655609,
      "logits/rejected": -0.038837701082229614,
      "logps/chosen": -20.76878547668457,
      "logps/rejected": -42.36342239379883,
      "loss": 0.2481,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.08528683334589005,
      "rewards/margins": 2.6560869216918945,
      "rewards/rejected": -2.5708000659942627,
      "step": 68
    },
    {
      "epoch": 1.1694915254237288,
      "grad_norm": 21.190018630764428,
      "learning_rate": 4.969797684162497e-07,
      "logits/chosen": -0.12156227976083755,
      "logits/rejected": -0.0709511935710907,
|
"logps/chosen": -22.62305450439453, |
|
"logps/rejected": -36.76183319091797, |
|
"loss": 0.2828, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.10709138959646225, |
|
"rewards/margins": 2.4480578899383545, |
|
"rewards/rejected": -2.3409664630889893, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 1.1864406779661016, |
|
"grad_norm": 17.29217666731802, |
|
"learning_rate": 4.966859310093372e-07, |
|
"logits/chosen": 0.007492711767554283, |
|
"logits/rejected": 0.019001876935362816, |
|
"logps/chosen": -27.733966827392578, |
|
"logps/rejected": -40.42127227783203, |
|
"loss": 0.2438, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.1447516232728958, |
|
"rewards/margins": 2.107698678970337, |
|
"rewards/rejected": -2.252450466156006, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.2033898305084745, |
|
"grad_norm": 25.122032977225658, |
|
"learning_rate": 4.96378550741243e-07, |
|
"logits/chosen": -0.057199642062187195, |
|
"logits/rejected": -0.06447561085224152, |
|
"logps/chosen": -27.951690673828125, |
|
"logps/rejected": -37.76457977294922, |
|
"loss": 0.2896, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.13775676488876343, |
|
"rewards/margins": 1.7086197137832642, |
|
"rewards/rejected": -1.8463765382766724, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 1.2203389830508475, |
|
"grad_norm": 17.44185897051635, |
|
"learning_rate": 4.960576444868992e-07, |
|
"logits/chosen": -0.03605864569544792, |
|
"logits/rejected": -0.08552936464548111, |
|
"logps/chosen": -26.663238525390625, |
|
"logps/rejected": -49.157798767089844, |
|
"loss": 0.2207, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.12792766094207764, |
|
"rewards/margins": 3.0712804794311523, |
|
"rewards/rejected": -3.1992080211639404, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.2372881355932204, |
|
"grad_norm": 23.51722551646514, |
|
"learning_rate": 4.957232298638035e-07, |
|
"logits/chosen": -0.14576715230941772, |
|
"logits/rejected": -0.1281927525997162, |
|
"logps/chosen": -26.146411895751953, |
|
"logps/rejected": -39.19955825805664, |
|
"loss": 0.2843, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.11095957458019257, |
|
"rewards/margins": 2.2008328437805176, |
|
"rewards/rejected": -2.3117926120758057, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 1.2542372881355932, |
|
"grad_norm": 17.504748122629483, |
|
"learning_rate": 4.953753252310525e-07, |
|
"logits/chosen": -0.10337841510772705, |
|
"logits/rejected": -0.11298589408397675, |
|
"logps/chosen": -26.215497970581055, |
|
"logps/rejected": -36.04429244995117, |
|
"loss": 0.2075, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.19672133028507233, |
|
"rewards/margins": 1.8031116724014282, |
|
"rewards/rejected": -1.9998328685760498, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 1.271186440677966, |
|
"grad_norm": 20.652812065700907, |
|
"learning_rate": 4.950139496883334e-07, |
|
"logits/chosen": 0.06242116168141365, |
|
"logits/rejected": 0.06666561216115952, |
|
"logps/chosen": -23.245695114135742, |
|
"logps/rejected": -31.755294799804688, |
|
"loss": 0.2429, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.064823217689991, |
|
"rewards/margins": 2.3041014671325684, |
|
"rewards/rejected": -2.2392783164978027, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.288135593220339, |
|
"grad_norm": 23.08981113112083, |
|
"learning_rate": 4.94639123074876e-07, |
|
"logits/chosen": -0.0955105572938919, |
|
"logits/rejected": -0.06442946940660477, |
|
"logps/chosen": -23.934703826904297, |
|
"logps/rejected": -35.5153694152832, |
|
"loss": 0.2569, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.10075034201145172, |
|
"rewards/margins": 2.1841156482696533, |
|
"rewards/rejected": -2.2848658561706543, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 1.305084745762712, |
|
"grad_norm": 21.41973590257042, |
|
"learning_rate": 4.942508659683626e-07, |
|
"logits/chosen": -0.04648435115814209, |
|
"logits/rejected": -0.013210049830377102, |
|
"logps/chosen": -32.94620132446289, |
|
"logps/rejected": -53.122039794921875, |
|
"loss": 0.269, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.09716464579105377, |
|
"rewards/margins": 3.2333667278289795, |
|
"rewards/rejected": -3.1362016201019287, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 1.3220338983050848, |
|
"grad_norm": 22.84510019593904, |
|
"learning_rate": 4.938491996837994e-07, |
|
"logits/chosen": -0.005726225674152374, |
|
"logits/rejected": -0.0035298746079206467, |
|
"logps/chosen": -21.76548957824707, |
|
"logps/rejected": -39.55729293823242, |
|
"loss": 0.2568, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.0918298214673996, |
|
"rewards/margins": 2.4565834999084473, |
|
"rewards/rejected": -2.3647537231445312, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 1.3389830508474576, |
|
"grad_norm": 17.384288528010632, |
|
"learning_rate": 4.934341462723454e-07, |
|
"logits/chosen": -0.14137157797813416, |
|
"logits/rejected": -0.1316397786140442, |
|
"logps/chosen": -20.925193786621094, |
|
"logps/rejected": -36.4559211730957, |
|
"loss": 0.2113, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.10143867135047913, |
|
"rewards/margins": 2.8934736251831055, |
|
"rewards/rejected": -2.7920351028442383, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 1.3559322033898304, |
|
"grad_norm": 20.990326447186, |
|
"learning_rate": 4.930057285201027e-07, |
|
"logits/chosen": -0.09045147150754929, |
|
"logits/rejected": -0.08031099289655685, |
|
"logps/chosen": -21.96762466430664, |
|
"logps/rejected": -36.81184387207031, |
|
"loss": 0.2569, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.19079303741455078, |
|
"rewards/margins": 2.315279483795166, |
|
"rewards/rejected": -2.506072759628296, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.3728813559322033, |
|
"grad_norm": 18.098050286729354, |
|
"learning_rate": 4.925639699468645e-07, |
|
"logits/chosen": -0.08457757532596588, |
|
"logits/rejected": -0.07319922745227814, |
|
"logps/chosen": -21.135604858398438, |
|
"logps/rejected": -33.960086822509766, |
|
"loss": 0.1857, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.054994210600852966, |
|
"rewards/margins": 2.582826852798462, |
|
"rewards/rejected": -2.5278327465057373, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 1.3898305084745763, |
|
"grad_norm": 18.355783625838907, |
|
"learning_rate": 4.921088948048246e-07, |
|
"logits/chosen": 0.0004070308059453964, |
|
"logits/rejected": 0.010508737526834011, |
|
"logps/chosen": -19.553733825683594, |
|
"logps/rejected": -24.943431854248047, |
|
"loss": 0.2258, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.1966938078403473, |
|
"rewards/margins": 2.039564609527588, |
|
"rewards/rejected": -1.8428709506988525, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 1.4067796610169492, |
|
"grad_norm": 18.59890208951988, |
|
"learning_rate": 4.916405280772462e-07, |
|
"logits/chosen": 0.061064671725034714, |
|
"logits/rejected": 0.04233198240399361, |
|
"logps/chosen": -31.1833553314209, |
|
"logps/rejected": -37.992191314697266, |
|
"loss": 0.2471, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.24824562668800354, |
|
"rewards/margins": 2.010815143585205, |
|
"rewards/rejected": -2.259060859680176, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 1.423728813559322, |
|
"grad_norm": 18.608818270077023, |
|
"learning_rate": 4.911588954770896e-07, |
|
"logits/chosen": 0.006485683843493462, |
|
"logits/rejected": 0.017345350235700607, |
|
"logps/chosen": -23.56964683532715, |
|
"logps/rejected": -33.626216888427734, |
|
"loss": 0.2325, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.10479970276355743, |
|
"rewards/margins": 2.1866378784179688, |
|
"rewards/rejected": -2.2914376258850098, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 1.4406779661016949, |
|
"grad_norm": 27.860656554762212, |
|
"learning_rate": 4.906640234456011e-07, |
|
"logits/chosen": -0.10989750176668167, |
|
"logits/rejected": -0.08497381210327148, |
|
"logps/chosen": -20.454971313476562, |
|
"logps/rejected": -33.20934295654297, |
|
"loss": 0.2399, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.07277680188417435, |
|
"rewards/margins": 2.7808988094329834, |
|
"rewards/rejected": -2.7081220149993896, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 1.457627118644068, |
|
"grad_norm": 17.529622871109098, |
|
"learning_rate": 4.90155939150861e-07, |
|
"logits/chosen": -0.01597762666642666, |
|
"logits/rejected": -0.02296941541135311, |
|
"logps/chosen": -25.70912742614746, |
|
"logps/rejected": -41.43511199951172, |
|
"loss": 0.1949, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.09175632894039154, |
|
"rewards/margins": 3.4984822273254395, |
|
"rewards/rejected": -3.590238571166992, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 1.4745762711864407, |
|
"grad_norm": 19.778355379129565, |
|
"learning_rate": 4.896346704862927e-07, |
|
"logits/chosen": -0.00542130321264267, |
|
"logits/rejected": -0.00442717969417572, |
|
"logps/chosen": -25.11708641052246, |
|
"logps/rejected": -38.2928581237793, |
|
"loss": 0.2137, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.42681190371513367, |
|
"rewards/margins": 2.821324348449707, |
|
"rewards/rejected": -3.248136281967163, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 1.4915254237288136, |
|
"grad_norm": 20.237221371085674, |
|
"learning_rate": 4.891002460691305e-07, |
|
"logits/chosen": -0.12523381412029266, |
|
"logits/rejected": -0.12707139551639557, |
|
"logps/chosen": -28.615737915039062, |
|
"logps/rejected": -44.548152923583984, |
|
"loss": 0.2198, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.43669962882995605, |
|
"rewards/margins": 3.5562210083007812, |
|
"rewards/rejected": -3.992920160293579, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 1.5084745762711864, |
|
"grad_norm": 31.896672790729536, |
|
"learning_rate": 4.885526952388497e-07, |
|
"logits/chosen": -0.15658609569072723, |
|
"logits/rejected": -0.15329544246196747, |
|
"logps/chosen": -26.822874069213867, |
|
"logps/rejected": -40.6098747253418, |
|
"loss": 0.2059, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.17557695508003235, |
|
"rewards/margins": 3.500253200531006, |
|
"rewards/rejected": -3.675830364227295, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 1.5254237288135593, |
|
"grad_norm": 19.488400567309405, |
|
"learning_rate": 4.879920480555549e-07, |
|
"logits/chosen": -0.08191860467195511, |
|
"logits/rejected": -0.008589975535869598, |
|
"logps/chosen": -31.191484451293945, |
|
"logps/rejected": -51.83546829223633, |
|
"loss": 0.2254, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.1294003576040268, |
|
"rewards/margins": 2.9513542652130127, |
|
"rewards/rejected": -3.080754518508911, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.542372881355932, |
|
"grad_norm": 20.01485074144144, |
|
"learning_rate": 4.874183352983297e-07, |
|
"logits/chosen": -0.022624505683779716, |
|
"logits/rejected": -0.03187233582139015, |
|
"logps/chosen": -24.933706283569336, |
|
"logps/rejected": -31.99811363220215, |
|
"loss": 0.2481, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.09345364570617676, |
|
"rewards/margins": 2.6890523433685303, |
|
"rewards/rejected": -2.5955986976623535, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 1.559322033898305, |
|
"grad_norm": 17.44552952468708, |
|
"learning_rate": 4.868315884635478e-07, |
|
"logits/chosen": -0.13437671959400177, |
|
"logits/rejected": -0.09966325759887695, |
|
"logps/chosen": -28.581546783447266, |
|
"logps/rejected": -40.725303649902344, |
|
"loss": 0.1702, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.3545893132686615, |
|
"rewards/margins": 2.1073248386383057, |
|
"rewards/rejected": -2.4619140625, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 1.576271186440678, |
|
"grad_norm": 25.932478195676993, |
|
"learning_rate": 4.862318397631433e-07, |
|
"logits/chosen": -0.04836834594607353, |
|
"logits/rejected": -0.06467059254646301, |
|
"logps/chosen": -24.941530227661133, |
|
"logps/rejected": -38.25274658203125, |
|
"loss": 0.252, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.04554582014679909, |
|
"rewards/margins": 2.8091211318969727, |
|
"rewards/rejected": -2.8546671867370605, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 1.5932203389830508, |
|
"grad_norm": 17.31152835419153, |
|
"learning_rate": 4.856191221228422e-07, |
|
"logits/chosen": -0.14374472200870514, |
|
"logits/rejected": -0.1499704271554947, |
|
"logps/chosen": -25.189186096191406, |
|
"logps/rejected": -48.39046859741211, |
|
"loss": 0.2548, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.014746442437171936, |
|
"rewards/margins": 3.3370161056518555, |
|
"rewards/rejected": -3.351762533187866, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 1.6101694915254239, |
|
"grad_norm": 21.553200648682367, |
|
"learning_rate": 4.84993469180355e-07, |
|
"logits/chosen": -0.25248920917510986, |
|
"logits/rejected": -0.1786680817604065, |
|
"logps/chosen": -21.31267547607422, |
|
"logps/rejected": -40.57464599609375, |
|
"loss": 0.1897, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.06702820956707001, |
|
"rewards/margins": 3.686950206756592, |
|
"rewards/rejected": -3.619922399520874, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 1.6271186440677967, |
|
"grad_norm": 16.618810404954317, |
|
"learning_rate": 4.843549152835302e-07, |
|
"logits/chosen": -0.17732582986354828, |
|
"logits/rejected": -0.15217895805835724, |
|
"logps/chosen": -29.09910774230957, |
|
"logps/rejected": -38.864524841308594, |
|
"loss": 0.1892, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.05224495008587837, |
|
"rewards/margins": 2.6117098331451416, |
|
"rewards/rejected": -2.559464693069458, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 1.6440677966101696, |
|
"grad_norm": 16.96144669030696, |
|
"learning_rate": 4.837034954884681e-07, |
|
"logits/chosen": -0.13769695162773132, |
|
"logits/rejected": -0.09738799184560776, |
|
"logps/chosen": -16.64884376525879, |
|
"logps/rejected": -34.0985107421875, |
|
"loss": 0.2166, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0295465886592865, |
|
"rewards/margins": 3.098619222640991, |
|
"rewards/rejected": -3.0690724849700928, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 1.6610169491525424, |
|
"grad_norm": 17.80864093537469, |
|
"learning_rate": 4.83039245557597e-07, |
|
"logits/chosen": -0.016016261652112007, |
|
"logits/rejected": -0.05212865397334099, |
|
"logps/chosen": -26.810836791992188, |
|
"logps/rejected": -38.81320571899414, |
|
"loss": 0.1875, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.07362563908100128, |
|
"rewards/margins": 2.9003326892852783, |
|
"rewards/rejected": -2.9739584922790527, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 1.6779661016949152, |
|
"grad_norm": 20.332172117010963, |
|
"learning_rate": 4.823622019577088e-07, |
|
"logits/chosen": -0.22029350697994232, |
|
"logits/rejected": -0.1754826307296753, |
|
"logps/chosen": -24.44580841064453, |
|
"logps/rejected": -31.48262596130371, |
|
"loss": 0.2123, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.004874859936535358, |
|
"rewards/margins": 2.326341152191162, |
|
"rewards/rejected": -2.3312156200408936, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 1.694915254237288, |
|
"grad_norm": 20.940720757392302, |
|
"learning_rate": 4.816724018579583e-07, |
|
"logits/chosen": -0.08975666761398315, |
|
"logits/rejected": -0.03957574442028999, |
|
"logps/chosen": -36.57925796508789, |
|
"logps/rejected": -41.47373962402344, |
|
"loss": 0.2237, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.10056591033935547, |
|
"rewards/margins": 3.393941879272461, |
|
"rewards/rejected": -3.2933762073516846, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.711864406779661, |
|
"grad_norm": 17.000783102847848, |
|
"learning_rate": 4.809698831278217e-07, |
|
"logits/chosen": -0.09356296807527542, |
|
"logits/rejected": -0.09570194780826569, |
|
"logps/chosen": -25.839569091796875, |
|
"logps/rejected": -42.873077392578125, |
|
"loss": 0.1959, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1941157877445221, |
|
"rewards/margins": 3.0593459606170654, |
|
"rewards/rejected": -3.2534618377685547, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 1.7288135593220337, |
|
"grad_norm": 26.43033048122211, |
|
"learning_rate": 4.802546843350177e-07, |
|
"logits/chosen": -0.03907548263669014, |
|
"logits/rejected": -0.0613831952214241, |
|
"logps/chosen": -25.94208335876465, |
|
"logps/rejected": -34.799400329589844, |
|
"loss": 0.257, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.1075030267238617, |
|
"rewards/margins": 2.6531782150268555, |
|
"rewards/rejected": -2.545675277709961, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 1.7457627118644068, |
|
"grad_norm": 19.25248915197079, |
|
"learning_rate": 4.795268447433906e-07, |
|
"logits/chosen": -0.23271867632865906, |
|
"logits/rejected": -0.2442181557416916, |
|
"logps/chosen": -21.609224319458008, |
|
"logps/rejected": -39.6169319152832, |
|
"loss": 0.1843, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.4244083762168884, |
|
"rewards/margins": 3.7757644653320312, |
|
"rewards/rejected": -4.2001729011535645, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 1.7627118644067796, |
|
"grad_norm": 21.40405538405152, |
|
"learning_rate": 4.787864043107546e-07, |
|
"logits/chosen": -0.10186932981014252, |
|
"logits/rejected": -0.10761649906635284, |
|
"logps/chosen": -24.1138858795166, |
|
"logps/rejected": -23.169330596923828, |
|
"loss": 0.2512, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.08395804464817047, |
|
"rewards/margins": 0.9992507696151733, |
|
"rewards/rejected": -0.9152926802635193, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 1.7796610169491527, |
|
"grad_norm": 20.65970281462911, |
|
"learning_rate": 4.780334036866996e-07, |
|
"logits/chosen": -0.1446046382188797, |
|
"logits/rejected": -0.16783642768859863, |
|
"logps/chosen": -29.0926513671875, |
|
"logps/rejected": -47.739131927490234, |
|
"loss": 0.1819, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.4550026059150696, |
|
"rewards/margins": 3.175567865371704, |
|
"rewards/rejected": -3.630570650100708, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.7966101694915255, |
|
"grad_norm": 16.55598459027438, |
|
"learning_rate": 4.772678842103605e-07, |
|
"logits/chosen": -0.06549476087093353, |
|
"logits/rejected": -0.04416227340698242, |
|
"logps/chosen": -25.375438690185547, |
|
"logps/rejected": -39.032981872558594, |
|
"loss": 0.138, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.16781294345855713, |
|
"rewards/margins": 3.484158992767334, |
|
"rewards/rejected": -3.6519718170166016, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 1.8135593220338984, |
|
"grad_norm": 16.11829115416798, |
|
"learning_rate": 4.764898879081467e-07, |
|
"logits/chosen": -0.05152374878525734, |
|
"logits/rejected": -0.07160673290491104, |
|
"logps/chosen": -23.518722534179688, |
|
"logps/rejected": -43.82634735107422, |
|
"loss": 0.1763, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2105274647474289, |
|
"rewards/margins": 3.0297629833221436, |
|
"rewards/rejected": -2.819235324859619, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 1.8305084745762712, |
|
"grad_norm": 18.544747915953614, |
|
"learning_rate": 4.7569945749143586e-07, |
|
"logits/chosen": -0.00994398258626461, |
|
"logits/rejected": 0.006802310235798359, |
|
"logps/chosen": -23.792747497558594, |
|
"logps/rejected": -47.211280822753906, |
|
"loss": 0.2052, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3990446925163269, |
|
"rewards/margins": 3.575429916381836, |
|
"rewards/rejected": -3.9744746685028076, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 1.847457627118644, |
|
"grad_norm": 15.674768365246683, |
|
"learning_rate": 4.748966363542285e-07, |
|
"logits/chosen": -0.10318706929683685, |
|
"logits/rejected": -0.04973382502794266, |
|
"logps/chosen": -20.84232521057129, |
|
"logps/rejected": -39.88136672973633, |
|
"loss": 0.1698, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.17250564694404602, |
|
"rewards/margins": 3.216583251953125, |
|
"rewards/rejected": -3.0440773963928223, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 1.8644067796610169, |
|
"grad_norm": 16.323100274211107, |
|
"learning_rate": 4.7408146857076563e-07, |
|
"logits/chosen": 0.08578380197286606, |
|
"logits/rejected": 0.04284593090415001, |
|
"logps/chosen": -37.73735809326172, |
|
"logps/rejected": -38.75680923461914, |
|
"loss": 0.1792, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.11772266030311584, |
|
"rewards/margins": 2.448854446411133, |
|
"rewards/rejected": -2.33113169670105, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.8813559322033897, |
|
"grad_norm": 16.578710310200407, |
|
"learning_rate": 4.732539988931096e-07, |
|
"logits/chosen": -0.26771169900894165, |
|
"logits/rejected": -0.26380079984664917, |
|
"logps/chosen": -23.918312072753906, |
|
"logps/rejected": -43.63589096069336, |
|
"loss": 0.1382, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.2503085136413574, |
|
"rewards/margins": 3.4693069458007812, |
|
"rewards/rejected": -3.7196154594421387, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 1.8983050847457628, |
|
"grad_norm": 19.979288606666017, |
|
"learning_rate": 4.7241427274868683e-07, |
|
"logits/chosen": -0.048879463225603104, |
|
"logits/rejected": 0.00943760946393013, |
|
"logps/chosen": -24.316715240478516, |
|
"logps/rejected": -42.57545471191406, |
|
"loss": 0.2025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.01973732002079487, |
|
"rewards/margins": 3.4818313121795654, |
|
"rewards/rejected": -3.5015687942504883, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 1.9152542372881356, |
|
"grad_norm": 16.77919383034577, |
|
"learning_rate": 4.7156233623779383e-07, |
|
"logits/chosen": -0.017183750867843628, |
|
"logits/rejected": -0.02489522099494934, |
|
"logps/chosen": -30.669607162475586, |
|
"logps/rejected": -35.61785125732422, |
|
"loss": 0.171, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.1518259346485138, |
|
"rewards/margins": 2.721503973007202, |
|
"rewards/rejected": -2.8733298778533936, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 1.9322033898305084, |
|
"grad_norm": 25.510192937611073, |
|
"learning_rate": 4.7069823613106687e-07, |
|
"logits/chosen": -0.25519174337387085, |
|
"logits/rejected": -0.21938219666481018, |
|
"logps/chosen": -32.64997100830078, |
|
"logps/rejected": -46.399112701416016, |
|
"loss": 0.198, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.36455288529396057, |
|
"rewards/margins": 3.620523452758789, |
|
"rewards/rejected": -3.985076904296875, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 1.9491525423728815, |
|
"grad_norm": 21.709479844123084, |
|
"learning_rate": 4.698220198669136e-07, |
|
"logits/chosen": -0.15014870464801788, |
|
"logits/rejected": -0.14446985721588135, |
|
"logps/chosen": -23.829439163208008, |
|
"logps/rejected": -37.09071350097656, |
|
"loss": 0.2222, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.201849102973938, |
|
"rewards/margins": 3.0588748455047607, |
|
"rewards/rejected": -3.26072359085083, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.9661016949152543, |
|
"grad_norm": 20.84348155110451, |
|
"learning_rate": 4.6893373554890917e-07, |
|
"logits/chosen": -0.1855657547712326, |
|
"logits/rejected": -0.1457989662885666, |
|
"logps/chosen": -30.961164474487305, |
|
"logps/rejected": -47.25037384033203, |
|
"loss": 0.217, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.3446941375732422, |
|
"rewards/margins": 3.6179933547973633, |
|
"rewards/rejected": -3.9626879692077637, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 1.9830508474576272, |
|
"grad_norm": 14.188597523254197, |
|
"learning_rate": 4.6803343194315546e-07, |
|
"logits/chosen": -0.09809039533138275, |
|
"logits/rejected": -0.060599129647016525, |
|
"logps/chosen": -29.427833557128906, |
|
"logps/rejected": -46.29072952270508, |
|
"loss": 0.1172, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.34794139862060547, |
|
"rewards/margins": 3.890174388885498, |
|
"rewards/rejected": -4.2381157875061035, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 14.21262907810025, |
|
"learning_rate": 4.6712115847560353e-07, |
|
"logits/chosen": -0.0804528221487999, |
|
"logits/rejected": -0.0880361869931221, |
|
"logps/chosen": -22.719079971313477, |
|
"logps/rejected": -47.828243255615234, |
|
"loss": 0.1696, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.2510998249053955, |
|
"rewards/margins": 4.143679618835449, |
|
"rewards/rejected": -3.8925797939300537, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 2.016949152542373, |
|
"grad_norm": 7.256194218627331, |
|
"learning_rate": 4.661969652293402e-07, |
|
"logits/chosen": -0.057237230241298676, |
|
"logits/rejected": -0.03790592402219772, |
|
"logps/chosen": -21.60989761352539, |
|
"logps/rejected": -43.51523208618164, |
|
"loss": 0.0744, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.11654786765575409, |
|
"rewards/margins": 3.8127760887145996, |
|
"rewards/rejected": -3.69622802734375, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 2.0338983050847457, |
|
"grad_norm": 8.74634777891102, |
|
"learning_rate": 4.652609029418388e-07, |
|
"logits/chosen": 0.03335125744342804, |
|
"logits/rejected": 0.031772270798683167, |
|
"logps/chosen": -21.453704833984375, |
|
"logps/rejected": -40.3062858581543, |
|
"loss": 0.0893, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.10782061517238617, |
|
"rewards/margins": 4.060611248016357, |
|
"rewards/rejected": -3.9527902603149414, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.0508474576271185, |
|
"grad_norm": 8.313099929127045, |
|
"learning_rate": 4.6431302300217366e-07, |
|
"logits/chosen": -0.20796310901641846, |
|
"logits/rejected": -0.18069806694984436, |
|
"logps/chosen": -27.584365844726562, |
|
"logps/rejected": -37.579673767089844, |
|
"loss": 0.0917, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3969431519508362, |
|
"rewards/margins": 3.233177900314331, |
|
"rewards/rejected": -2.8362350463867188, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 2.0677966101694913, |
|
"grad_norm": 10.855796103467934, |
|
"learning_rate": 4.633533774481987e-07, |
|
"logits/chosen": -0.07592164725065231, |
|
"logits/rejected": -0.0696810930967331, |
|
"logps/chosen": -27.249908447265625, |
|
"logps/rejected": -45.94511413574219, |
|
"loss": 0.0845, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.021982379257678986, |
|
"rewards/margins": 4.154269695281982, |
|
"rewards/rejected": -4.176252365112305, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.084745762711864, |
|
"grad_norm": 7.255720151076396, |
|
"learning_rate": 4.623820189636905e-07, |
|
"logits/chosen": -0.19116753339767456, |
|
"logits/rejected": -0.1705985963344574, |
|
"logps/chosen": -26.491065979003906, |
|
"logps/rejected": -50.236698150634766, |
|
"loss": 0.0909, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.14240173995494843, |
|
"rewards/margins": 4.492888927459717, |
|
"rewards/rejected": -4.350486755371094, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 2.1016949152542375, |
|
"grad_norm": 8.03074731997706, |
|
"learning_rate": 4.613990008754565e-07, |
|
"logits/chosen": -0.12923955917358398, |
|
"logits/rejected": -0.14741843938827515, |
|
"logps/chosen": -28.261474609375, |
|
"logps/rejected": -36.72936248779297, |
|
"loss": 0.1005, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.6439403295516968, |
|
"rewards/margins": 3.5893638134002686, |
|
"rewards/rejected": -2.9454240798950195, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 2.1186440677966103, |
|
"grad_norm": 7.19275728016155, |
|
"learning_rate": 4.60404377150407e-07, |
|
"logits/chosen": -0.09195713698863983, |
|
"logits/rejected": -0.042211033403873444, |
|
"logps/chosen": -23.310510635375977, |
|
"logps/rejected": -41.93342590332031, |
|
"loss": 0.0925, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0040088072419166565, |
|
"rewards/margins": 3.2483134269714355, |
|
"rewards/rejected": -3.2443044185638428, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 2.135593220338983, |
|
"grad_norm": 7.466339863674321, |
|
"learning_rate": 4.593982023925925e-07, |
|
"logits/chosen": -0.07431389391422272, |
|
"logits/rejected": -0.06840626150369644, |
|
"logps/chosen": -25.431446075439453, |
|
"logps/rejected": -39.0665168762207, |
|
"loss": 0.09, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.14491936564445496, |
|
"rewards/margins": 3.5672109127044678, |
|
"rewards/rejected": -3.4222917556762695, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 2.152542372881356, |
|
"grad_norm": 8.19688100505555, |
|
"learning_rate": 4.58380531840206e-07, |
|
"logits/chosen": -0.120096854865551, |
|
"logits/rejected": -0.10113926976919174, |
|
"logps/chosen": -26.030086517333984, |
|
"logps/rejected": -37.91970443725586, |
|
"loss": 0.0953, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21827784180641174, |
|
"rewards/margins": 4.060682773590088, |
|
"rewards/rejected": -3.842404842376709, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 2.169491525423729, |
|
"grad_norm": 9.892790899219712, |
|
"learning_rate": 4.5735142136255045e-07, |
|
"logits/chosen": -0.23804128170013428, |
|
"logits/rejected": -0.23227332532405853, |
|
"logps/chosen": -27.41203498840332, |
|
"logps/rejected": -49.19248962402344, |
|
"loss": 0.0933, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.12989288568496704, |
|
"rewards/margins": 4.297806262969971, |
|
"rewards/rejected": -4.427699565887451, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 2.1864406779661016, |
|
"grad_norm": 6.571853125948924, |
|
"learning_rate": 4.5631092745697164e-07, |
|
"logits/chosen": -0.00046368176117539406, |
|
"logits/rejected": 0.014133242890238762, |
|
"logps/chosen": -25.415313720703125, |
|
"logps/rejected": -41.508079528808594, |
|
"loss": 0.0569, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3947104215621948, |
|
"rewards/margins": 4.418177127838135, |
|
"rewards/rejected": -4.023467063903809, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 2.2033898305084745, |
|
"grad_norm": 7.081057065438042, |
|
"learning_rate": 4.5525910724575645e-07, |
|
"logits/chosen": -0.20635852217674255, |
|
"logits/rejected": -0.1863619089126587, |
|
"logps/chosen": -27.593435287475586, |
|
"logps/rejected": -50.18062210083008, |
|
"loss": 0.0915, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3062703311443329, |
|
"rewards/margins": 4.946234703063965, |
|
"rewards/rejected": -4.639964580535889, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.2203389830508473, |
|
"grad_norm": 6.94722893216983, |
|
"learning_rate": 4.54196018472997e-07, |
|
"logits/chosen": -0.1825593113899231, |
|
"logits/rejected": -0.18460941314697266, |
|
"logps/chosen": -25.40302276611328, |
|
"logps/rejected": -57.28022003173828, |
|
"loss": 0.0597, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3234606981277466, |
|
"rewards/margins": 5.864286422729492, |
|
"rewards/rejected": -6.187747001647949, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 2.23728813559322, |
|
"grad_norm": 6.703220344523385, |
|
"learning_rate": 4.5312171950142033e-07, |
|
"logits/chosen": -0.1518273502588272, |
|
"logits/rejected": -0.09540899842977524, |
|
"logps/chosen": -21.725143432617188, |
|
"logps/rejected": -38.91670608520508, |
|
"loss": 0.0716, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3190383315086365, |
|
"rewards/margins": 4.065824508666992, |
|
"rewards/rejected": -3.746786117553711, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 2.2542372881355934, |
|
"grad_norm": 7.318607428943175, |
|
"learning_rate": 4.520362693091845e-07, |
|
"logits/chosen": -0.12475726008415222, |
|
"logits/rejected": -0.12865117192268372, |
|
"logps/chosen": -23.161043167114258, |
|
"logps/rejected": -36.68880081176758, |
|
"loss": 0.0762, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.030918624252080917, |
|
"rewards/margins": 3.1682627201080322, |
|
"rewards/rejected": -3.1373443603515625, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 2.2711864406779663, |
|
"grad_norm": 6.4216049351024065, |
|
"learning_rate": 4.5093972748664087e-07, |
|
"logits/chosen": -0.09874700009822845, |
|
"logits/rejected": -0.10628420114517212, |
|
"logps/chosen": -28.58932113647461, |
|
"logps/rejected": -47.10905075073242, |
|
"loss": 0.055, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.31862539052963257, |
|
"rewards/margins": 4.816265106201172, |
|
"rewards/rejected": -4.4976396560668945, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 2.288135593220339, |
|
"grad_norm": 5.595876217706418, |
|
"learning_rate": 4.498321542330622e-07, |
|
"logits/chosen": -0.17151176929473877, |
|
"logits/rejected": -0.18770024180412292, |
|
"logps/chosen": -22.070384979248047, |
|
"logps/rejected": -49.778038024902344, |
|
"loss": 0.0435, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.06101692467927933, |
|
"rewards/margins": 5.113625526428223, |
|
"rewards/rejected": -5.052608013153076, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 2.305084745762712, |
|
"grad_norm": 8.583744234061204, |
|
"learning_rate": 4.4871361035333833e-07, |
|
"logits/chosen": -0.1267111748456955, |
|
"logits/rejected": -0.11681263148784637, |
|
"logps/chosen": -21.870920181274414, |
|
"logps/rejected": -39.6839714050293, |
|
"loss": 0.0796, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.31499701738357544, |
|
"rewards/margins": 3.777963638305664, |
|
"rewards/rejected": -3.4629664421081543, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 2.3220338983050848, |
|
"grad_norm": 7.125850476151505, |
|
"learning_rate": 4.475841572546374e-07, |
|
"logits/chosen": -0.19854867458343506, |
|
"logits/rejected": -0.16304975748062134, |
|
"logps/chosen": -28.775941848754883, |
|
"logps/rejected": -39.197044372558594, |
|
"loss": 0.0799, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.10516883432865143, |
|
"rewards/margins": 3.753281593322754, |
|
"rewards/rejected": -3.858450174331665, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 2.3389830508474576, |
|
"grad_norm": 8.162386927617444, |
|
"learning_rate": 4.464438569430353e-07, |
|
"logits/chosen": -0.18249069154262543, |
|
"logits/rejected": -0.19290274381637573, |
|
"logps/chosen": -25.261497497558594, |
|
"logps/rejected": -37.97518539428711, |
|
"loss": 0.0598, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.16362299025058746, |
|
"rewards/margins": 3.9931089878082275, |
|
"rewards/rejected": -3.829486131668091, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 2.3559322033898304, |
|
"grad_norm": 6.79825948010009, |
|
"learning_rate": 4.452927720201112e-07, |
|
"logits/chosen": -0.15876157581806183, |
|
"logits/rejected": -0.15914849936962128, |
|
"logps/chosen": -23.805156707763672, |
|
"logps/rejected": -43.227264404296875, |
|
"loss": 0.0702, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.14774608612060547, |
|
"rewards/margins": 4.2368483543396, |
|
"rewards/rejected": -4.089102268218994, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 2.3728813559322033, |
|
"grad_norm": 5.9070394129722565, |
|
"learning_rate": 4.441309656795106e-07, |
|
"logits/chosen": -0.1470584124326706, |
|
"logits/rejected": -0.12824571132659912, |
|
"logps/chosen": -24.07137107849121, |
|
"logps/rejected": -51.49998474121094, |
|
"loss": 0.0575, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.09202487766742706, |
|
"rewards/margins": 4.7118940353393555, |
|
"rewards/rejected": -4.619868755340576, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.389830508474576, |
|
"grad_norm": 6.6818032600348864, |
|
"learning_rate": 4.429585017034766e-07, |
|
"logits/chosen": -0.12072446942329407, |
|
"logits/rejected": -0.1437748223543167, |
|
"logps/chosen": -26.129920959472656, |
|
"logps/rejected": -50.33393096923828, |
|
"loss": 0.0723, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.024979308247566223, |
|
"rewards/margins": 5.7934794425964355, |
|
"rewards/rejected": -5.768500328063965, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 2.406779661016949, |
|
"grad_norm": 5.465459623937437, |
|
"learning_rate": 4.417754444593478e-07, |
|
"logits/chosen": -0.17397671937942505, |
|
"logits/rejected": -0.18419091403484344, |
|
"logps/chosen": -27.539466857910156, |
|
"logps/rejected": -45.487571716308594, |
|
"loss": 0.0487, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.023346930742263794, |
|
"rewards/margins": 4.994349479675293, |
|
"rewards/rejected": -4.97100305557251, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 2.423728813559322, |
|
"grad_norm": 10.390645074466443, |
|
"learning_rate": 4.4058185889602497e-07, |
|
"logits/chosen": -0.22157034277915955, |
|
"logits/rejected": -0.22870029509067535, |
|
"logps/chosen": -16.434494018554688, |
|
"logps/rejected": -37.32805633544922, |
|
"loss": 0.0972, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.3774448335170746, |
|
"rewards/margins": 4.384706497192383, |
|
"rewards/rejected": -4.007261276245117, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 2.440677966101695, |
|
"grad_norm": 9.44436087598635, |
|
"learning_rate": 4.39377810540405e-07, |
|
"logits/chosen": -0.21542900800704956, |
|
"logits/rejected": -0.22131392359733582, |
|
"logps/chosen": -36.0152702331543, |
|
"logps/rejected": -38.466373443603516, |
|
"loss": 0.1026, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.5316247344017029, |
|
"rewards/margins": 2.671638250350952, |
|
"rewards/rejected": -3.2032630443573, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 2.457627118644068, |
|
"grad_norm": 5.963157138060162, |
|
"learning_rate": 4.38163365493784e-07, |
|
"logits/chosen": -0.17747551202774048, |
|
"logits/rejected": -0.1994229406118393, |
|
"logps/chosen": -32.599082946777344, |
|
"logps/rejected": -62.15748596191406, |
|
"loss": 0.0596, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2045070230960846, |
|
"rewards/margins": 4.970805644989014, |
|
"rewards/rejected": -4.766298770904541, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 2.4745762711864407, |
|
"grad_norm": 6.998829586239467, |
|
"learning_rate": 4.3693859042822774e-07, |
|
"logits/chosen": -0.06130817532539368, |
|
"logits/rejected": -0.04164750128984451, |
|
"logps/chosen": -28.672290802001953, |
|
"logps/rejected": -44.092681884765625, |
|
"loss": 0.064, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5117320418357849, |
|
"rewards/margins": 5.0716657638549805, |
|
"rewards/rejected": -4.559933662414551, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 2.4915254237288136, |
|
"grad_norm": 7.186169716835621, |
|
"learning_rate": 4.3570355258291223e-07, |
|
"logits/chosen": -0.16528643667697906, |
|
"logits/rejected": -0.14484813809394836, |
|
"logps/chosen": -27.115493774414062, |
|
"logps/rejected": -36.884578704833984, |
|
"loss": 0.0723, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5564872026443481, |
|
"rewards/margins": 3.266021251678467, |
|
"rewards/rejected": -2.709534168243408, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 2.5084745762711864, |
|
"grad_norm": 5.1159064429292735, |
|
"learning_rate": 4.344583197604318e-07, |
|
"logits/chosen": -0.20358271896839142, |
|
"logits/rejected": -0.20041170716285706, |
|
"logps/chosen": -23.109371185302734, |
|
"logps/rejected": -51.53319549560547, |
|
"loss": 0.0434, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.036565251648426056, |
|
"rewards/margins": 5.316205024719238, |
|
"rewards/rejected": -5.279640197753906, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 2.5254237288135593, |
|
"grad_norm": 8.402984257771724, |
|
"learning_rate": 4.332029603230767e-07, |
|
"logits/chosen": -0.08776924759149551, |
|
"logits/rejected": -0.07819744944572449, |
|
"logps/chosen": -36.21211624145508, |
|
"logps/rejected": -42.74664306640625, |
|
"loss": 0.0647, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.16677923500537872, |
|
"rewards/margins": 4.416428089141846, |
|
"rewards/rejected": -4.583207130432129, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 2.542372881355932, |
|
"grad_norm": 6.450537035637719, |
|
"learning_rate": 4.319375431890806e-07, |
|
"logits/chosen": -0.21261297166347504, |
|
"logits/rejected": -0.15842606127262115, |
|
"logps/chosen": -23.646146774291992, |
|
"logps/rejected": -36.388458251953125, |
|
"loss": 0.0696, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.24000459909439087, |
|
"rewards/margins": 5.469123363494873, |
|
"rewards/rejected": -5.229118824005127, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.559322033898305, |
|
"grad_norm": 6.100900257526249, |
|
"learning_rate": 4.306621378288364e-07, |
|
"logits/chosen": -0.12006445229053497, |
|
"logits/rejected": -0.09317637979984283, |
|
"logps/chosen": -25.193214416503906, |
|
"logps/rejected": -50.55509948730469, |
|
"loss": 0.0539, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.055319640785455704, |
|
"rewards/margins": 4.907276153564453, |
|
"rewards/rejected": -4.9625959396362305, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 2.576271186440678, |
|
"grad_norm": 5.335466869594214, |
|
"learning_rate": 4.2937681426108275e-07, |
|
"logits/chosen": -0.156333327293396, |
|
"logits/rejected": -0.1703069657087326, |
|
"logps/chosen": -25.732696533203125, |
|
"logps/rejected": -37.75965118408203, |
|
"loss": 0.0476, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1128598153591156, |
|
"rewards/margins": 3.560478448867798, |
|
"rewards/rejected": -3.4476187229156494, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 2.593220338983051, |
|
"grad_norm": 6.414862486449905, |
|
"learning_rate": 4.280816430490602e-07, |
|
"logits/chosen": -0.14309167861938477, |
|
"logits/rejected": -0.14619530737400055, |
|
"logps/chosen": -23.593332290649414, |
|
"logps/rejected": -41.5565071105957, |
|
"loss": 0.0688, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.14477895200252533, |
|
"rewards/margins": 4.543487071990967, |
|
"rewards/rejected": -4.398708343505859, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 2.610169491525424, |
|
"grad_norm": 5.895188410626077, |
|
"learning_rate": 4.2677669529663686e-07, |
|
"logits/chosen": -0.1784745752811432, |
|
"logits/rejected": -0.16759036481380463, |
|
"logps/chosen": -22.0533390045166, |
|
"logps/rejected": -35.54384231567383, |
|
"loss": 0.0553, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.02956150844693184, |
|
"rewards/margins": 4.328366756439209, |
|
"rewards/rejected": -4.298805236816406, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 2.6271186440677967, |
|
"grad_norm": 5.912717779717486, |
|
"learning_rate": 4.254620426444053e-07, |
|
"logits/chosen": -0.15713754296302795, |
|
"logits/rejected": -0.1796114146709442, |
|
"logps/chosen": -25.46520233154297, |
|
"logps/rejected": -48.37349319458008, |
|
"loss": 0.0571, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.28247907757759094, |
|
"rewards/margins": 5.51485013961792, |
|
"rewards/rejected": -5.2323713302612305, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 2.6440677966101696, |
|
"grad_norm": 5.922436242193146, |
|
"learning_rate": 4.2413775726574923e-07, |
|
"logits/chosen": -0.11942790448665619, |
|
"logits/rejected": -0.11864694207906723, |
|
"logps/chosen": -24.162601470947266, |
|
"logps/rejected": -47.01225280761719, |
|
"loss": 0.0543, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.3353565037250519, |
|
"rewards/margins": 4.963751316070557, |
|
"rewards/rejected": -5.299108505249023, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 2.6610169491525424, |
|
"grad_norm": 6.106867092542455, |
|
"learning_rate": 4.228039118628815e-07, |
|
"logits/chosen": -0.12817731499671936, |
|
"logits/rejected": -0.09794219583272934, |
|
"logps/chosen": -23.699031829833984, |
|
"logps/rejected": -43.58228302001953, |
|
"loss": 0.0613, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.08896563202142715, |
|
"rewards/margins": 4.032917499542236, |
|
"rewards/rejected": -4.121883392333984, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 2.6779661016949152, |
|
"grad_norm": 5.803302086144925, |
|
"learning_rate": 4.214605796628526e-07, |
|
"logits/chosen": -0.2880489230155945, |
|
"logits/rejected": -0.23902469873428345, |
|
"logps/chosen": -23.32792091369629, |
|
"logps/rejected": -45.10264587402344, |
|
"loss": 0.0571, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.43519750237464905, |
|
"rewards/margins": 4.654225826263428, |
|
"rewards/rejected": -5.089423656463623, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 2.694915254237288, |
|
"grad_norm": 5.177802734038862, |
|
"learning_rate": 4.201078344135306e-07, |
|
"logits/chosen": -0.24913498759269714, |
|
"logits/rejected": -0.2534574270248413, |
|
"logps/chosen": -24.795732498168945, |
|
"logps/rejected": -42.07280349731445, |
|
"loss": 0.0545, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.02173246443271637, |
|
"rewards/margins": 4.118818283081055, |
|
"rewards/rejected": -4.14055061340332, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 2.711864406779661, |
|
"grad_norm": 9.038983465853134, |
|
"learning_rate": 4.187457503795526e-07, |
|
"logits/chosen": -0.18585993349552155, |
|
"logits/rejected": -0.16700756549835205, |
|
"logps/chosen": -27.172670364379883, |
|
"logps/rejected": -34.79685592651367, |
|
"loss": 0.0661, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2640396058559418, |
|
"rewards/margins": 4.567864894866943, |
|
"rewards/rejected": -4.303825378417969, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.7288135593220337, |
|
"grad_norm": 5.702053280294616, |
|
"learning_rate": 4.173744023382474e-07, |
|
"logits/chosen": -0.2842308282852173, |
|
"logits/rejected": -0.29381710290908813, |
|
"logps/chosen": -21.896320343017578, |
|
"logps/rejected": -41.444732666015625, |
|
"loss": 0.0511, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.02725343219935894, |
|
"rewards/margins": 4.254402160644531, |
|
"rewards/rejected": -4.227148532867432, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 2.7457627118644066, |
|
"grad_norm": 6.4501142174750825, |
|
"learning_rate": 4.159938655755306e-07, |
|
"logits/chosen": -0.1036592572927475, |
|
"logits/rejected": -0.052220165729522705, |
|
"logps/chosen": -26.139209747314453, |
|
"logps/rejected": -46.38983154296875, |
|
"loss": 0.044, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.15247440338134766, |
|
"rewards/margins": 5.240863800048828, |
|
"rewards/rejected": -5.393338680267334, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 2.7627118644067794, |
|
"grad_norm": 5.150964666613272, |
|
"learning_rate": 4.1460421588177094e-07, |
|
"logits/chosen": -0.25343507528305054, |
|
"logits/rejected": -0.24906288087368011, |
|
"logps/chosen": -21.305830001831055, |
|
"logps/rejected": -43.92711639404297, |
|
"loss": 0.0444, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2159092128276825, |
|
"rewards/margins": 5.304495811462402, |
|
"rewards/rejected": -5.520405292510986, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 2.7796610169491527, |
|
"grad_norm": 6.220860659821832, |
|
"learning_rate": 4.1320552954763037e-07, |
|
"logits/chosen": -0.06625357270240784, |
|
"logits/rejected": -0.0591760016977787, |
|
"logps/chosen": -32.38239288330078, |
|
"logps/rejected": -39.54067611694336, |
|
"loss": 0.05, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.11683804541826248, |
|
"rewards/margins": 3.713731288909912, |
|
"rewards/rejected": -3.83056902885437, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 2.7966101694915255, |
|
"grad_norm": 6.305844556479963, |
|
"learning_rate": 4.117978833598747e-07, |
|
"logits/chosen": -0.31626027822494507, |
|
"logits/rejected": -0.28030937910079956, |
|
"logps/chosen": -32.548240661621094, |
|
"logps/rejected": -42.81690979003906, |
|
"loss": 0.0607, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21723094582557678, |
|
"rewards/margins": 4.100663185119629, |
|
"rewards/rejected": -3.883432388305664, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 2.8135593220338984, |
|
"grad_norm": 6.559589012838323, |
|
"learning_rate": 4.1038135459715885e-07, |
|
"logits/chosen": -0.2386135458946228, |
|
"logits/rejected": -0.23032473027706146, |
|
"logps/chosen": -15.93246078491211, |
|
"logps/rejected": -36.63377380371094, |
|
"loss": 0.0592, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.09315376728773117, |
|
"rewards/margins": 5.372439861297607, |
|
"rewards/rejected": -5.279285907745361, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 2.830508474576271, |
|
"grad_norm": 8.346466429496452, |
|
"learning_rate": 4.0895602102578373e-07, |
|
"logits/chosen": -0.19355379045009613, |
|
"logits/rejected": -0.2431831657886505, |
|
"logps/chosen": -29.353004455566406, |
|
"logps/rejected": -47.65980911254883, |
|
"loss": 0.0556, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.30096274614334106, |
|
"rewards/margins": 4.469476699829102, |
|
"rewards/rejected": -4.770439624786377, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 2.847457627118644, |
|
"grad_norm": 7.84040587215191, |
|
"learning_rate": 4.075219608954278e-07, |
|
"logits/chosen": -0.0895601287484169, |
|
"logits/rejected": -0.06131096929311752, |
|
"logps/chosen": -21.794588088989258, |
|
"logps/rejected": -46.49802780151367, |
|
"loss": 0.0639, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.053712397813797, |
|
"rewards/margins": 5.101894855499268, |
|
"rewards/rejected": -5.155607223510742, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 2.864406779661017, |
|
"grad_norm": 10.599854581213274, |
|
"learning_rate": 4.0607925293484997e-07, |
|
"logits/chosen": -0.26595553755760193, |
|
"logits/rejected": -0.25741392374038696, |
|
"logps/chosen": -26.43805503845215, |
|
"logps/rejected": -34.98290252685547, |
|
"loss": 0.1256, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.20261424779891968, |
|
"rewards/margins": 3.2389473915100098, |
|
"rewards/rejected": -3.441561222076416, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 2.8813559322033897, |
|
"grad_norm": 7.045992493613005, |
|
"learning_rate": 4.046279763475687e-07, |
|
"logits/chosen": -0.36673855781555176, |
|
"logits/rejected": -0.37882646918296814, |
|
"logps/chosen": -23.698484420776367, |
|
"logps/rejected": -42.687042236328125, |
|
"loss": 0.0617, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.29158052802085876, |
|
"rewards/margins": 4.799960136413574, |
|
"rewards/rejected": -5.091540336608887, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.898305084745763, |
|
"grad_norm": 5.4596269860548645, |
|
"learning_rate": 4.031682108075128e-07, |
|
"logits/chosen": -0.23533686995506287, |
|
"logits/rejected": -0.2579227685928345, |
|
"logps/chosen": -24.494571685791016, |
|
"logps/rejected": -50.30744552612305, |
|
"loss": 0.0554, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4669819474220276, |
|
"rewards/margins": 5.18317985534668, |
|
"rewards/rejected": -5.6501617431640625, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 2.915254237288136, |
|
"grad_norm": 6.6964632868094, |
|
"learning_rate": 4.0170003645464835e-07, |
|
"logits/chosen": -0.28077659010887146, |
|
"logits/rejected": -0.2605874836444855, |
|
"logps/chosen": -30.141586303710938, |
|
"logps/rejected": -43.39360046386719, |
|
"loss": 0.0556, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2600446939468384, |
|
"rewards/margins": 4.748435020446777, |
|
"rewards/rejected": -5.008480072021484, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 2.9322033898305087, |
|
"grad_norm": 6.25941157775491, |
|
"learning_rate": 4.0022353389057793e-07, |
|
"logits/chosen": -0.18370503187179565, |
|
"logits/rejected": -0.15738657116889954, |
|
"logps/chosen": -28.340681076049805, |
|
"logps/rejected": -49.75542068481445, |
|
"loss": 0.0621, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.15175539255142212, |
|
"rewards/margins": 4.8475141525268555, |
|
"rewards/rejected": -4.999269485473633, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 2.9491525423728815, |
|
"grad_norm": 5.2666179841342755, |
|
"learning_rate": 3.9873878417411685e-07, |
|
"logits/chosen": -0.25363242626190186, |
|
"logits/rejected": -0.22387123107910156, |
|
"logps/chosen": -30.49943733215332, |
|
"logps/rejected": -51.61265563964844, |
|
"loss": 0.0416, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4314861297607422, |
|
"rewards/margins": 5.466277122497559, |
|
"rewards/rejected": -5.897763252258301, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 2.9661016949152543, |
|
"grad_norm": 6.6142603605122705, |
|
"learning_rate": 3.97245868816842e-07, |
|
"logits/chosen": -0.18011420965194702, |
|
"logits/rejected": -0.14474789798259735, |
|
"logps/chosen": -22.61705207824707, |
|
"logps/rejected": -34.74039840698242, |
|
"loss": 0.0656, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.35052689909935, |
|
"rewards/margins": 4.783888816833496, |
|
"rewards/rejected": -4.433361530303955, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 2.983050847457627, |
|
"grad_norm": 7.350936104887415, |
|
"learning_rate": 3.95744869778618e-07, |
|
"logits/chosen": -0.09902404993772507, |
|
"logits/rejected": -0.08743295818567276, |
|
"logps/chosen": -33.22180938720703, |
|
"logps/rejected": -48.17066192626953, |
|
"loss": 0.061, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3400125801563263, |
|
"rewards/margins": 4.35988712310791, |
|
"rewards/rejected": -4.699898719787598, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 6.41090986992918, |
|
"learning_rate": 3.942358694630967e-07, |
|
"logits/chosen": -0.3509863615036011, |
|
"logits/rejected": -0.3755185306072235, |
|
"logps/chosen": -24.426481246948242, |
|
"logps/rejected": -49.73809051513672, |
|
"loss": 0.0751, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2657313942909241, |
|
"rewards/margins": 4.6201324462890625, |
|
"rewards/rejected": -4.885863780975342, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 3.016949152542373, |
|
"grad_norm": 3.543481556246516, |
|
"learning_rate": 3.927189507131938e-07, |
|
"logits/chosen": -0.2855956554412842, |
|
"logits/rejected": -0.2373581826686859, |
|
"logps/chosen": -25.790422439575195, |
|
"logps/rejected": -42.86233139038086, |
|
"loss": 0.0293, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.47834354639053345, |
|
"rewards/margins": 4.5599493980407715, |
|
"rewards/rejected": -5.03829288482666, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 3.0338983050847457, |
|
"grad_norm": 4.068888114820521, |
|
"learning_rate": 3.9119419680654083e-07, |
|
"logits/chosen": -0.2456224113702774, |
|
"logits/rejected": -0.23849861323833466, |
|
"logps/chosen": -26.366769790649414, |
|
"logps/rejected": -45.77360153198242, |
|
"loss": 0.0346, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.29546892642974854, |
|
"rewards/margins": 5.436995029449463, |
|
"rewards/rejected": -5.141526222229004, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 3.0508474576271185, |
|
"grad_norm": 3.4882014800516408, |
|
"learning_rate": 3.896616914509131e-07, |
|
"logits/chosen": -0.28572219610214233, |
|
"logits/rejected": -0.24028098583221436, |
|
"logps/chosen": -25.306299209594727, |
|
"logps/rejected": -41.360389709472656, |
|
"loss": 0.027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.11109927296638489, |
|
"rewards/margins": 5.036979675292969, |
|
"rewards/rejected": -5.148078918457031, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 3.0677966101694913, |
|
"grad_norm": 4.061881260336592, |
|
"learning_rate": 3.881215187796344e-07, |
|
"logits/chosen": -0.17325271666049957, |
|
"logits/rejected": -0.15583127737045288, |
|
"logps/chosen": -22.642131805419922, |
|
"logps/rejected": -49.67926025390625, |
|
"loss": 0.0428, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.09581390023231506, |
|
"rewards/margins": 5.447430610656738, |
|
"rewards/rejected": -5.351616382598877, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 3.084745762711864, |
|
"grad_norm": 4.520714234908951, |
|
"learning_rate": 3.865737633469579e-07, |
|
"logits/chosen": -0.21125821769237518, |
|
"logits/rejected": -0.16403470933437347, |
|
"logps/chosen": -33.79856872558594, |
|
"logps/rejected": -48.687171936035156, |
|
"loss": 0.0492, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.7709572315216064, |
|
"rewards/margins": 5.184902191162109, |
|
"rewards/rejected": -5.955859661102295, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 3.1016949152542375, |
|
"grad_norm": 4.245352342549904, |
|
"learning_rate": 3.8501851012342444e-07, |
|
"logits/chosen": -0.28263112902641296, |
|
"logits/rejected": -0.24399010837078094, |
|
"logps/chosen": -29.092899322509766, |
|
"logps/rejected": -49.18566131591797, |
|
"loss": 0.0297, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2785920202732086, |
|
"rewards/margins": 5.724462032318115, |
|
"rewards/rejected": -6.003054141998291, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 3.1186440677966103, |
|
"grad_norm": 4.31037076617115, |
|
"learning_rate": 3.834558444911977e-07, |
|
"logits/chosen": -0.22499172389507294, |
|
"logits/rejected": -0.2413562387228012, |
|
"logps/chosen": -28.549692153930664, |
|
"logps/rejected": -54.757652282714844, |
|
"loss": 0.044, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.07061734795570374, |
|
"rewards/margins": 5.652264595031738, |
|
"rewards/rejected": -5.722881317138672, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 3.135593220338983, |
|
"grad_norm": 4.866640213250526, |
|
"learning_rate": 3.818858522393763e-07, |
|
"logits/chosen": -0.14125032722949982, |
|
"logits/rejected": -0.14179250597953796, |
|
"logps/chosen": -22.976459503173828, |
|
"logps/rejected": -49.11492156982422, |
|
"loss": 0.0416, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.040695205330848694, |
|
"rewards/margins": 5.625366687774658, |
|
"rewards/rejected": -5.584671974182129, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 3.152542372881356, |
|
"grad_norm": 4.2109878427330685, |
|
"learning_rate": 3.8030861955928496e-07, |
|
"logits/chosen": -0.30937284231185913, |
|
"logits/rejected": -0.31210747361183167, |
|
"logps/chosen": -30.636043548583984, |
|
"logps/rejected": -59.81259536743164, |
|
"loss": 0.028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.36846521496772766, |
|
"rewards/margins": 5.769496917724609, |
|
"rewards/rejected": -6.137962341308594, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 3.169491525423729, |
|
"grad_norm": 4.057066326184392, |
|
"learning_rate": 3.787242330397418e-07, |
|
"logits/chosen": -0.21361833810806274, |
|
"logits/rejected": -0.18969151377677917, |
|
"logps/chosen": -25.21249008178711, |
|
"logps/rejected": -47.042659759521484, |
|
"loss": 0.0351, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.09672415256500244, |
|
"rewards/margins": 5.194358825683594, |
|
"rewards/rejected": -5.291082859039307, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 3.1864406779661016, |
|
"grad_norm": 3.447901220325472, |
|
"learning_rate": 3.7713277966230513e-07, |
|
"logits/chosen": -0.2784624397754669, |
|
"logits/rejected": -0.28683120012283325, |
|
"logps/chosen": -36.1049690246582, |
|
"logps/rejected": -57.15819549560547, |
|
"loss": 0.0284, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.007891565561294556, |
|
"rewards/margins": 5.449771881103516, |
|
"rewards/rejected": -5.457663059234619, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 3.2033898305084745, |
|
"grad_norm": 4.442046435541958, |
|
"learning_rate": 3.755343467964981e-07, |
|
"logits/chosen": -0.31062349677085876, |
|
"logits/rejected": -0.3004721999168396, |
|
"logps/chosen": -28.58712387084961, |
|
"logps/rejected": -64.2608413696289, |
|
"loss": 0.0371, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4109271168708801, |
|
"rewards/margins": 7.114888668060303, |
|
"rewards/rejected": -7.525815010070801, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 3.2203389830508473, |
|
"grad_norm": 3.0719724662002896, |
|
"learning_rate": 3.739290221950123e-07, |
|
"logits/chosen": -0.17614498734474182, |
|
"logits/rejected": -0.1161608174443245, |
|
"logps/chosen": -19.90385627746582, |
|
"logps/rejected": -48.33121871948242, |
|
"loss": 0.0288, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1976543366909027, |
|
"rewards/margins": 6.546693325042725, |
|
"rewards/rejected": -6.349039077758789, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 3.23728813559322, |
|
"grad_norm": 3.947699710282849, |
|
"learning_rate": 3.723168939888901e-07, |
|
"logits/chosen": -0.2788640558719635, |
|
"logits/rejected": -0.2216426283121109, |
|
"logps/chosen": -31.930301666259766, |
|
"logps/rejected": -48.188316345214844, |
|
"loss": 0.035, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2625292241573334, |
|
"rewards/margins": 6.2579474449157715, |
|
"rewards/rejected": -5.995418548583984, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 3.2542372881355934, |
|
"grad_norm": 3.948675289926565, |
|
"learning_rate": 3.7069805068268624e-07, |
|
"logits/chosen": -0.24821209907531738, |
|
"logits/rejected": -0.2691497802734375, |
|
"logps/chosen": -23.103912353515625, |
|
"logps/rejected": -45.67485427856445, |
|
"loss": 0.051, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.5271704792976379, |
|
"rewards/margins": 5.4129743576049805, |
|
"rewards/rejected": -5.9401445388793945, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 3.2711864406779663, |
|
"grad_norm": 3.204036420155872, |
|
"learning_rate": 3.6907258114960915e-07, |
|
"logits/chosen": -0.20090129971504211, |
|
"logits/rejected": -0.1883653998374939, |
|
"logps/chosen": -21.614791870117188, |
|
"logps/rejected": -36.44792556762695, |
|
"loss": 0.0304, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.26388826966285706, |
|
"rewards/margins": 5.233245372772217, |
|
"rewards/rejected": -5.497133255004883, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 3.288135593220339, |
|
"grad_norm": 4.608553625728515, |
|
"learning_rate": 3.6744057462664194e-07, |
|
"logits/chosen": -0.22761565446853638, |
|
"logits/rejected": -0.18411225080490112, |
|
"logps/chosen": -33.556297302246094, |
|
"logps/rejected": -45.10346984863281, |
|
"loss": 0.0426, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.32763671875, |
|
"rewards/margins": 5.759217262268066, |
|
"rewards/rejected": -6.086853504180908, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 3.305084745762712, |
|
"grad_norm": 3.3003212602613052, |
|
"learning_rate": 3.658021207096432e-07, |
|
"logits/chosen": -0.26821860671043396, |
|
"logits/rejected": -0.23487797379493713, |
|
"logps/chosen": -26.26876449584961, |
|
"logps/rejected": -39.17176818847656, |
|
"loss": 0.0273, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.06157127395272255, |
|
"rewards/margins": 4.7874369621276855, |
|
"rewards/rejected": -4.725865364074707, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 3.3220338983050848, |
|
"grad_norm": 4.8557388954783915, |
|
"learning_rate": 3.6415730934842825e-07, |
|
"logits/chosen": -0.2502498924732208, |
|
"logits/rejected": -0.21418914198875427, |
|
"logps/chosen": -24.12335205078125, |
|
"logps/rejected": -39.51020431518555, |
|
"loss": 0.047, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.3541201651096344, |
|
"rewards/margins": 5.43333101272583, |
|
"rewards/rejected": -5.07921028137207, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 3.3389830508474576, |
|
"grad_norm": 3.1710739557100025, |
|
"learning_rate": 3.625062308418311e-07, |
|
"logits/chosen": -0.19088196754455566, |
|
"logits/rejected": -0.1449725329875946, |
|
"logps/chosen": -41.92289733886719, |
|
"logps/rejected": -52.62822341918945, |
|
"loss": 0.031, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7417705059051514, |
|
"rewards/margins": 5.704789161682129, |
|
"rewards/rejected": -6.446559906005859, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 3.3559322033898304, |
|
"grad_norm": 3.8833880103526273, |
|
"learning_rate": 3.6084897583274715e-07, |
|
"logits/chosen": -0.33713212609291077, |
|
"logits/rejected": -0.32788529992103577, |
|
"logps/chosen": -18.311298370361328, |
|
"logps/rejected": -47.206260681152344, |
|
"loss": 0.0276, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.28597769141197205, |
|
"rewards/margins": 5.729028701782227, |
|
"rewards/rejected": -6.015005588531494, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 3.3728813559322033, |
|
"grad_norm": 3.971746818851194, |
|
"learning_rate": 3.591856353031566e-07, |
|
"logits/chosen": -0.388487309217453, |
|
"logits/rejected": -0.3937668800354004, |
|
"logps/chosen": -20.602941513061523, |
|
"logps/rejected": -46.418514251708984, |
|
"loss": 0.0387, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0760723352432251, |
|
"rewards/margins": 6.133167266845703, |
|
"rewards/rejected": -6.209239959716797, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 3.389830508474576, |
|
"grad_norm": 2.669544955188557, |
|
"learning_rate": 3.5751630056913013e-07, |
|
"logits/chosen": -0.28054508566856384, |
|
"logits/rejected": -0.24293102324008942, |
|
"logps/chosen": -24.345874786376953, |
|
"logps/rejected": -43.055397033691406, |
|
"loss": 0.023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.006254836916923523, |
|
"rewards/margins": 5.404486179351807, |
|
"rewards/rejected": -5.410740375518799, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.406779661016949, |
|
"grad_norm": 3.472014476230378, |
|
"learning_rate": 3.558410632758153e-07, |
|
"logits/chosen": -0.3892117142677307, |
|
"logits/rejected": -0.3841942548751831, |
|
"logps/chosen": -22.507129669189453, |
|
"logps/rejected": -45.49005126953125, |
|
"loss": 0.0401, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.08385208249092102, |
|
"rewards/margins": 4.855816841125488, |
|
"rewards/rejected": -4.939668655395508, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 3.423728813559322, |
|
"grad_norm": 3.5814887606335124, |
|
"learning_rate": 3.5416001539240574e-07, |
|
"logits/chosen": -0.300984263420105, |
|
"logits/rejected": -0.28749731183052063, |
|
"logps/chosen": -22.618236541748047, |
|
"logps/rejected": -54.328731536865234, |
|
"loss": 0.0344, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.42270928621292114, |
|
"rewards/margins": 6.060704708099365, |
|
"rewards/rejected": -6.4834136962890625, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 3.440677966101695, |
|
"grad_norm": 3.9783986017754, |
|
"learning_rate": 3.5247324920709147e-07, |
|
"logits/chosen": -0.11381550878286362, |
|
"logits/rejected": -0.10474348813295364, |
|
"logps/chosen": -29.523387908935547, |
|
"logps/rejected": -44.939971923828125, |
|
"loss": 0.0279, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5131514668464661, |
|
"rewards/margins": 4.85312557220459, |
|
"rewards/rejected": -5.36627721786499, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 3.457627118644068, |
|
"grad_norm": 2.90882629880929, |
|
"learning_rate": 3.5078085732199307e-07, |
|
"logits/chosen": -0.17035694420337677, |
|
"logits/rejected": -0.14843972027301788, |
|
"logps/chosen": -24.29421615600586, |
|
"logps/rejected": -47.5906982421875, |
|
"loss": 0.027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.775327742099762, |
|
"rewards/margins": 5.236928462982178, |
|
"rewards/rejected": -6.012256145477295, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 3.4745762711864407, |
|
"grad_norm": 3.5359065761216906, |
|
"learning_rate": 3.490829326480773e-07, |
|
"logits/chosen": -0.2077549546957016, |
|
"logits/rejected": -0.139791339635849, |
|
"logps/chosen": -29.458728790283203, |
|
"logps/rejected": -46.196311950683594, |
|
"loss": 0.0261, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.43501347303390503, |
|
"rewards/margins": 5.490588188171387, |
|
"rewards/rejected": -5.925601005554199, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 3.4915254237288136, |
|
"grad_norm": 3.5732057063389924, |
|
"learning_rate": 3.4737956840005684e-07, |
|
"logits/chosen": -0.24159546196460724, |
|
"logits/rejected": -0.21804997324943542, |
|
"logps/chosen": -22.523195266723633, |
|
"logps/rejected": -40.27927780151367, |
|
"loss": 0.0383, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2203037589788437, |
|
"rewards/margins": 4.918404579162598, |
|
"rewards/rejected": -5.138708114624023, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 3.5084745762711864, |
|
"grad_norm": 3.2368948031127402, |
|
"learning_rate": 3.4567085809127245e-07, |
|
"logits/chosen": -0.3044562339782715, |
|
"logits/rejected": -0.28132855892181396, |
|
"logps/chosen": -23.9556827545166, |
|
"logps/rejected": -54.27796173095703, |
|
"loss": 0.0267, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4134722352027893, |
|
"rewards/margins": 6.515480041503906, |
|
"rewards/rejected": -6.928952217102051, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 3.5254237288135593, |
|
"grad_norm": 3.9848937033562515, |
|
"learning_rate": 3.439568955285595e-07, |
|
"logits/chosen": -0.3248399794101715, |
|
"logits/rejected": -0.2991315722465515, |
|
"logps/chosen": -19.110692977905273, |
|
"logps/rejected": -47.77824401855469, |
|
"loss": 0.0329, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6731768846511841, |
|
"rewards/margins": 6.201772689819336, |
|
"rewards/rejected": -6.874949932098389, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 3.542372881355932, |
|
"grad_norm": 3.1498741156916186, |
|
"learning_rate": 3.4223777480709804e-07, |
|
"logits/chosen": -0.3734952211380005, |
|
"logits/rejected": -0.32552629709243774, |
|
"logps/chosen": -18.623991012573242, |
|
"logps/rejected": -42.553443908691406, |
|
"loss": 0.0267, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.30939486622810364, |
|
"rewards/margins": 5.721473217010498, |
|
"rewards/rejected": -6.030868053436279, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 3.559322033898305, |
|
"grad_norm": 4.040639255967625, |
|
"learning_rate": 3.405135903052465e-07, |
|
"logits/chosen": -0.4112386703491211, |
|
"logits/rejected": -0.3649882376194, |
|
"logps/chosen": -28.818723678588867, |
|
"logps/rejected": -44.70659637451172, |
|
"loss": 0.0329, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.48197856545448303, |
|
"rewards/margins": 5.537832260131836, |
|
"rewards/rejected": -6.019810676574707, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.576271186440678, |
|
"grad_norm": 3.3478217712753966, |
|
"learning_rate": 3.3878443667936136e-07, |
|
"logits/chosen": -0.16748064756393433, |
|
"logits/rejected": -0.19592073559761047, |
|
"logps/chosen": -37.14228439331055, |
|
"logps/rejected": -62.434722900390625, |
|
"loss": 0.0191, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.086951494216919, |
|
"rewards/margins": 6.2296953201293945, |
|
"rewards/rejected": -7.316647529602051, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 3.593220338983051, |
|
"grad_norm": 3.9463700359583074, |
|
"learning_rate": 3.3705040885859967e-07, |
|
"logits/chosen": -0.3255730867385864, |
|
"logits/rejected": -0.27438968420028687, |
|
"logps/chosen": -34.4691276550293, |
|
"logps/rejected": -47.688350677490234, |
|
"loss": 0.0187, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7244514226913452, |
|
"rewards/margins": 5.521853446960449, |
|
"rewards/rejected": -6.246304988861084, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 3.610169491525424, |
|
"grad_norm": 3.6196960397708686, |
|
"learning_rate": 3.3531160203970805e-07, |
|
"logits/chosen": -0.3483354151248932, |
|
"logits/rejected": -0.317913681268692, |
|
"logps/chosen": -28.75990867614746, |
|
"logps/rejected": -48.366981506347656, |
|
"loss": 0.0315, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.642256498336792, |
|
"rewards/margins": 5.73888635635376, |
|
"rewards/rejected": -6.381142616271973, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 3.6271186440677967, |
|
"grad_norm": 4.95065620942278, |
|
"learning_rate": 3.3356811168179627e-07, |
|
"logits/chosen": -0.20646288990974426, |
|
"logits/rejected": -0.18285736441612244, |
|
"logps/chosen": -29.683345794677734, |
|
"logps/rejected": -42.32093811035156, |
|
"loss": 0.0397, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3692317008972168, |
|
"rewards/margins": 6.064602851867676, |
|
"rewards/rejected": -6.433835029602051, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 3.6440677966101696, |
|
"grad_norm": 3.3699006260035813, |
|
"learning_rate": 3.318200335010967e-07, |
|
"logits/chosen": -0.42737993597984314, |
|
"logits/rejected": -0.3845828175544739, |
|
"logps/chosen": -25.335176467895508, |
|
"logps/rejected": -42.636924743652344, |
|
"loss": 0.0245, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.28532662987709045, |
|
"rewards/margins": 6.151418685913086, |
|
"rewards/rejected": -5.866091728210449, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 3.6610169491525424, |
|
"grad_norm": 3.8837939121598777, |
|
"learning_rate": 3.3006746346570935e-07, |
|
"logits/chosen": -0.40326201915740967, |
|
"logits/rejected": -0.40920883417129517, |
|
"logps/chosen": -22.64775848388672, |
|
"logps/rejected": -39.44330596923828, |
|
"loss": 0.0294, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.18482859432697296, |
|
"rewards/margins": 5.748718738555908, |
|
"rewards/rejected": -5.933547019958496, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 3.6779661016949152, |
|
"grad_norm": 4.333458578457773, |
|
"learning_rate": 3.2831049779033395e-07, |
|
"logits/chosen": -0.443619042634964, |
|
"logits/rejected": -0.41168978810310364, |
|
"logps/chosen": -37.534263610839844, |
|
"logps/rejected": -64.37035369873047, |
|
"loss": 0.0335, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5087466835975647, |
|
"rewards/margins": 7.267013072967529, |
|
"rewards/rejected": -7.775759696960449, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 3.694915254237288, |
|
"grad_norm": 4.250140275463436, |
|
"learning_rate": 3.2654923293098666e-07, |
|
"logits/chosen": -0.2549651861190796, |
|
"logits/rejected": -0.1890694946050644, |
|
"logps/chosen": -26.34837532043457, |
|
"logps/rejected": -43.935028076171875, |
|
"loss": 0.035, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7253862023353577, |
|
"rewards/margins": 5.679473400115967, |
|
"rewards/rejected": -6.40485954284668, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 3.711864406779661, |
|
"grad_norm": 3.310632766464627, |
|
"learning_rate": 3.247837655797061e-07, |
|
"logits/chosen": -0.25092679262161255, |
|
"logits/rejected": -0.28778067231178284, |
|
"logps/chosen": -24.404443740844727, |
|
"logps/rejected": -47.01846694946289, |
|
"loss": 0.0223, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.29181113839149475, |
|
"rewards/margins": 6.665236473083496, |
|
"rewards/rejected": -6.957046985626221, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 3.7288135593220337, |
|
"grad_norm": 3.1436162956199496, |
|
"learning_rate": 3.2301419265924393e-07, |
|
"logits/chosen": -0.4150010645389557, |
|
"logits/rejected": -0.36361223459243774, |
|
"logps/chosen": -24.460697174072266, |
|
"logps/rejected": -44.857032775878906, |
|
"loss": 0.0291, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.22335419058799744, |
|
"rewards/margins": 6.003718852996826, |
|
"rewards/rejected": -6.227072715759277, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.7457627118644066, |
|
"grad_norm": 3.729031618521559, |
|
"learning_rate": 3.2124061131774443e-07, |
|
"logits/chosen": -0.3509747385978699, |
|
"logits/rejected": -0.358395516872406, |
|
"logps/chosen": -24.089895248413086, |
|
"logps/rejected": -52.84262466430664, |
|
"loss": 0.0247, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.14366820454597473, |
|
"rewards/margins": 5.806227207183838, |
|
"rewards/rejected": -5.94989538192749, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 3.7627118644067794, |
|
"grad_norm": 3.3066593649570315, |
|
"learning_rate": 3.194631189234109e-07, |
|
"logits/chosen": -0.4065392017364502, |
|
"logits/rejected": -0.37751972675323486, |
|
"logps/chosen": -32.56217956542969, |
|
"logps/rejected": -45.78569412231445, |
|
"loss": 0.0189, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3193157911300659, |
|
"rewards/margins": 5.7366862297058105, |
|
"rewards/rejected": -6.056003093719482, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 3.7796610169491527, |
|
"grad_norm": 2.793162644598459, |
|
"learning_rate": 3.1768181305916063e-07, |
|
"logits/chosen": -0.25837022066116333, |
|
"logits/rejected": -0.22268140316009521, |
|
"logps/chosen": -35.988895416259766, |
|
"logps/rejected": -54.8642463684082, |
|
"loss": 0.0157, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6435793042182922, |
|
"rewards/margins": 6.045925140380859, |
|
"rewards/rejected": -6.689504623413086, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 3.7966101694915255, |
|
"grad_norm": 5.31296637675809, |
|
"learning_rate": 3.158967915172669e-07, |
|
"logits/chosen": -0.25623688101768494, |
|
"logits/rejected": -0.2494334727525711, |
|
"logps/chosen": -25.375301361083984, |
|
"logps/rejected": -41.08918380737305, |
|
"loss": 0.0465, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.390929639339447, |
|
"rewards/margins": 5.473989009857178, |
|
"rewards/rejected": -5.864918231964111, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 3.8135593220338984, |
|
"grad_norm": 3.9032619129323582, |
|
"learning_rate": 3.141081522939911e-07, |
|
"logits/chosen": -0.31211555004119873, |
|
"logits/rejected": -0.23420506715774536, |
|
"logps/chosen": -35.506065368652344, |
|
"logps/rejected": -45.37016296386719, |
|
"loss": 0.0274, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.28194302320480347, |
|
"rewards/margins": 5.861372947692871, |
|
"rewards/rejected": -6.14331579208374, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 3.830508474576271, |
|
"grad_norm": 3.3703773992777712, |
|
"learning_rate": 3.1231599358420233e-07, |
|
"logits/chosen": -0.2667548954486847, |
|
"logits/rejected": -0.237786203622818, |
|
"logps/chosen": -25.19987678527832, |
|
"logps/rejected": -42.388084411621094, |
|
"loss": 0.0212, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0639249086380005, |
|
"rewards/margins": 5.1388325691223145, |
|
"rewards/rejected": -6.202757835388184, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 3.847457627118644, |
|
"grad_norm": 3.2509261883963583, |
|
"learning_rate": 3.105204137759867e-07, |
|
"logits/chosen": -0.35733070969581604, |
|
"logits/rejected": -0.29906269907951355, |
|
"logps/chosen": -31.326122283935547, |
|
"logps/rejected": -54.50325012207031, |
|
"loss": 0.0304, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.14473173022270203, |
|
"rewards/margins": 6.546075820922852, |
|
"rewards/rejected": -6.690806865692139, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 3.864406779661017, |
|
"grad_norm": 4.276773716118761, |
|
"learning_rate": 3.0872151144524594e-07, |
|
"logits/chosen": -0.40903520584106445, |
|
"logits/rejected": -0.42379483580589294, |
|
"logps/chosen": -25.51406478881836, |
|
"logps/rejected": -56.04070281982422, |
|
"loss": 0.0346, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5232114791870117, |
|
"rewards/margins": 7.154451370239258, |
|
"rewards/rejected": -7.6776628494262695, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 3.8813559322033897, |
|
"grad_norm": 3.0586357868954885, |
|
"learning_rate": 3.069193853502855e-07, |
|
"logits/chosen": -0.35119858384132385, |
|
"logits/rejected": -0.31669121980667114, |
|
"logps/chosen": -26.634798049926758, |
|
"logps/rejected": -43.51852798461914, |
|
"loss": 0.0303, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6264432668685913, |
|
"rewards/margins": 5.7952752113342285, |
|
"rewards/rejected": -6.421718597412109, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 3.898305084745763, |
|
"grad_norm": 3.809867857045704, |
|
"learning_rate": 3.0511413442639297e-07, |
|
"logits/chosen": -0.3418273329734802, |
|
"logits/rejected": -0.3366440534591675, |
|
"logps/chosen": -26.767898559570312, |
|
"logps/rejected": -66.91107940673828, |
|
"loss": 0.0254, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3621063232421875, |
|
"rewards/margins": 8.028979301452637, |
|
"rewards/rejected": -9.391084671020508, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.915254237288136, |
|
"grad_norm": 1.7319311965224584, |
|
"learning_rate": 3.0330585778040675e-07, |
|
"logits/chosen": -0.22780543565750122, |
|
"logits/rejected": -0.1367052048444748, |
|
"logps/chosen": -19.499248504638672, |
|
"logps/rejected": -37.6104736328125, |
|
"loss": 0.0099, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3209352195262909, |
|
"rewards/margins": 6.441976070404053, |
|
"rewards/rejected": -6.1210408210754395, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 3.9322033898305087, |
|
"grad_norm": 2.919480742746747, |
|
"learning_rate": 3.0149465468527457e-07, |
|
"logits/chosen": -0.3633422255516052, |
|
"logits/rejected": -0.3510938286781311, |
|
"logps/chosen": -24.75160026550293, |
|
"logps/rejected": -43.96453094482422, |
|
"loss": 0.0191, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.055519312620162964, |
|
"rewards/margins": 6.384317874908447, |
|
"rewards/rejected": -6.328798294067383, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 3.9491525423728815, |
|
"grad_norm": 2.6875831264015626, |
|
"learning_rate": 2.9968062457460437e-07, |
|
"logits/chosen": -0.30877232551574707, |
|
"logits/rejected": -0.2673957049846649, |
|
"logps/chosen": -22.01394271850586, |
|
"logps/rejected": -46.45256042480469, |
|
"loss": 0.0164, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.722076416015625, |
|
"rewards/margins": 6.105856895446777, |
|
"rewards/rejected": -6.827932834625244, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 3.9661016949152543, |
|
"grad_norm": 5.291054230890989, |
|
"learning_rate": 2.978638670372047e-07, |
|
"logits/chosen": -0.33912044763565063, |
|
"logits/rejected": -0.2657839357852936, |
|
"logps/chosen": -30.723812103271484, |
|
"logps/rejected": -52.49626159667969, |
|
"loss": 0.0396, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.223615050315857, |
|
"rewards/margins": 6.520167350769043, |
|
"rewards/rejected": -7.743781089782715, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 3.983050847457627, |
|
"grad_norm": 4.5082449746889495, |
|
"learning_rate": 2.9604448181161755e-07, |
|
"logits/chosen": -0.2287699282169342, |
|
"logits/rejected": -0.278522789478302, |
|
"logps/chosen": -21.338584899902344, |
|
"logps/rejected": -43.86865234375, |
|
"loss": 0.0315, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.26738277077674866, |
|
"rewards/margins": 5.24444580078125, |
|
"rewards/rejected": -5.511828422546387, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.760408994676017, |
|
"learning_rate": 2.9422256878064324e-07, |
|
"logits/chosen": -0.25730714201927185, |
|
"logits/rejected": -0.24561913311481476, |
|
"logps/chosen": -39.164676666259766, |
|
"logps/rejected": -58.313934326171875, |
|
"loss": 0.0142, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.588261127471924, |
|
"rewards/margins": 6.12579870223999, |
|
"rewards/rejected": -8.71406078338623, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 4.016949152542373, |
|
"grad_norm": 2.2785410277469302, |
|
"learning_rate": 2.923982279658564e-07, |
|
"logits/chosen": -0.34395280480384827, |
|
"logits/rejected": -0.23966065049171448, |
|
"logps/chosen": -38.35492706298828, |
|
"logps/rejected": -53.40243148803711, |
|
"loss": 0.0185, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2421057224273682, |
|
"rewards/margins": 6.917564392089844, |
|
"rewards/rejected": -8.159669876098633, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 4.033898305084746, |
|
"grad_norm": 3.2845417722614507, |
|
"learning_rate": 2.90571559522115e-07, |
|
"logits/chosen": -0.13574184477329254, |
|
"logits/rejected": -0.11650273948907852, |
|
"logps/chosen": -27.581148147583008, |
|
"logps/rejected": -39.88399887084961, |
|
"loss": 0.0304, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3537464737892151, |
|
"rewards/margins": 5.475383281707764, |
|
"rewards/rejected": -5.829129695892334, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 4.0508474576271185, |
|
"grad_norm": 2.1631508501013315, |
|
"learning_rate": 2.8874266373206215e-07, |
|
"logits/chosen": -0.3121250867843628, |
|
"logits/rejected": -0.24592992663383484, |
|
"logps/chosen": -29.24790382385254, |
|
"logps/rejected": -47.294334411621094, |
|
"loss": 0.0174, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.47773995995521545, |
|
"rewards/margins": 5.754822254180908, |
|
"rewards/rejected": -6.2325615882873535, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 4.067796610169491, |
|
"grad_norm": 2.4004940122434544, |
|
"learning_rate": 2.8691164100062034e-07, |
|
"logits/chosen": -0.36053359508514404, |
|
"logits/rejected": -0.34572604298591614, |
|
"logps/chosen": -31.519865036010742, |
|
"logps/rejected": -59.80055618286133, |
|
"loss": 0.0199, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.718724250793457, |
|
"rewards/margins": 7.598670959472656, |
|
"rewards/rejected": -8.317395210266113, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 4.084745762711864, |
|
"grad_norm": 2.6119447962907367, |
|
"learning_rate": 2.8507859184947953e-07, |
|
"logits/chosen": -0.43051332235336304, |
|
"logits/rejected": -0.4282737076282501, |
|
"logps/chosen": -26.50347137451172, |
|
"logps/rejected": -52.22574234008789, |
|
"loss": 0.0277, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.289501428604126, |
|
"rewards/margins": 6.555995941162109, |
|
"rewards/rejected": -6.845498085021973, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 4.101694915254237, |
|
"grad_norm": 2.239976713467154, |
|
"learning_rate": 2.8324361691157853e-07, |
|
"logits/chosen": -0.24347716569900513, |
|
"logits/rejected": -0.24979354441165924, |
|
"logps/chosen": -30.006914138793945, |
|
"logps/rejected": -59.73139190673828, |
|
"loss": 0.0161, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8718441128730774, |
|
"rewards/margins": 6.680701732635498, |
|
"rewards/rejected": -7.55254602432251, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 4.11864406779661, |
|
"grad_norm": 2.6364819568694497, |
|
"learning_rate": 2.8140681692558034e-07, |
|
"logits/chosen": -0.25327029824256897, |
|
"logits/rejected": -0.21109545230865479, |
|
"logps/chosen": -29.609922409057617, |
|
"logps/rejected": -46.73149490356445, |
|
"loss": 0.0288, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5046648979187012, |
|
"rewards/margins": 6.538877487182617, |
|
"rewards/rejected": -7.043542385101318, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 4.135593220338983, |
|
"grad_norm": 2.1372418800599786, |
|
"learning_rate": 2.7956829273034146e-07, |
|
"logits/chosen": -0.13386383652687073, |
|
"logits/rejected": -0.1250249445438385, |
|
"logps/chosen": -26.58926773071289, |
|
"logps/rejected": -51.22819900512695, |
|
"loss": 0.0232, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8299556970596313, |
|
"rewards/margins": 6.830400466918945, |
|
"rewards/rejected": -7.660356521606445, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 4.1525423728813555, |
|
"grad_norm": 2.9141398948843804, |
|
"learning_rate": 2.7772814525937634e-07, |
|
"logits/chosen": -0.32944080233573914, |
|
"logits/rejected": -0.27718019485473633, |
|
"logps/chosen": -28.87648582458496, |
|
"logps/rejected": -48.459808349609375, |
|
"loss": 0.0152, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7797695994377136, |
|
"rewards/margins": 6.1633992195129395, |
|
"rewards/rejected": -6.943169116973877, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 4.169491525423728, |
|
"grad_norm": 2.497866238527004, |
|
"learning_rate": 2.7588647553531576e-07, |
|
"logits/chosen": -0.25429630279541016, |
|
"logits/rejected": -0.23005954921245575, |
|
"logps/chosen": -25.186725616455078, |
|
"logps/rejected": -55.01511001586914, |
|
"loss": 0.0223, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.38165754079818726, |
|
"rewards/margins": 7.027002334594727, |
|
"rewards/rejected": -7.408658981323242, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 4.186440677966102, |
|
"grad_norm": 1.7406944144206382, |
|
"learning_rate": 2.7404338466436116e-07, |
|
"logits/chosen": -0.2958889901638031, |
|
"logits/rejected": -0.26341933012008667, |
|
"logps/chosen": -28.1710205078125, |
|
"logps/rejected": -50.88844299316406, |
|
"loss": 0.0114, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.06356866657733917, |
|
"rewards/margins": 7.469226360321045, |
|
"rewards/rejected": -7.532794952392578, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 4.203389830508475, |
|
"grad_norm": 2.330847556376873, |
|
"learning_rate": 2.721989738307337e-07, |
|
"logits/chosen": -0.3691413104534149, |
|
"logits/rejected": -0.35948917269706726, |
|
"logps/chosen": -29.122577667236328, |
|
"logps/rejected": -46.696510314941406, |
|
"loss": 0.0191, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.09089075028896332, |
|
"rewards/margins": 4.961187839508057, |
|
"rewards/rejected": -5.052079200744629, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 4.220338983050848, |
|
"grad_norm": 2.5507390864394046, |
|
"learning_rate": 2.7035334429111955e-07, |
|
"logits/chosen": -0.22923773527145386, |
|
"logits/rejected": -0.1796061396598816, |
|
"logps/chosen": -37.402748107910156, |
|
"logps/rejected": -61.04646682739258, |
|
"loss": 0.0203, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6148930191993713, |
|
"rewards/margins": 6.983782768249512, |
|
"rewards/rejected": -7.598675727844238, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 4.237288135593221, |
|
"grad_norm": 1.95547934634835, |
|
"learning_rate": 2.685065973691107e-07, |
|
"logits/chosen": -0.20895695686340332, |
|
"logits/rejected": -0.2264058142900467, |
|
"logps/chosen": -31.016735076904297, |
|
"logps/rejected": -56.749725341796875, |
|
"loss": 0.0125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8533796072006226, |
|
"rewards/margins": 6.410269737243652, |
|
"rewards/rejected": -7.2636494636535645, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 4.254237288135593, |
|
"grad_norm": 2.1411469355757973, |
|
"learning_rate": 2.6665883444964277e-07, |
|
"logits/chosen": -0.16789795458316803, |
|
"logits/rejected": -0.14672429859638214, |
|
"logps/chosen": -23.094444274902344, |
|
"logps/rejected": -55.99787139892578, |
|
"loss": 0.0152, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8806984424591064, |
|
"rewards/margins": 8.1028470993042, |
|
"rewards/rejected": -8.983545303344727, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 4.271186440677966, |
|
"grad_norm": 2.372366174155855, |
|
"learning_rate": 2.6481015697342856e-07, |
|
"logits/chosen": -0.3404889404773712, |
|
"logits/rejected": -0.32007667422294617, |
|
"logps/chosen": -19.16732406616211, |
|
"logps/rejected": -42.858253479003906, |
|
"loss": 0.018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.46728387475013733, |
|
"rewards/margins": 5.529178619384766, |
|
"rewards/rejected": -5.996462345123291, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 4.288135593220339, |
|
"grad_norm": 1.958723562417606, |
|
"learning_rate": 2.629606664313896e-07, |
|
"logits/chosen": -0.35188454389572144, |
|
"logits/rejected": -0.3609326481819153, |
|
"logps/chosen": -25.61526107788086, |
|
"logps/rejected": -50.27090072631836, |
|
"loss": 0.0131, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.02239191532135, |
|
"rewards/margins": 6.072734832763672, |
|
"rewards/rejected": -7.095126628875732, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 4.305084745762712, |
|
"grad_norm": 2.0112122888894115, |
|
"learning_rate": 2.611104643590838e-07, |
|
"logits/chosen": -0.29033514857292175, |
|
"logits/rejected": -0.26703035831451416, |
|
"logps/chosen": -21.255908966064453, |
|
"logps/rejected": -53.08380126953125, |
|
"loss": 0.0213, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.24649456143379211, |
|
"rewards/margins": 7.01984977722168, |
|
"rewards/rejected": -7.26634407043457, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 4.322033898305085, |
|
"grad_norm": 2.053603221627952, |
|
"learning_rate": 2.592596523311317e-07, |
|
"logits/chosen": -0.30223536491394043, |
|
"logits/rejected": -0.2536553740501404, |
|
"logps/chosen": -32.25640106201172, |
|
"logps/rejected": -43.348167419433594, |
|
"loss": 0.0131, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.06762519478797913, |
|
"rewards/margins": 6.735665321350098, |
|
"rewards/rejected": -6.803289890289307, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 4.338983050847458, |
|
"grad_norm": 2.8451217392600707, |
|
"learning_rate": 2.5740833195563994e-07, |
|
"logits/chosen": -0.3592408299446106, |
|
"logits/rejected": -0.32396936416625977, |
|
"logps/chosen": -29.65281867980957, |
|
"logps/rejected": -46.565242767333984, |
|
"loss": 0.0257, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1844983100891113, |
|
"rewards/margins": 5.651597499847412, |
|
"rewards/rejected": -6.836095809936523, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 4.3559322033898304, |
|
"grad_norm": 2.4064312299996398, |
|
"learning_rate": 2.5555660486862293e-07, |
|
"logits/chosen": -0.3634299039840698, |
|
"logits/rejected": -0.3088497817516327, |
|
"logps/chosen": -28.253725051879883, |
|
"logps/rejected": -48.81061553955078, |
|
"loss": 0.021, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.4266662895679474, |
|
"rewards/margins": 6.367308616638184, |
|
"rewards/rejected": -6.793975353240967, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 4.372881355932203, |
|
"grad_norm": 2.146631041454485, |
|
"learning_rate": 2.5370457272842315e-07, |
|
"logits/chosen": -0.24686959385871887, |
|
"logits/rejected": -0.18535006046295166, |
|
"logps/chosen": -33.190582275390625, |
|
"logps/rejected": -48.917503356933594, |
|
"loss": 0.0182, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8520054817199707, |
|
"rewards/margins": 5.909256935119629, |
|
"rewards/rejected": -6.761262893676758, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 4.389830508474576, |
|
"grad_norm": 2.0738992157558642, |
|
"learning_rate": 2.5185233721013053e-07, |
|
"logits/chosen": -0.359385222196579, |
|
"logits/rejected": -0.357438325881958, |
|
"logps/chosen": -24.519697189331055, |
|
"logps/rejected": -44.44859313964844, |
|
"loss": 0.0134, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.16271838545799255, |
|
"rewards/margins": 6.145666122436523, |
|
"rewards/rejected": -6.308384418487549, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 4.406779661016949, |
|
"grad_norm": 3.2910145632235572, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": -0.11309901624917984, |
|
"logits/rejected": -0.11735934764146805, |
|
"logps/chosen": -24.769031524658203, |
|
"logps/rejected": -52.153263092041016, |
|
"loss": 0.0257, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5375908017158508, |
|
"rewards/margins": 6.348197937011719, |
|
"rewards/rejected": -6.885788917541504, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 4.423728813559322, |
|
"grad_norm": 1.9904353477375836, |
|
"learning_rate": 2.4814766278986944e-07, |
|
"logits/chosen": -0.3224155604839325, |
|
"logits/rejected": -0.2858419716358185, |
|
"logps/chosen": -29.066646575927734, |
|
"logps/rejected": -63.335533142089844, |
|
"loss": 0.0133, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6143862009048462, |
|
"rewards/margins": 7.916276931762695, |
|
"rewards/rejected": -8.530662536621094, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 4.440677966101695, |
|
"grad_norm": 2.8941606742565, |
|
"learning_rate": 2.462954272715768e-07, |
|
"logits/chosen": -0.450508177280426, |
|
"logits/rejected": -0.4239945411682129, |
|
"logps/chosen": -35.900840759277344, |
|
"logps/rejected": -45.3778190612793, |
|
"loss": 0.0161, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7406729459762573, |
|
"rewards/margins": 4.7124552726745605, |
|
"rewards/rejected": -6.453128337860107, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 4.4576271186440675, |
|
"grad_norm": 2.38515146244392, |
|
"learning_rate": 2.4444339513137716e-07, |
|
"logits/chosen": -0.38119906187057495, |
|
"logits/rejected": -0.36609771847724915, |
|
"logps/chosen": -30.711692810058594, |
|
"logps/rejected": -60.621646881103516, |
|
"loss": 0.0232, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.59708571434021, |
|
"rewards/margins": 8.203582763671875, |
|
"rewards/rejected": -8.800668716430664, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 4.47457627118644, |
|
"grad_norm": 1.9234760349513347, |
|
"learning_rate": 2.4259166804436003e-07, |
|
"logits/chosen": -0.3686653971672058, |
|
"logits/rejected": -0.31526994705200195, |
|
"logps/chosen": -32.2381706237793, |
|
"logps/rejected": -53.97626876831055, |
|
"loss": 0.0127, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1651465892791748, |
|
"rewards/margins": 6.137485027313232, |
|
"rewards/rejected": -7.302631855010986, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 4.491525423728813, |
|
"grad_norm": 2.611964451389311, |
|
"learning_rate": 2.4074034766886826e-07, |
|
"logits/chosen": -0.3324103355407715, |
|
"logits/rejected": -0.26400357484817505, |
|
"logps/chosen": -23.483598709106445, |
|
"logps/rejected": -49.0655632019043, |
|
"loss": 0.0188, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.20981693267822266, |
|
"rewards/margins": 7.410755157470703, |
|
"rewards/rejected": -7.620572090148926, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 4.508474576271187, |
|
"grad_norm": 2.9683559733463056, |
|
"learning_rate": 2.3888953564091616e-07, |
|
"logits/chosen": -0.39179760217666626, |
|
"logits/rejected": -0.38096728920936584, |
|
"logps/chosen": -31.189739227294922, |
|
"logps/rejected": -53.24143600463867, |
|
"loss": 0.0197, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5632021427154541, |
|
"rewards/margins": 6.741451263427734, |
|
"rewards/rejected": -7.304653167724609, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 4.52542372881356, |
|
"grad_norm": 2.385454067550593, |
|
"learning_rate": 2.3703933356861044e-07, |
|
"logits/chosen": -0.41365846991539, |
|
"logits/rejected": -0.41495996713638306, |
|
"logps/chosen": -29.90151596069336, |
|
"logps/rejected": -53.238502502441406, |
|
"loss": 0.0179, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4617680311203003, |
|
"rewards/margins": 7.544755458831787, |
|
"rewards/rejected": -9.006523132324219, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 4.5423728813559325, |
|
"grad_norm": 2.2006915125969946, |
|
"learning_rate": 2.3518984302657144e-07, |
|
"logits/chosen": -0.27264100313186646, |
|
"logits/rejected": -0.29000911116600037, |
|
"logps/chosen": -22.446334838867188, |
|
"logps/rejected": -57.679481506347656, |
|
"loss": 0.0158, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5886446833610535, |
|
"rewards/margins": 7.639779090881348, |
|
"rewards/rejected": -8.228424072265625, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 4.559322033898305, |
|
"grad_norm": 2.150067392598777, |
|
"learning_rate": 2.333411655503572e-07, |
|
"logits/chosen": -0.2162581980228424, |
|
"logits/rejected": -0.16464056074619293, |
|
"logps/chosen": -29.03925323486328, |
|
"logps/rejected": -61.93821716308594, |
|
"loss": 0.0128, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7494891881942749, |
|
"rewards/margins": 8.04469108581543, |
|
"rewards/rejected": -8.794179916381836, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 4.576271186440678, |
|
"grad_norm": 3.2103132884907355, |
|
"learning_rate": 2.3149340263088927e-07, |
|
"logits/chosen": -0.4069588780403137, |
|
"logits/rejected": -0.39735129475593567, |
|
"logps/chosen": -25.142169952392578, |
|
"logps/rejected": -54.4061279296875, |
|
"loss": 0.0193, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7782204747200012, |
|
"rewards/margins": 7.401907920837402, |
|
"rewards/rejected": -8.18012809753418, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 4.593220338983051, |
|
"grad_norm": 3.4106686634255814, |
|
"learning_rate": 2.296466557088805e-07, |
|
"logits/chosen": -0.4093379080295563, |
|
"logits/rejected": -0.3818233013153076, |
|
"logps/chosen": -24.618453979492188, |
|
"logps/rejected": -53.87172317504883, |
|
"loss": 0.0181, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7368046045303345, |
|
"rewards/margins": 8.036300659179688, |
|
"rewards/rejected": -8.773106575012207, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 4.610169491525424, |
|
"grad_norm": 2.227751631839648, |
|
"learning_rate": 2.278010261692663e-07, |
|
"logits/chosen": -0.3430100679397583, |
|
"logits/rejected": -0.32270756363868713, |
|
"logps/chosen": -27.739946365356445, |
|
"logps/rejected": -50.70249938964844, |
|
"loss": 0.0095, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3157005310058594, |
|
"rewards/margins": 6.844423294067383, |
|
"rewards/rejected": -8.160122871398926, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 4.627118644067797, |
|
"grad_norm": 2.718467637449855, |
|
"learning_rate": 2.2595661533563887e-07, |
|
"logits/chosen": -0.39202579855918884, |
|
"logits/rejected": -0.37344199419021606, |
|
"logps/chosen": -28.954833984375, |
|
"logps/rejected": -52.06825256347656, |
|
"loss": 0.0199, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1071019172668457, |
|
"rewards/margins": 5.494106769561768, |
|
"rewards/rejected": -6.6012091636657715, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 4.6440677966101696, |
|
"grad_norm": 1.8979857813927623, |
|
"learning_rate": 2.2411352446468424e-07, |
|
"logits/chosen": -0.2902525067329407, |
|
"logits/rejected": -0.2769823372364044, |
|
"logps/chosen": -21.65315055847168, |
|
"logps/rejected": -53.80813980102539, |
|
"loss": 0.0096, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.367279589176178, |
|
"rewards/margins": 7.097145080566406, |
|
"rewards/rejected": -7.464425086975098, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 4.661016949152542, |
|
"grad_norm": 1.9652537606332783, |
|
"learning_rate": 2.2227185474062374e-07, |
|
"logits/chosen": -0.3663102984428406, |
|
"logits/rejected": -0.3732694983482361, |
|
"logps/chosen": -25.794607162475586, |
|
"logps/rejected": -52.91725540161133, |
|
"loss": 0.0166, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9790402054786682, |
|
"rewards/margins": 6.477062702178955, |
|
"rewards/rejected": -7.4561028480529785, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 4.677966101694915, |
|
"grad_norm": 2.3633232838068854, |
|
"learning_rate": 2.2043170726965857e-07, |
|
"logits/chosen": -0.3861359655857086, |
|
"logits/rejected": -0.33153507113456726, |
|
"logps/chosen": -26.395111083984375, |
|
"logps/rejected": -46.5584716796875, |
|
"loss": 0.0195, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.21754157543182373, |
|
"rewards/margins": 6.467673301696777, |
|
"rewards/rejected": -6.685215473175049, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 4.694915254237288, |
|
"grad_norm": 2.1756252476018925, |
|
"learning_rate": 2.1859318307441966e-07, |
|
"logits/chosen": -0.3655955493450165, |
|
"logits/rejected": -0.2858305871486664, |
|
"logps/chosen": -31.3674373626709, |
|
"logps/rejected": -55.38779067993164, |
|
"loss": 0.015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8195254802703857, |
|
"rewards/margins": 7.775407314300537, |
|
"rewards/rejected": -8.594932556152344, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 4.711864406779661, |
|
"grad_norm": 2.241164329559457, |
|
"learning_rate": 2.1675638308842142e-07, |
|
"logits/chosen": -0.32866764068603516, |
|
"logits/rejected": -0.3286994397640228, |
|
"logps/chosen": -23.2701358795166, |
|
"logps/rejected": -50.79416275024414, |
|
"loss": 0.0173, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.15657079219818115, |
|
"rewards/margins": 7.880356788635254, |
|
"rewards/rejected": -7.723785877227783, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 4.728813559322034, |
|
"grad_norm": 2.2629759157234983, |
|
"learning_rate": 2.149214081505205e-07, |
|
"logits/chosen": -0.36036401987075806, |
|
"logits/rejected": -0.29053574800491333, |
|
"logps/chosen": -28.8673095703125, |
|
"logps/rejected": -45.81161880493164, |
|
"loss": 0.0095, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.12538844347000122, |
|
"rewards/margins": 7.48973274230957, |
|
"rewards/rejected": -7.615121841430664, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 4.745762711864407, |
|
"grad_norm": 2.9812316443120133, |
|
"learning_rate": 2.1308835899937972e-07, |
|
"logits/chosen": -0.4776584506034851, |
|
"logits/rejected": -0.4330436587333679, |
|
"logps/chosen": -26.34911346435547, |
|
"logps/rejected": -46.94022750854492, |
|
"loss": 0.0167, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6425644159317017, |
|
"rewards/margins": 6.7085676193237305, |
|
"rewards/rejected": -7.351131916046143, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 4.762711864406779, |
|
"grad_norm": 3.0770144105013757, |
|
"learning_rate": 2.112573362679379e-07, |
|
"logits/chosen": -0.3524860143661499, |
|
"logits/rejected": -0.35296574234962463, |
|
"logps/chosen": -36.912437438964844, |
|
"logps/rejected": -64.40142059326172, |
|
"loss": 0.0278, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.4731464087963104, |
|
"rewards/margins": 7.376462936401367, |
|
"rewards/rejected": -7.849608421325684, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 4.779661016949152, |
|
"grad_norm": 3.0797571681448845, |
|
"learning_rate": 2.09428440477885e-07, |
|
"logits/chosen": -0.5038030743598938, |
|
"logits/rejected": -0.3990883231163025, |
|
"logps/chosen": -24.284114837646484, |
|
"logps/rejected": -54.10459518432617, |
|
"loss": 0.0198, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.30106982588768005, |
|
"rewards/margins": 9.012360572814941, |
|
"rewards/rejected": -9.313429832458496, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 4.796610169491525, |
|
"grad_norm": 1.96025124354275, |
|
"learning_rate": 2.0760177203414366e-07, |
|
"logits/chosen": -0.46829330921173096, |
|
"logits/rejected": -0.42585426568984985, |
|
"logps/chosen": -30.78460693359375, |
|
"logps/rejected": -44.711978912353516, |
|
"loss": 0.0105, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8381346464157104, |
|
"rewards/margins": 7.145539283752441, |
|
"rewards/rejected": -7.983673572540283, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 4.813559322033898, |
|
"grad_norm": 2.5513885163665013, |
|
"learning_rate": 2.0577743121935682e-07, |
|
"logits/chosen": -0.30383074283599854, |
|
"logits/rejected": -0.2893516719341278, |
|
"logps/chosen": -24.012680053710938, |
|
"logps/rejected": -55.98198318481445, |
|
"loss": 0.0228, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7660890221595764, |
|
"rewards/margins": 6.679078578948975, |
|
"rewards/rejected": -7.445167064666748, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 4.830508474576272, |
|
"grad_norm": 2.2427849156465443, |
|
"learning_rate": 2.0395551818838243e-07, |
|
"logits/chosen": -0.3513972759246826, |
|
"logits/rejected": -0.36794793605804443, |
|
"logps/chosen": -35.454872131347656, |
|
"logps/rejected": -58.40122985839844, |
|
"loss": 0.0223, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9875959753990173, |
|
"rewards/margins": 7.528386116027832, |
|
"rewards/rejected": -8.515982627868652, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 4.847457627118644, |
|
"grad_norm": 2.178682023578529, |
|
"learning_rate": 2.021361329627953e-07, |
|
"logits/chosen": -0.348906934261322, |
|
"logits/rejected": -0.2906019985675812, |
|
"logps/chosen": -21.8374080657959, |
|
"logps/rejected": -54.439029693603516, |
|
"loss": 0.0132, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.657446026802063, |
|
"rewards/margins": 7.482587814331055, |
|
"rewards/rejected": -8.140033721923828, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 4.864406779661017, |
|
"grad_norm": 2.548002935250282, |
|
"learning_rate": 2.003193754253957e-07, |
|
"logits/chosen": -0.3012135624885559, |
|
"logits/rejected": -0.2940428555011749, |
|
"logps/chosen": -28.2236385345459, |
|
"logps/rejected": -47.59425354003906, |
|
"loss": 0.02, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8078848123550415, |
|
"rewards/margins": 6.637413024902344, |
|
"rewards/rejected": -7.445297718048096, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 4.88135593220339, |
|
"grad_norm": 2.659959937090622, |
|
"learning_rate": 1.9850534531472544e-07, |
|
"logits/chosen": -0.3548402488231659, |
|
"logits/rejected": -0.3299209475517273, |
|
"logps/chosen": -26.74940299987793, |
|
"logps/rejected": -50.08027648925781, |
|
"loss": 0.0191, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9076037406921387, |
|
"rewards/margins": 6.909872531890869, |
|
"rewards/rejected": -7.817476272583008, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 4.898305084745763, |
|
"grad_norm": 2.1597951514795297, |
|
"learning_rate": 1.966941422195933e-07, |
|
"logits/chosen": -0.3348950147628784, |
|
"logits/rejected": -0.3803963363170624, |
|
"logps/chosen": -27.04452133178711, |
|
"logps/rejected": -57.45584487915039, |
|
"loss": 0.0178, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.858380913734436, |
|
"rewards/margins": 7.073944091796875, |
|
"rewards/rejected": -7.9323248863220215, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 4.915254237288136, |
|
"grad_norm": 2.8616875849096095, |
|
"learning_rate": 1.94885865573607e-07, |
|
"logits/chosen": -0.42029163241386414, |
|
"logits/rejected": -0.40385907888412476, |
|
"logps/chosen": -21.713485717773438, |
|
"logps/rejected": -51.253334045410156, |
|
"loss": 0.039, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.67279452085495, |
|
"rewards/margins": 7.22087287902832, |
|
"rewards/rejected": -7.893667221069336, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 4.932203389830509, |
|
"grad_norm": 2.4652867757867347, |
|
"learning_rate": 1.930806146497146e-07, |
|
"logits/chosen": -0.3921091556549072, |
|
"logits/rejected": -0.37878188490867615, |
|
"logps/chosen": -24.921491622924805, |
|
"logps/rejected": -50.15573501586914, |
|
"loss": 0.0189, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5007016658782959, |
|
"rewards/margins": 6.720961093902588, |
|
"rewards/rejected": -7.221663475036621, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 4.9491525423728815, |
|
"grad_norm": 2.8758915083893832, |
|
"learning_rate": 1.912784885547541e-07, |
|
"logits/chosen": -0.257066547870636, |
|
"logits/rejected": -0.24492767453193665, |
|
"logps/chosen": -28.24458885192871, |
|
"logps/rejected": -53.056297302246094, |
|
"loss": 0.0208, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5785134434700012, |
|
"rewards/margins": 5.2356109619140625, |
|
"rewards/rejected": -5.814124584197998, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 4.966101694915254, |
|
"grad_norm": 2.5031202245992956, |
|
"learning_rate": 1.8947958622401328e-07, |
|
"logits/chosen": -0.3068751394748688, |
|
"logits/rejected": -0.321804940700531, |
|
"logps/chosen": -25.078857421875, |
|
"logps/rejected": -51.03053283691406, |
|
"loss": 0.0165, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7383342385292053, |
|
"rewards/margins": 6.418414115905762, |
|
"rewards/rejected": -7.156747817993164, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 4.983050847457627, |
|
"grad_norm": 2.270555425985558, |
|
"learning_rate": 1.876840064157976e-07, |
|
"logits/chosen": -0.3506714403629303, |
|
"logits/rejected": -0.35707730054855347, |
|
"logps/chosen": -26.248760223388672, |
|
"logps/rejected": -51.72494888305664, |
|
"loss": 0.0189, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.8412652611732483, |
|
"rewards/margins": 6.9783101081848145, |
|
"rewards/rejected": -7.819576263427734, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 2.0981316143274804, |
|
"learning_rate": 1.858918477060089e-07, |
|
"logits/chosen": -0.3620571792125702, |
|
"logits/rejected": -0.31848618388175964, |
|
"logps/chosen": -24.052921295166016, |
|
"logps/rejected": -48.84006881713867, |
|
"loss": 0.0169, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8626826405525208, |
|
"rewards/margins": 6.730321407318115, |
|
"rewards/rejected": -7.5930047035217285, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 5.016949152542373, |
|
"grad_norm": 1.7908906466642667, |
|
"learning_rate": 1.8410320848273313e-07, |
|
"logits/chosen": -0.40287381410598755, |
|
"logits/rejected": -0.383707195520401, |
|
"logps/chosen": -21.60245704650879, |
|
"logps/rejected": -49.50798797607422, |
|
"loss": 0.011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0992738008499146, |
|
"rewards/margins": 6.644189834594727, |
|
"rewards/rejected": -7.743463516235352, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 5.033898305084746, |
|
"grad_norm": 1.8189459242932866, |
|
"learning_rate": 1.8231818694083938e-07, |
|
"logits/chosen": -0.2570793330669403, |
|
"logits/rejected": -0.19739127159118652, |
|
"logps/chosen": -38.83268356323242, |
|
"logps/rejected": -66.01898956298828, |
|
"loss": 0.011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5877269506454468, |
|
"rewards/margins": 8.778990745544434, |
|
"rewards/rejected": -10.366718292236328, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 5.0508474576271185, |
|
"grad_norm": 2.1580633851808253, |
|
"learning_rate": 1.8053688107658905e-07, |
|
"logits/chosen": -0.40840768814086914, |
|
"logits/rejected": -0.375863254070282, |
|
"logps/chosen": -23.65566062927246, |
|
"logps/rejected": -42.537147521972656, |
|
"loss": 0.0172, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.23549441993236542, |
|
"rewards/margins": 6.186650276184082, |
|
"rewards/rejected": -6.422145366668701, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 5.067796610169491, |
|
"grad_norm": 2.1748249256852206, |
|
"learning_rate": 1.787593886822556e-07, |
|
"logits/chosen": -0.23409932851791382, |
|
"logits/rejected": -0.25459229946136475, |
|
"logps/chosen": -24.52175521850586, |
|
"logps/rejected": -57.98483657836914, |
|
"loss": 0.0102, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7716866135597229, |
|
"rewards/margins": 7.725409030914307, |
|
"rewards/rejected": -8.497096061706543, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 5.084745762711864, |
|
"grad_norm": 1.8537224482358896, |
|
"learning_rate": 1.7698580734075607e-07, |
|
"logits/chosen": -0.2868376672267914, |
|
"logits/rejected": -0.22661691904067993, |
|
"logps/chosen": -28.01044273376465, |
|
"logps/rejected": -51.4971923828125, |
|
"loss": 0.0119, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0641169548034668, |
|
"rewards/margins": 6.613253593444824, |
|
"rewards/rejected": -7.677370071411133, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.101694915254237, |
|
"grad_norm": 1.2874213290715422, |
|
"learning_rate": 1.7521623442029388e-07, |
|
"logits/chosen": -0.24358531832695007, |
|
"logits/rejected": -0.23622053861618042, |
|
"logps/chosen": -23.474660873413086, |
|
"logps/rejected": -57.48131561279297, |
|
"loss": 0.007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7229022979736328, |
|
"rewards/margins": 7.076157569885254, |
|
"rewards/rejected": -7.799059867858887, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 5.11864406779661, |
|
"grad_norm": 2.1339366626101572, |
|
"learning_rate": 1.7345076706901326e-07, |
|
"logits/chosen": -0.3415279984474182, |
|
"logits/rejected": -0.3399394154548645, |
|
"logps/chosen": -32.244102478027344, |
|
"logps/rejected": -61.445579528808594, |
|
"loss": 0.0144, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.47477126121521, |
|
"rewards/margins": 7.632542133331299, |
|
"rewards/rejected": -9.10731315612793, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 5.135593220338983, |
|
"grad_norm": 1.9591512485372344, |
|
"learning_rate": 1.7168950220966614e-07, |
|
"logits/chosen": -0.2298712432384491, |
|
"logits/rejected": -0.22750090062618256, |
|
"logps/chosen": -29.4824161529541, |
|
"logps/rejected": -53.65066146850586, |
|
"loss": 0.0235, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.0305908918380737, |
|
"rewards/margins": 6.653254985809326, |
|
"rewards/rejected": -7.683846473693848, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 5.1525423728813555, |
|
"grad_norm": 2.5880324875437477, |
|
"learning_rate": 1.6993253653429062e-07, |
|
"logits/chosen": -0.3975529372692108, |
|
"logits/rejected": -0.3743340075016022, |
|
"logps/chosen": -34.480491638183594, |
|
"logps/rejected": -58.636436462402344, |
|
"loss": 0.0218, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.9836530685424805, |
|
"rewards/margins": 6.951813697814941, |
|
"rewards/rejected": -8.935466766357422, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 5.169491525423728, |
|
"grad_norm": 2.067798884368433, |
|
"learning_rate": 1.681799664989033e-07, |
|
"logits/chosen": -0.2536097764968872, |
|
"logits/rejected": -0.23771128058433533, |
|
"logps/chosen": -23.92715072631836, |
|
"logps/rejected": -39.59846878051758, |
|
"loss": 0.0132, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.060520462691783905, |
|
"rewards/margins": 5.955630302429199, |
|
"rewards/rejected": -5.8951096534729, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 5.186440677966102, |
|
"grad_norm": 1.848984932148484, |
|
"learning_rate": 1.6643188831820374e-07, |
|
"logits/chosen": -0.31347960233688354, |
|
"logits/rejected": -0.33331871032714844, |
|
"logps/chosen": -27.515384674072266, |
|
"logps/rejected": -56.68106460571289, |
|
"loss": 0.0186, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7880456447601318, |
|
"rewards/margins": 8.233735084533691, |
|
"rewards/rejected": -10.021780967712402, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 5.203389830508475, |
|
"grad_norm": 1.73302837343722, |
|
"learning_rate": 1.6468839796029198e-07, |
|
"logits/chosen": -0.4207502603530884, |
|
"logits/rejected": -0.44503217935562134, |
|
"logps/chosen": -32.272743225097656, |
|
"logps/rejected": -66.51268005371094, |
|
"loss": 0.0087, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1082534790039062, |
|
"rewards/margins": 7.719623565673828, |
|
"rewards/rejected": -8.827877044677734, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 5.220338983050848, |
|
"grad_norm": 1.8347981610660942, |
|
"learning_rate": 1.6294959114140033e-07, |
|
"logits/chosen": -0.48544037342071533, |
|
"logits/rejected": -0.4978610873222351, |
|
"logps/chosen": -29.223657608032227, |
|
"logps/rejected": -50.626705169677734, |
|
"loss": 0.0129, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6578260660171509, |
|
"rewards/margins": 6.7553253173828125, |
|
"rewards/rejected": -7.413151264190674, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 5.237288135593221, |
|
"grad_norm": 1.7157007652326588, |
|
"learning_rate": 1.6121556332063861e-07, |
|
"logits/chosen": -0.3168514370918274, |
|
"logits/rejected": -0.2793565094470978, |
|
"logps/chosen": -35.01051712036133, |
|
"logps/rejected": -47.876895904541016, |
|
"loss": 0.0133, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6407256722450256, |
|
"rewards/margins": 6.404169082641602, |
|
"rewards/rejected": -7.044894218444824, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 5.254237288135593, |
|
"grad_norm": 2.1874539268273816, |
|
"learning_rate": 1.5948640969475345e-07, |
|
"logits/chosen": -0.3500838875770569, |
|
"logits/rejected": -0.3151024580001831, |
|
"logps/chosen": -22.813232421875, |
|
"logps/rejected": -44.60185241699219, |
|
"loss": 0.0179, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.16966784000396729, |
|
"rewards/margins": 7.1504058837890625, |
|
"rewards/rejected": -7.32007360458374, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 5.271186440677966, |
|
"grad_norm": 1.5893639618489923, |
|
"learning_rate": 1.5776222519290204e-07, |
|
"logits/chosen": -0.5237964987754822, |
|
"logits/rejected": -0.5302670001983643, |
|
"logps/chosen": -25.384904861450195, |
|
"logps/rejected": -52.52655792236328, |
|
"loss": 0.0087, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9161776304244995, |
|
"rewards/margins": 7.243150234222412, |
|
"rewards/rejected": -8.15932846069336, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 5.288135593220339, |
|
"grad_norm": 1.507891171137426, |
|
"learning_rate": 1.560431044714405e-07, |
|
"logits/chosen": -0.388788104057312, |
|
"logits/rejected": -0.3403037488460541, |
|
"logps/chosen": -34.243717193603516, |
|
"logps/rejected": -60.99458312988281, |
|
"loss": 0.0106, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.348089575767517, |
|
"rewards/margins": 7.640527248382568, |
|
"rewards/rejected": -8.988616943359375, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 5.305084745762712, |
|
"grad_norm": 2.10463616748223, |
|
"learning_rate": 1.5432914190872756e-07, |
|
"logits/chosen": -0.3692334294319153, |
|
"logits/rejected": -0.349362313747406, |
|
"logps/chosen": -26.829898834228516, |
|
"logps/rejected": -47.90415954589844, |
|
"loss": 0.0118, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8458276987075806, |
|
"rewards/margins": 6.770244121551514, |
|
"rewards/rejected": -7.6160712242126465, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 5.322033898305085, |
|
"grad_norm": 1.926573035403958, |
|
"learning_rate": 1.5262043159994314e-07, |
|
"logits/chosen": -0.44576406478881836, |
|
"logits/rejected": -0.39015570282936096, |
|
"logps/chosen": -24.399137496948242, |
|
"logps/rejected": -62.678646087646484, |
|
"loss": 0.0128, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.23113268613815308, |
|
"rewards/margins": 9.731943130493164, |
|
"rewards/rejected": -9.963075637817383, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 5.338983050847458, |
|
"grad_norm": 1.7795060387621737, |
|
"learning_rate": 1.5091706735192266e-07, |
|
"logits/chosen": -0.3505421280860901, |
|
"logits/rejected": -0.3113071322441101, |
|
"logps/chosen": -22.754703521728516, |
|
"logps/rejected": -58.468963623046875, |
|
"loss": 0.0221, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9263325929641724, |
|
"rewards/margins": 7.451290130615234, |
|
"rewards/rejected": -8.377622604370117, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 5.3559322033898304, |
|
"grad_norm": 2.145087625077026, |
|
"learning_rate": 1.4921914267800699e-07, |
|
"logits/chosen": -0.3622016906738281, |
|
"logits/rejected": -0.3543117642402649, |
|
"logps/chosen": -20.036022186279297, |
|
"logps/rejected": -38.42483901977539, |
|
"loss": 0.017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.31162628531455994, |
|
"rewards/margins": 5.150010108947754, |
|
"rewards/rejected": -5.461635589599609, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 5.372881355932203, |
|
"grad_norm": 1.7053004335113204, |
|
"learning_rate": 1.4752675079290848e-07, |
|
"logits/chosen": -0.31497931480407715, |
|
"logits/rejected": -0.2895013391971588, |
|
"logps/chosen": -28.917264938354492, |
|
"logps/rejected": -43.556121826171875, |
|
"loss": 0.01, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9403586983680725, |
|
"rewards/margins": 5.686085224151611, |
|
"rewards/rejected": -6.626444339752197, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 5.389830508474576, |
|
"grad_norm": 1.6608947087640378, |
|
"learning_rate": 1.458399846075942e-07, |
|
"logits/chosen": -0.5058786273002625, |
|
"logits/rejected": -0.47814008593559265, |
|
"logps/chosen": -31.875675201416016, |
|
"logps/rejected": -60.682525634765625, |
|
"loss": 0.0113, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1719341278076172, |
|
"rewards/margins": 7.2327094078063965, |
|
"rewards/rejected": -8.404644012451172, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 5.406779661016949, |
|
"grad_norm": 2.159091958032586, |
|
"learning_rate": 1.441589367241846e-07, |
|
"logits/chosen": -0.3478569984436035, |
|
"logits/rejected": -0.3360307216644287, |
|
"logps/chosen": -25.11379051208496, |
|
"logps/rejected": -47.88860321044922, |
|
"loss": 0.0143, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6650610566139221, |
|
"rewards/margins": 6.3550872802734375, |
|
"rewards/rejected": -7.020147323608398, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 5.423728813559322, |
|
"grad_norm": 2.576395194299276, |
|
"learning_rate": 1.4248369943086995e-07, |
|
"logits/chosen": -0.41911399364471436, |
|
"logits/rejected": -0.3637450933456421, |
|
"logps/chosen": -29.785608291625977, |
|
"logps/rejected": -50.008182525634766, |
|
"loss": 0.0209, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.31600964069366455, |
|
"rewards/margins": 7.043180465698242, |
|
"rewards/rejected": -7.359189510345459, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 5.440677966101695, |
|
"grad_norm": 2.1653315983567416, |
|
"learning_rate": 1.4081436469684337e-07, |
|
"logits/chosen": -0.32830509543418884, |
|
"logits/rejected": -0.317745566368103, |
|
"logps/chosen": -25.206449508666992, |
|
"logps/rejected": -49.792205810546875, |
|
"loss": 0.0112, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6629531383514404, |
|
"rewards/margins": 7.332546234130859, |
|
"rewards/rejected": -7.995500087738037, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 5.4576271186440675, |
|
"grad_norm": 1.619909996410463, |
|
"learning_rate": 1.3915102416725286e-07, |
|
"logits/chosen": -0.43633776903152466, |
|
"logits/rejected": -0.4285232424736023, |
|
"logps/chosen": -21.11972427368164, |
|
"logps/rejected": -50.980804443359375, |
|
"loss": 0.0137, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.572965145111084, |
|
"rewards/margins": 6.161455154418945, |
|
"rewards/rejected": -6.734420299530029, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 5.47457627118644, |
|
"grad_norm": 2.1885009314879538, |
|
"learning_rate": 1.3749376915816885e-07, |
|
"logits/chosen": -0.21762433648109436, |
|
"logits/rejected": -0.196787029504776, |
|
"logps/chosen": -35.26130676269531, |
|
"logps/rejected": -56.66743087768555, |
|
"loss": 0.0151, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.8203563690185547, |
|
"rewards/margins": 7.474347114562988, |
|
"rewards/rejected": -9.294703483581543, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 5.491525423728813, |
|
"grad_norm": 1.9365005337913619, |
|
"learning_rate": 1.3584269065157172e-07, |
|
"logits/chosen": -0.27862459421157837, |
|
"logits/rejected": -0.2110404521226883, |
|
"logps/chosen": -35.968971252441406, |
|
"logps/rejected": -56.928218841552734, |
|
"loss": 0.0117, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3680837154388428, |
|
"rewards/margins": 6.986885070800781, |
|
"rewards/rejected": -8.354969024658203, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 5.508474576271187, |
|
"grad_norm": 1.3422635792385325, |
|
"learning_rate": 1.341978792903568e-07, |
|
"logits/chosen": -0.30388015508651733, |
|
"logits/rejected": -0.2732846736907959, |
|
"logps/chosen": -23.869976043701172, |
|
"logps/rejected": -52.69965362548828, |
|
"loss": 0.0066, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.31776559352874756, |
|
"rewards/margins": 8.570083618164062, |
|
"rewards/rejected": -8.887847900390625, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 5.52542372881356, |
|
"grad_norm": 2.3813246068255487, |
|
"learning_rate": 1.3255942537335804e-07, |
|
"logits/chosen": -0.33688196539878845, |
|
"logits/rejected": -0.35382434725761414, |
|
"logps/chosen": -28.49911117553711, |
|
"logps/rejected": -51.572757720947266, |
|
"loss": 0.0228, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8108580112457275, |
|
"rewards/margins": 6.868773937225342, |
|
"rewards/rejected": -7.67963171005249, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 5.5423728813559325, |
|
"grad_norm": 1.4536548361254025, |
|
"learning_rate": 1.3092741885039085e-07, |
|
"logits/chosen": -0.2705250084400177, |
|
"logits/rejected": -0.2894834876060486, |
|
"logps/chosen": -27.802425384521484, |
|
"logps/rejected": -66.8345718383789, |
|
"loss": 0.0252, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.008652925491333, |
|
"rewards/margins": 8.34438419342041, |
|
"rewards/rejected": -9.35303783416748, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 5.559322033898305, |
|
"grad_norm": 2.049468728531298, |
|
"learning_rate": 1.2930194931731382e-07, |
|
"logits/chosen": -0.36835363507270813, |
|
"logits/rejected": -0.3584752380847931, |
|
"logps/chosen": -20.919490814208984, |
|
"logps/rejected": -39.850074768066406, |
|
"loss": 0.0163, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.48556971549987793, |
|
"rewards/margins": 6.541074752807617, |
|
"rewards/rejected": -7.026644229888916, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 5.576271186440678, |
|
"grad_norm": 1.5752341980459406, |
|
"learning_rate": 1.2768310601110993e-07, |
|
"logits/chosen": -0.4180675148963928, |
|
"logits/rejected": -0.4410182535648346, |
|
"logps/chosen": -25.881986618041992, |
|
"logps/rejected": -69.80418395996094, |
|
"loss": 0.0086, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.903774082660675, |
|
"rewards/margins": 9.744110107421875, |
|
"rewards/rejected": -10.647883415222168, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 5.593220338983051, |
|
"grad_norm": 1.7653806800087801, |
|
"learning_rate": 1.260709778049877e-07, |
|
"logits/chosen": -0.29894641041755676, |
|
"logits/rejected": -0.300833523273468, |
|
"logps/chosen": -26.03153419494629, |
|
"logps/rejected": -47.6400260925293, |
|
"loss": 0.008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7818778157234192, |
|
"rewards/margins": 6.886005878448486, |
|
"rewards/rejected": -7.667883396148682, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 5.610169491525424, |
|
"grad_norm": 1.4297534909157374, |
|
"learning_rate": 1.2446565320350182e-07, |
|
"logits/chosen": -0.3907126188278198, |
|
"logits/rejected": -0.37021511793136597, |
|
"logps/chosen": -21.903635025024414, |
|
"logps/rejected": -48.463523864746094, |
|
"loss": 0.0182, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.698778510093689, |
|
"rewards/margins": 6.993403434753418, |
|
"rewards/rejected": -7.692181587219238, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 5.627118644067797, |
|
"grad_norm": 1.9822675391192361, |
|
"learning_rate": 1.2286722033769492e-07, |
|
"logits/chosen": -0.4067448675632477, |
|
"logits/rejected": -0.3597560524940491, |
|
"logps/chosen": -27.199350357055664, |
|
"logps/rejected": -53.19655227661133, |
|
"loss": 0.0114, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.44194698333740234, |
|
"rewards/margins": 7.5092453956604, |
|
"rewards/rejected": -7.9511919021606445, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 5.6440677966101696, |
|
"grad_norm": 1.6861714590542527, |
|
"learning_rate": 1.2127576696025826e-07, |
|
"logits/chosen": -0.38976797461509705, |
|
"logits/rejected": -0.3696633577346802, |
|
"logps/chosen": -30.58667755126953, |
|
"logps/rejected": -71.94692993164062, |
|
"loss": 0.0149, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8064645528793335, |
|
"rewards/margins": 10.191534042358398, |
|
"rewards/rejected": -10.99799919128418, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 5.661016949152542, |
|
"grad_norm": 2.050031332323167, |
|
"learning_rate": 1.19691380440715e-07, |
|
"logits/chosen": -0.3898102045059204, |
|
"logits/rejected": -0.37484288215637207, |
|
"logps/chosen": -28.617263793945312, |
|
"logps/rejected": -48.9388427734375, |
|
"loss": 0.0184, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.1024081707000732, |
|
"rewards/margins": 5.788519382476807, |
|
"rewards/rejected": -6.890927314758301, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 5.677966101694915, |
|
"grad_norm": 1.4885965824230383, |
|
"learning_rate": 1.1811414776062365e-07, |
|
"logits/chosen": -0.21893128752708435, |
|
"logits/rejected": -0.17550604045391083, |
|
"logps/chosen": -31.78797149658203, |
|
"logps/rejected": -53.7734260559082, |
|
"loss": 0.0076, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8101913928985596, |
|
"rewards/margins": 7.902094841003418, |
|
"rewards/rejected": -8.712285995483398, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 5.694915254237288, |
|
"grad_norm": 1.8676903528380577, |
|
"learning_rate": 1.1654415550880242e-07, |
|
"logits/chosen": -0.4299631118774414, |
|
"logits/rejected": -0.4651949405670166, |
|
"logps/chosen": -24.422216415405273, |
|
"logps/rejected": -49.618309020996094, |
|
"loss": 0.0111, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.24530749022960663, |
|
"rewards/margins": 8.013923645019531, |
|
"rewards/rejected": -8.259231567382812, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 5.711864406779661, |
|
"grad_norm": 2.0067876768226243, |
|
"learning_rate": 1.1498148987657549e-07, |
|
"logits/chosen": -0.290162056684494, |
|
"logits/rejected": -0.2921581566333771, |
|
"logps/chosen": -29.040874481201172, |
|
"logps/rejected": -60.61643981933594, |
|
"loss": 0.0144, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3281574249267578, |
|
"rewards/margins": 8.345926284790039, |
|
"rewards/rejected": -9.674084663391113, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 5.728813559322034, |
|
"grad_norm": 1.6926593631605538, |
|
"learning_rate": 1.1342623665304207e-07, |
|
"logits/chosen": -0.39946579933166504, |
|
"logits/rejected": -0.3756706416606903, |
|
"logps/chosen": -26.8501033782959, |
|
"logps/rejected": -57.20337677001953, |
|
"loss": 0.0156, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4086235761642456, |
|
"rewards/margins": 7.640737056732178, |
|
"rewards/rejected": -9.049360275268555, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 5.745762711864407, |
|
"grad_norm": 1.933017411699114, |
|
"learning_rate": 1.1187848122036562e-07, |
|
"logits/chosen": -0.38379529118537903, |
|
"logits/rejected": -0.35069793462753296, |
|
"logps/chosen": -27.219024658203125, |
|
"logps/rejected": -45.80855941772461, |
|
"loss": 0.0133, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0740251541137695, |
|
"rewards/margins": 6.810902118682861, |
|
"rewards/rejected": -7.884926795959473, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 5.762711864406779, |
|
"grad_norm": 2.68284847566437, |
|
"learning_rate": 1.1033830854908691e-07, |
|
"logits/chosen": -0.463611364364624, |
|
"logits/rejected": -0.46968621015548706, |
|
"logps/chosen": -23.01727294921875, |
|
"logps/rejected": -51.12052917480469, |
|
"loss": 0.0209, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.40349888801574707, |
|
"rewards/margins": 7.609687328338623, |
|
"rewards/rejected": -8.013185501098633, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 5.779661016949152, |
|
"grad_norm": 1.830575531381985, |
|
"learning_rate": 1.0880580319345919e-07, |
|
"logits/chosen": -0.4400818645954132, |
|
"logits/rejected": -0.36093467473983765, |
|
"logps/chosen": -29.340173721313477, |
|
"logps/rejected": -51.93349075317383, |
|
"loss": 0.0098, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.12024500221014023, |
|
"rewards/margins": 8.31672191619873, |
|
"rewards/rejected": -8.436967849731445, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 5.796610169491525, |
|
"grad_norm": 2.1072568448760323, |
|
"learning_rate": 1.0728104928680623e-07, |
|
"logits/chosen": -0.4102250039577484, |
|
"logits/rejected": -0.40293923020362854, |
|
"logps/chosen": -24.35076904296875, |
|
"logps/rejected": -49.26411437988281, |
|
"loss": 0.018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5383625030517578, |
|
"rewards/margins": 6.970728874206543, |
|
"rewards/rejected": -8.5090913772583, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 5.813559322033898, |
|
"grad_norm": 1.5832557378178098, |
|
"learning_rate": 1.0576413053690326e-07, |
|
"logits/chosen": -0.3550926446914673, |
|
"logits/rejected": -0.33369180560112, |
|
"logps/chosen": -23.023447036743164, |
|
"logps/rejected": -51.34148406982422, |
|
"loss": 0.0132, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.41946250200271606, |
|
"rewards/margins": 8.600196838378906, |
|
"rewards/rejected": -9.019659042358398, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 5.830508474576272, |
|
"grad_norm": 1.2740745062843633, |
|
"learning_rate": 1.0425513022138202e-07, |
|
"logits/chosen": -0.44471290707588196, |
|
"logits/rejected": -0.45575839281082153, |
|
"logps/chosen": -30.049896240234375, |
|
"logps/rejected": -62.054786682128906, |
|
"loss": 0.0079, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.082101583480835, |
|
"rewards/margins": 8.661722183227539, |
|
"rewards/rejected": -9.743824005126953, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 5.847457627118644, |
|
"grad_norm": 1.8801561548243628, |
|
"learning_rate": 1.0275413118315798e-07, |
|
"logits/chosen": -0.4198082387447357, |
|
"logits/rejected": -0.4343384802341461, |
|
"logps/chosen": -26.124162673950195, |
|
"logps/rejected": -49.45093536376953, |
|
"loss": 0.0147, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.23411789536476135, |
|
"rewards/margins": 7.040526390075684, |
|
"rewards/rejected": -7.27464485168457, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 5.864406779661017, |
|
"grad_norm": 2.125582956895436, |
|
"learning_rate": 1.0126121582588315e-07, |
|
"logits/chosen": -0.42699775099754333, |
|
"logits/rejected": -0.332169771194458, |
|
"logps/chosen": -42.01930618286133, |
|
"logps/rejected": -51.770362854003906, |
|
"loss": 0.0174, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4565836191177368, |
|
"rewards/margins": 6.331849098205566, |
|
"rewards/rejected": -7.788432598114014, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 5.88135593220339, |
|
"grad_norm": 1.7338911737005034, |
|
"learning_rate": 9.977646610942201e-08, |
|
"logits/chosen": -0.46750593185424805, |
|
"logits/rejected": -0.4310920536518097, |
|
"logps/chosen": -34.33905792236328, |
|
"logps/rejected": -55.792449951171875, |
|
"loss": 0.0096, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1425302028656006, |
|
"rewards/margins": 6.989797592163086, |
|
"rewards/rejected": -8.13232707977295, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 5.898305084745763, |
|
"grad_norm": 2.124936709443199, |
|
"learning_rate": 9.829996354535172e-08, |
|
"logits/chosen": -0.19952382147312164, |
|
"logits/rejected": -0.21836933493614197, |
|
"logps/chosen": -19.52752685546875, |
|
"logps/rejected": -51.54100799560547, |
|
"loss": 0.0177, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.42098936438560486, |
|
"rewards/margins": 7.090313911437988, |
|
"rewards/rejected": -7.511303424835205, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 5.915254237288136, |
|
"grad_norm": 1.936391711504055, |
|
"learning_rate": 9.68317891924871e-08, |
|
"logits/chosen": -0.37741342186927795, |
|
"logits/rejected": -0.3150150179862976, |
|
"logps/chosen": -33.96430587768555, |
|
"logps/rejected": -58.169151306152344, |
|
"loss": 0.0126, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8618389368057251, |
|
"rewards/margins": 7.058804988861084, |
|
"rewards/rejected": -7.9206438064575195, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 5.932203389830509, |
|
"grad_norm": 1.8102577538281432, |
|
"learning_rate": 9.53720236524313e-08, |
|
"logits/chosen": -0.3949698805809021, |
|
"logits/rejected": -0.29366767406463623, |
|
"logps/chosen": -37.89696502685547, |
|
"logps/rejected": -47.563255310058594, |
|
"loss": 0.0105, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.420377641916275, |
|
"rewards/margins": 6.494236946105957, |
|
"rewards/rejected": -6.914615631103516, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 5.9491525423728815, |
|
"grad_norm": 1.5979190927318805, |
|
"learning_rate": 9.392074706515002e-08, |
|
"logits/chosen": -0.2729552388191223, |
|
"logits/rejected": -0.28463542461395264, |
|
"logps/chosen": -28.566404342651367, |
|
"logps/rejected": -56.13119125366211, |
|
"loss": 0.0119, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8524938821792603, |
|
"rewards/margins": 7.293689727783203, |
|
"rewards/rejected": -8.146183967590332, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 5.966101694915254, |
|
"grad_norm": 2.3839776379838384, |
|
"learning_rate": 9.247803910457225e-08, |
|
"logits/chosen": -0.3895640969276428, |
|
"logits/rejected": -0.3975210189819336, |
|
"logps/chosen": -26.325103759765625, |
|
"logps/rejected": -53.08610534667969, |
|
"loss": 0.0202, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3783128261566162, |
|
"rewards/margins": 7.456340789794922, |
|
"rewards/rejected": -8.834653854370117, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 5.983050847457627, |
|
"grad_norm": 1.2920698746602828, |
|
"learning_rate": 9.104397897421623e-08, |
|
"logits/chosen": -0.32404041290283203, |
|
"logits/rejected": -0.27217093110084534, |
|
"logps/chosen": -25.754863739013672, |
|
"logps/rejected": -63.25767135620117, |
|
"loss": 0.0068, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3308719396591187, |
|
"rewards/margins": 8.711791038513184, |
|
"rewards/rejected": -10.04266357421875, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.282532794116693, |
|
"learning_rate": 8.961864540284119e-08, |
|
"logits/chosen": -0.49952465295791626, |
|
"logits/rejected": -0.5269231796264648, |
|
"logps/chosen": -22.62492561340332, |
|
"logps/rejected": -51.047019958496094, |
|
"loss": 0.0076, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5334063172340393, |
|
"rewards/margins": 8.3474702835083, |
|
"rewards/rejected": -8.880876541137695, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 6.016949152542373, |
|
"grad_norm": 1.7112403823625462, |
|
"learning_rate": 8.82021166401253e-08, |
|
"logits/chosen": -0.3233092129230499, |
|
"logits/rejected": -0.2621540427207947, |
|
"logps/chosen": -44.06984329223633, |
|
"logps/rejected": -58.04002380371094, |
|
"loss": 0.0125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.143134355545044, |
|
"rewards/margins": 6.095809459686279, |
|
"rewards/rejected": -8.238943099975586, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 6.033898305084746, |
|
"grad_norm": 2.3344092696583947, |
|
"learning_rate": 8.679447045236962e-08, |
|
"logits/chosen": -0.3654767572879791, |
|
"logits/rejected": -0.3644530773162842, |
|
"logps/chosen": -20.51791763305664, |
|
"logps/rejected": -45.79965591430664, |
|
"loss": 0.017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5996273756027222, |
|
"rewards/margins": 7.613353729248047, |
|
"rewards/rejected": -8.212981224060059, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 6.0508474576271185, |
|
"grad_norm": 2.317485347300773, |
|
"learning_rate": 8.539578411822901e-08, |
|
"logits/chosen": -0.3773816227912903, |
|
"logits/rejected": -0.3972689211368561, |
|
"logps/chosen": -30.56630516052246, |
|
"logps/rejected": -50.376220703125, |
|
"loss": 0.0171, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9383816719055176, |
|
"rewards/margins": 6.174047946929932, |
|
"rewards/rejected": -7.112429618835449, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 6.067796610169491, |
|
"grad_norm": 1.4474880701357473, |
|
"learning_rate": 8.400613442446947e-08, |
|
"logits/chosen": -0.5112478733062744, |
|
"logits/rejected": -0.4722178876399994, |
|
"logps/chosen": -27.13446807861328, |
|
"logps/rejected": -52.48398208618164, |
|
"loss": 0.0099, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3206241130828857, |
|
"rewards/margins": 7.3138251304626465, |
|
"rewards/rejected": -8.63444995880127, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 6.084745762711864, |
|
"grad_norm": 1.2482543871299383, |
|
"learning_rate": 8.262559766175253e-08, |
|
"logits/chosen": -0.37037163972854614, |
|
"logits/rejected": -0.40053224563598633, |
|
"logps/chosen": -26.4130802154541, |
|
"logps/rejected": -58.55597686767578, |
|
"loss": 0.0066, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.92750483751297, |
|
"rewards/margins": 8.936185836791992, |
|
"rewards/rejected": -9.863691329956055, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 6.101694915254237, |
|
"grad_norm": 1.3326694254649336, |
|
"learning_rate": 8.125424962044741e-08, |
|
"logits/chosen": -0.4082280993461609, |
|
"logits/rejected": -0.3952917158603668, |
|
"logps/chosen": -31.45101547241211, |
|
"logps/rejected": -57.48428726196289, |
|
"loss": 0.0076, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.774019479751587, |
|
"rewards/margins": 7.355801105499268, |
|
"rewards/rejected": -9.129819869995117, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 6.11864406779661, |
|
"grad_norm": 1.7964966769625663, |
|
"learning_rate": 7.989216558646941e-08, |
|
"logits/chosen": -0.37784266471862793, |
|
"logits/rejected": -0.3356171250343323, |
|
"logps/chosen": -33.39372634887695, |
|
"logps/rejected": -53.96965789794922, |
|
"loss": 0.0149, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1166173219680786, |
|
"rewards/margins": 7.514166831970215, |
|
"rewards/rejected": -8.630784034729004, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 6.135593220338983, |
|
"grad_norm": 1.4381980147201805, |
|
"learning_rate": 7.853942033714736e-08, |
|
"logits/chosen": -0.33557164669036865, |
|
"logits/rejected": -0.3193064332008362, |
|
"logps/chosen": -37.53783416748047, |
|
"logps/rejected": -60.92087173461914, |
|
"loss": 0.0083, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.018031120300293, |
|
"rewards/margins": 7.809800148010254, |
|
"rewards/rejected": -8.827831268310547, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 6.1525423728813555, |
|
"grad_norm": 1.7983591719289653, |
|
"learning_rate": 7.719608813711847e-08, |
|
"logits/chosen": -0.39093196392059326, |
|
"logits/rejected": -0.37135645747184753, |
|
"logps/chosen": -25.3659725189209, |
|
"logps/rejected": -43.70526123046875, |
|
"loss": 0.0088, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.914777398109436, |
|
"rewards/margins": 6.569075584411621, |
|
"rewards/rejected": -7.483852863311768, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 6.169491525423728, |
|
"grad_norm": 2.0553124333196475, |
|
"learning_rate": 7.586224273425081e-08, |
|
"logits/chosen": -0.43935853242874146, |
|
"logits/rejected": -0.39239639043807983, |
|
"logps/chosen": -31.48431396484375, |
|
"logps/rejected": -54.33441162109375, |
|
"loss": 0.0125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8172799944877625, |
|
"rewards/margins": 7.351185321807861, |
|
"rewards/rejected": -8.168466567993164, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 6.186440677966102, |
|
"grad_norm": 1.5277879793345497, |
|
"learning_rate": 7.45379573555947e-08, |
|
"logits/chosen": -0.35419967770576477, |
|
"logits/rejected": -0.29741495847702026, |
|
"logps/chosen": -32.288909912109375, |
|
"logps/rejected": -48.68520736694336, |
|
"loss": 0.009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7244929671287537, |
|
"rewards/margins": 6.385775089263916, |
|
"rewards/rejected": -7.110268592834473, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 6.203389830508475, |
|
"grad_norm": 1.2587042351574373, |
|
"learning_rate": 7.322330470336313e-08, |
|
"logits/chosen": -0.3986334502696991, |
|
"logits/rejected": -0.41473451256752014, |
|
"logps/chosen": -28.10173225402832, |
|
"logps/rejected": -60.30015182495117, |
|
"loss": 0.0119, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9395461082458496, |
|
"rewards/margins": 8.40530014038086, |
|
"rewards/rejected": -9.34484577178955, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 6.220338983050848, |
|
"grad_norm": 1.9104878326976753, |
|
"learning_rate": 7.19183569509398e-08, |
|
"logits/chosen": -0.42515650391578674, |
|
"logits/rejected": -0.4083452820777893, |
|
"logps/chosen": -25.77292251586914, |
|
"logps/rejected": -43.442447662353516, |
|
"loss": 0.0134, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8836389183998108, |
|
"rewards/margins": 6.8581342697143555, |
|
"rewards/rejected": -7.74177360534668, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 6.237288135593221, |
|
"grad_norm": 1.6423168042541676, |
|
"learning_rate": 7.062318573891715e-08, |
|
"logits/chosen": -0.27111876010894775, |
|
"logits/rejected": -0.22774375975131989, |
|
"logps/chosen": -25.00893783569336, |
|
"logps/rejected": -51.19509506225586, |
|
"loss": 0.0165, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6221886873245239, |
|
"rewards/margins": 7.797216892242432, |
|
"rewards/rejected": -8.419405937194824, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 6.254237288135593, |
|
"grad_norm": 1.9302693331546565, |
|
"learning_rate": 6.933786217116364e-08, |
|
"logits/chosen": -0.3160867691040039, |
|
"logits/rejected": -0.2463129460811615, |
|
"logps/chosen": -24.83222198486328, |
|
"logps/rejected": -45.60934066772461, |
|
"loss": 0.0164, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.16461661458015442, |
|
"rewards/margins": 6.204172134399414, |
|
"rewards/rejected": -6.368788242340088, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 6.271186440677966, |
|
"grad_norm": 1.5842681388350077, |
|
"learning_rate": 6.806245681091944e-08, |
|
"logits/chosen": -0.3545396029949188, |
|
"logits/rejected": -0.25907883048057556, |
|
"logps/chosen": -28.055213928222656, |
|
"logps/rejected": -56.62708282470703, |
|
"loss": 0.0102, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.246912956237793, |
|
"rewards/margins": 8.316703796386719, |
|
"rewards/rejected": -9.563617706298828, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 6.288135593220339, |
|
"grad_norm": 1.8111378961230746, |
|
"learning_rate": 6.679703967692321e-08, |
|
"logits/chosen": -0.21147161722183228, |
|
"logits/rejected": -0.20179268717765808, |
|
"logps/chosen": -23.622386932373047, |
|
"logps/rejected": -56.40178680419922, |
|
"loss": 0.0113, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9630917310714722, |
|
"rewards/margins": 7.12011194229126, |
|
"rewards/rejected": -8.08320426940918, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 6.305084745762712, |
|
"grad_norm": 1.1986230098938282, |
|
"learning_rate": 6.554168023956816e-08, |
|
"logits/chosen": -0.2591314911842346, |
|
"logits/rejected": -0.27817869186401367, |
|
"logps/chosen": -27.67983627319336, |
|
"logps/rejected": -50.832889556884766, |
|
"loss": 0.0114, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1853872537612915, |
|
"rewards/margins": 6.63606071472168, |
|
"rewards/rejected": -7.82144832611084, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 6.322033898305085, |
|
"grad_norm": 1.7049379706268657, |
|
"learning_rate": 6.429644741708779e-08, |
|
"logits/chosen": -0.4500387907028198, |
|
"logits/rejected": -0.36974358558654785, |
|
"logps/chosen": -23.562002182006836, |
|
"logps/rejected": -42.636146545410156, |
|
"loss": 0.0115, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6414427757263184, |
|
"rewards/margins": 6.672116756439209, |
|
"rewards/rejected": -7.313559532165527, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 6.338983050847458, |
|
"grad_norm": 1.359337092287595, |
|
"learning_rate": 6.306140957177225e-08, |
|
"logits/chosen": -0.3460231125354767, |
|
"logits/rejected": -0.3752771317958832, |
|
"logps/chosen": -25.647207260131836, |
|
"logps/rejected": -51.0827522277832, |
|
"loss": 0.0073, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7650890350341797, |
|
"rewards/margins": 6.918083667755127, |
|
"rewards/rejected": -7.683172702789307, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 6.3559322033898304, |
|
"grad_norm": 1.534380633126308, |
|
"learning_rate": 6.183663450621607e-08, |
|
"logits/chosen": -0.34895992279052734, |
|
"logits/rejected": -0.3292369842529297, |
|
"logps/chosen": -34.13381576538086, |
|
"logps/rejected": -55.714393615722656, |
|
"loss": 0.0115, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8866183757781982, |
|
"rewards/margins": 7.842109203338623, |
|
"rewards/rejected": -8.728727340698242, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 6.372881355932203, |
|
"grad_norm": 1.5779257981471628, |
|
"learning_rate": 6.062218945959496e-08, |
|
"logits/chosen": -0.4587939977645874, |
|
"logits/rejected": -0.4462360143661499, |
|
"logps/chosen": -33.18772888183594, |
|
"logps/rejected": -50.75407028198242, |
|
"loss": 0.0095, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7835342288017273, |
|
"rewards/margins": 7.535502910614014, |
|
"rewards/rejected": -8.319037437438965, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 6.389830508474576, |
|
"grad_norm": 1.4524083267309678, |
|
"learning_rate": 5.9418141103975026e-08, |
|
"logits/chosen": -0.3016980290412903, |
|
"logits/rejected": -0.3382137417793274, |
|
"logps/chosen": -28.424884796142578, |
|
"logps/rejected": -67.0705337524414, |
|
"loss": 0.0078, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.947837233543396, |
|
"rewards/margins": 10.247238159179688, |
|
"rewards/rejected": -11.195074081420898, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 6.406779661016949, |
|
"grad_norm": 1.5502969744773236, |
|
"learning_rate": 5.822455554065217e-08, |
|
"logits/chosen": -0.22019946575164795, |
|
"logits/rejected": -0.18748457729816437, |
|
"logps/chosen": -21.47047996520996, |
|
"logps/rejected": -43.39370346069336, |
|
"loss": 0.0102, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4421558678150177, |
|
"rewards/margins": 6.153988361358643, |
|
"rewards/rejected": -6.59614372253418, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 6.423728813559322, |
|
"grad_norm": 1.4169584989322257, |
|
"learning_rate": 5.704149829652341e-08, |
|
"logits/chosen": -0.45599544048309326, |
|
"logits/rejected": -0.38583889603614807, |
|
"logps/chosen": -30.23516082763672, |
|
"logps/rejected": -60.14368438720703, |
|
"loss": 0.0078, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9913487434387207, |
|
"rewards/margins": 8.080925941467285, |
|
"rewards/rejected": -9.072275161743164, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 6.440677966101695, |
|
"grad_norm": 1.9051661634723038, |
|
"learning_rate": 5.586903432048942e-08, |
|
"logits/chosen": -0.49622446298599243, |
|
"logits/rejected": -0.4194895625114441, |
|
"logps/chosen": -29.30303192138672, |
|
"logps/rejected": -59.797874450683594, |
|
"loss": 0.0103, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.547768473625183, |
|
"rewards/margins": 8.939881324768066, |
|
"rewards/rejected": -10.487649917602539, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 6.4576271186440675, |
|
"grad_norm": 1.6431936228958974, |
|
"learning_rate": 5.470722797988883e-08, |
|
"logits/chosen": -0.2737140953540802, |
|
"logits/rejected": -0.27121812105178833, |
|
"logps/chosen": -25.59175682067871, |
|
"logps/rejected": -46.32392120361328, |
|
"loss": 0.0119, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4059231281280518, |
|
"rewards/margins": 6.439781188964844, |
|
"rewards/rejected": -7.845704078674316, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 6.47457627118644, |
|
"grad_norm": 1.3079903093761553, |
|
"learning_rate": 5.355614305696468e-08, |
|
"logits/chosen": -0.3695864677429199, |
|
"logits/rejected": -0.30790218710899353, |
|
"logps/chosen": -27.365276336669922, |
|
"logps/rejected": -51.38225555419922, |
|
"loss": 0.0087, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4411066770553589, |
|
"rewards/margins": 7.909936904907227, |
|
"rewards/rejected": -8.351043701171875, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 6.491525423728813, |
|
"grad_norm": 1.5133700851337937, |
|
"learning_rate": 5.241584274536259e-08, |
|
"logits/chosen": -0.298088937997818, |
|
"logits/rejected": -0.267940878868103, |
|
"logps/chosen": -28.913124084472656, |
|
"logps/rejected": -59.5203971862793, |
|
"loss": 0.0094, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8007093667984009, |
|
"rewards/margins": 8.938852310180664, |
|
"rewards/rejected": -9.739561080932617, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 6.508474576271187, |
|
"grad_norm": 1.405914234766755, |
|
"learning_rate": 5.1286389646661654e-08, |
|
"logits/chosen": -0.2601643204689026, |
|
"logits/rejected": -0.2083461433649063, |
|
"logps/chosen": -28.819889068603516, |
|
"logps/rejected": -52.013458251953125, |
|
"loss": 0.0081, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2809916734695435, |
|
"rewards/margins": 7.361697196960449, |
|
"rewards/rejected": -8.642688751220703, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 6.52542372881356, |
|
"grad_norm": 1.4580353927814265, |
|
"learning_rate": 5.0167845766937806e-08, |
|
"logits/chosen": -0.4725567400455475, |
|
"logits/rejected": -0.4601272642612457, |
|
"logps/chosen": -29.988752365112305, |
|
"logps/rejected": -49.545658111572266, |
|
"loss": 0.0148, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3534915447235107, |
|
"rewards/margins": 6.975593090057373, |
|
"rewards/rejected": -8.329084396362305, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 6.5423728813559325, |
|
"grad_norm": 1.6115119401528328, |
|
"learning_rate": 4.906027251335917e-08, |
|
"logits/chosen": -0.3040216565132141, |
|
"logits/rejected": -0.2666282653808594, |
|
"logps/chosen": -23.24091148376465, |
|
"logps/rejected": -58.34555435180664, |
|
"loss": 0.0121, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.226002812385559, |
|
"rewards/margins": 8.7108154296875, |
|
"rewards/rejected": -9.936819076538086, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 6.559322033898305, |
|
"grad_norm": 1.9607341782533316, |
|
"learning_rate": 4.7963730690815467e-08, |
|
"logits/chosen": -0.3676231801509857, |
|
"logits/rejected": -0.3557916581630707, |
|
"logps/chosen": -15.970260620117188, |
|
"logps/rejected": -43.20943069458008, |
|
"loss": 0.0133, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.12866909801959991, |
|
"rewards/margins": 7.353845119476318, |
|
"rewards/rejected": -7.2251763343811035, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 6.576271186440678, |
|
"grad_norm": 2.247467950803516, |
|
"learning_rate": 4.687828049857967e-08, |
|
"logits/chosen": -0.40337732434272766, |
|
"logits/rejected": -0.3796375095844269, |
|
"logps/chosen": -29.360713958740234, |
|
"logps/rejected": -45.309486389160156, |
|
"loss": 0.0243, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8816546201705933, |
|
"rewards/margins": 6.715970516204834, |
|
"rewards/rejected": -7.597624778747559, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 6.593220338983051, |
|
"grad_norm": 1.5736622891761218, |
|
"learning_rate": 4.580398152700304e-08, |
|
"logits/chosen": -0.41009533405303955, |
|
"logits/rejected": -0.445589154958725, |
|
"logps/chosen": -24.89777374267578, |
|
"logps/rejected": -54.37321853637695, |
|
"loss": 0.012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.717568576335907, |
|
"rewards/margins": 8.025527000427246, |
|
"rewards/rejected": -8.743096351623535, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 6.610169491525424, |
|
"grad_norm": 1.8210132983277654, |
|
"learning_rate": 4.47408927542435e-08, |
|
"logits/chosen": -0.23610210418701172, |
|
"logits/rejected": -0.2229936569929123, |
|
"logps/chosen": -23.190319061279297, |
|
"logps/rejected": -47.796974182128906, |
|
"loss": 0.0123, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9015488028526306, |
|
"rewards/margins": 6.892556667327881, |
|
"rewards/rejected": -7.794105052947998, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 6.627118644067797, |
|
"grad_norm": 1.8113881906369103, |
|
"learning_rate": 4.368907254302837e-08, |
|
"logits/chosen": -0.4094342589378357, |
|
"logits/rejected": -0.4127545654773712, |
|
"logps/chosen": -18.310993194580078, |
|
"logps/rejected": -50.38030242919922, |
|
"loss": 0.0125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3419753909111023, |
|
"rewards/margins": 8.148374557495117, |
|
"rewards/rejected": -8.490348815917969, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 6.6440677966101696, |
|
"grad_norm": 1.2223010613658718, |
|
"learning_rate": 4.264857863744956e-08, |
|
"logits/chosen": -0.3197595477104187, |
|
"logits/rejected": -0.2664377689361572, |
|
"logps/chosen": -22.68889617919922, |
|
"logps/rejected": -50.56337356567383, |
|
"loss": 0.0075, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.839199960231781, |
|
"rewards/margins": 8.539407730102539, |
|
"rewards/rejected": -9.378606796264648, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 6.661016949152542, |
|
"grad_norm": 1.712303748336679, |
|
"learning_rate": 4.161946815979403e-08, |
|
"logits/chosen": -0.34751880168914795, |
|
"logits/rejected": -0.32943466305732727, |
|
"logps/chosen": -31.06998062133789, |
|
"logps/rejected": -54.03396987915039, |
|
"loss": 0.0127, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.07644249498844147, |
|
"rewards/margins": 7.472164154052734, |
|
"rewards/rejected": -7.5486063957214355, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 6.677966101694915, |
|
"grad_norm": 1.5445735625557495, |
|
"learning_rate": 4.0601797607407505e-08, |
|
"logits/chosen": -0.43604975938796997, |
|
"logits/rejected": -0.44707322120666504, |
|
"logps/chosen": -24.34992218017578, |
|
"logps/rejected": -48.38789749145508, |
|
"loss": 0.0126, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0394837856292725, |
|
"rewards/margins": 6.550841331481934, |
|
"rewards/rejected": -7.590324878692627, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 6.694915254237288, |
|
"grad_norm": 1.3515811447555408, |
|
"learning_rate": 3.9595622849593e-08, |
|
"logits/chosen": -0.48450133204460144, |
|
"logits/rejected": -0.425273060798645, |
|
"logps/chosen": -24.3756046295166, |
|
"logps/rejected": -54.65086364746094, |
|
"loss": 0.0136, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8011985421180725, |
|
"rewards/margins": 8.252460479736328, |
|
"rewards/rejected": -9.053659439086914, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 6.711864406779661, |
|
"grad_norm": 1.82157364531828, |
|
"learning_rate": 3.8600999124543455e-08, |
|
"logits/chosen": -0.43978190422058105, |
|
"logits/rejected": -0.3958742618560791, |
|
"logps/chosen": -23.50148582458496, |
|
"logps/rejected": -49.621158599853516, |
|
"loss": 0.0102, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3347681760787964, |
|
"rewards/margins": 7.130214214324951, |
|
"rewards/rejected": -7.464982032775879, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 6.728813559322034, |
|
"grad_norm": 1.376347630528171, |
|
"learning_rate": 3.7617981036309533e-08, |
|
"logits/chosen": -0.44994401931762695, |
|
"logits/rejected": -0.4660834074020386, |
|
"logps/chosen": -22.31543731689453, |
|
"logps/rejected": -48.74754333496094, |
|
"loss": 0.0135, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6207740306854248, |
|
"rewards/margins": 7.773540019989014, |
|
"rewards/rejected": -8.39431381225586, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 6.745762711864407, |
|
"grad_norm": 1.6630761397695306, |
|
"learning_rate": 3.664662255180134e-08, |
|
"logits/chosen": -0.2478867769241333, |
|
"logits/rejected": -0.2218003273010254, |
|
"logps/chosen": -27.264450073242188, |
|
"logps/rejected": -48.04404830932617, |
|
"loss": 0.0177, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0151184797286987, |
|
"rewards/margins": 5.991296768188477, |
|
"rewards/rejected": -7.006415367126465, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 6.762711864406779, |
|
"grad_norm": 1.7823746972379073, |
|
"learning_rate": 3.5686976997826245e-08, |
|
"logits/chosen": -0.4420131742954254, |
|
"logits/rejected": -0.4338444769382477, |
|
"logps/chosen": -37.46350860595703, |
|
"logps/rejected": -59.96247100830078, |
|
"loss": 0.0096, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.341228723526001, |
|
"rewards/margins": 7.825350761413574, |
|
"rewards/rejected": -9.166579246520996, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 6.779661016949152, |
|
"grad_norm": 1.8399624391407163, |
|
"learning_rate": 3.473909705816111e-08, |
|
"logits/chosen": -0.31618526577949524, |
|
"logits/rejected": -0.2617036700248718, |
|
"logps/chosen": -35.39426803588867, |
|
"logps/rejected": -58.3309326171875, |
|
"loss": 0.0118, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.725992202758789, |
|
"rewards/margins": 8.16711711883545, |
|
"rewards/rejected": -9.893108367919922, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 6.796610169491525, |
|
"grad_norm": 1.3821869838967202, |
|
"learning_rate": 3.3803034770659824e-08, |
|
"logits/chosen": -0.43311774730682373, |
|
"logits/rejected": -0.400162935256958, |
|
"logps/chosen": -36.61433029174805, |
|
"logps/rejected": -77.34138488769531, |
|
"loss": 0.0096, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.166048526763916, |
|
"rewards/margins": 9.511601448059082, |
|
"rewards/rejected": -10.677648544311523, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 6.813559322033898, |
|
"grad_norm": 1.5978002425036417, |
|
"learning_rate": 3.287884152439646e-08, |
|
"logits/chosen": -0.2973329722881317, |
|
"logits/rejected": -0.27377772331237793, |
|
"logps/chosen": -30.09129524230957, |
|
"logps/rejected": -53.16349792480469, |
|
"loss": 0.0142, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7582967281341553, |
|
"rewards/margins": 7.514451503753662, |
|
"rewards/rejected": -8.272747993469238, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 6.830508474576272, |
|
"grad_norm": 1.8281621888445494, |
|
"learning_rate": 3.19665680568445e-08, |
|
"logits/chosen": -0.4268870949745178, |
|
"logits/rejected": -0.37249866127967834, |
|
"logps/chosen": -32.59174346923828, |
|
"logps/rejected": -46.462005615234375, |
|
"loss": 0.0123, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4710607528686523, |
|
"rewards/margins": 5.770994186401367, |
|
"rewards/rejected": -7.242054462432861, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 6.847457627118644, |
|
"grad_norm": 0.9001061616975613, |
|
"learning_rate": 3.106626445109081e-08, |
|
"logits/chosen": -0.37813207507133484, |
|
"logits/rejected": -0.3872162401676178, |
|
"logps/chosen": -30.775136947631836, |
|
"logps/rejected": -61.652565002441406, |
|
"loss": 0.0096, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1428020000457764, |
|
"rewards/margins": 7.899393081665039, |
|
"rewards/rejected": -9.042195320129395, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 6.864406779661017, |
|
"grad_norm": 1.597346490336906, |
|
"learning_rate": 3.017798013308645e-08, |
|
"logits/chosen": -0.3538016080856323, |
|
"logits/rejected": -0.341571182012558, |
|
"logps/chosen": -31.822330474853516, |
|
"logps/rejected": -50.422752380371094, |
|
"loss": 0.012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8719119429588318, |
|
"rewards/margins": 7.065603256225586, |
|
"rewards/rejected": -7.937515735626221, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 6.88135593220339, |
|
"grad_norm": 1.810856752512637, |
|
"learning_rate": 2.9301763868933153e-08, |
|
"logits/chosen": -0.4209059178829193, |
|
"logits/rejected": -0.373024046421051, |
|
"logps/chosen": -22.65794563293457, |
|
"logps/rejected": -46.467655181884766, |
|
"loss": 0.0186, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.554482102394104, |
|
"rewards/margins": 7.356679439544678, |
|
"rewards/rejected": -7.911161422729492, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 6.898305084745763, |
|
"grad_norm": 1.573195940423645, |
|
"learning_rate": 2.843766376220616e-08, |
|
"logits/chosen": -0.48762577772140503, |
|
"logits/rejected": -0.5053017735481262, |
|
"logps/chosen": -26.841205596923828, |
|
"logps/rejected": -52.88998794555664, |
|
"loss": 0.0087, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3870327472686768, |
|
"rewards/margins": 7.6463212966918945, |
|
"rewards/rejected": -9.033354759216309, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 6.915254237288136, |
|
"grad_norm": 2.0992362165416494, |
|
"learning_rate": 2.7585727251313195e-08, |
|
"logits/chosen": -0.39123690128326416, |
|
"logits/rejected": -0.33466434478759766, |
|
"logps/chosen": -41.52196502685547, |
|
"logps/rejected": -67.35308837890625, |
|
"loss": 0.0222, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.1684083938598633, |
|
"rewards/margins": 7.4681878089904785, |
|
"rewards/rejected": -10.636595726013184, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 6.932203389830509, |
|
"grad_norm": 1.4571552338914753, |
|
"learning_rate": 2.6746001106890377e-08, |
|
"logits/chosen": -0.4723522663116455, |
|
"logits/rejected": -0.4505174458026886, |
|
"logps/chosen": -29.244626998901367, |
|
"logps/rejected": -52.072750091552734, |
|
"loss": 0.0083, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2372843027114868, |
|
"rewards/margins": 6.951776027679443, |
|
"rewards/rejected": -8.18906021118164, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 6.9491525423728815, |
|
"grad_norm": 1.4632431809951456, |
|
"learning_rate": 2.5918531429234364e-08, |
|
"logits/chosen": -0.34531697630882263, |
|
"logits/rejected": -0.2954227924346924, |
|
"logps/chosen": -26.551538467407227, |
|
"logps/rejected": -64.3398666381836, |
|
"loss": 0.0084, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1470048427581787, |
|
"rewards/margins": 9.478975296020508, |
|
"rewards/rejected": -10.62597942352295, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 6.966101694915254, |
|
"grad_norm": 1.4110992084448712, |
|
"learning_rate": 2.5103363645771536e-08, |
|
"logits/chosen": -0.5194912552833557, |
|
"logits/rejected": -0.48006966710090637, |
|
"logps/chosen": -37.35237121582031, |
|
"logps/rejected": -52.63975524902344, |
|
"loss": 0.0147, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2981250286102295, |
|
"rewards/margins": 6.82828950881958, |
|
"rewards/rejected": -8.126415252685547, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 6.983050847457627, |
|
"grad_norm": 1.4538836011603475, |
|
"learning_rate": 2.4300542508564114e-08, |
|
"logits/chosen": -0.3935295641422272, |
|
"logits/rejected": -0.336182177066803, |
|
"logps/chosen": -25.596094131469727, |
|
"logps/rejected": -51.74674987792969, |
|
"loss": 0.0145, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5983898639678955, |
|
"rewards/margins": 7.388174057006836, |
|
"rewards/rejected": -7.986563682556152, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 1.5706199385371322, |
|
"learning_rate": 2.3510112091853357e-08, |
|
"logits/chosen": -0.2152971625328064, |
|
"logits/rejected": -0.1929609775543213, |
|
"logps/chosen": -20.163312911987305, |
|
"logps/rejected": -52.776371002197266, |
|
"loss": 0.0091, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.37090063095092773, |
|
"rewards/margins": 7.480232238769531, |
|
"rewards/rejected": -7.851133346557617, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 7.016949152542373, |
|
"grad_norm": 1.7457089160064294, |
|
"learning_rate": 2.27321157896396e-08, |
|
"logits/chosen": -0.305334210395813, |
|
"logits/rejected": -0.29125475883483887, |
|
"logps/chosen": -27.516210556030273, |
|
"logps/rejected": -57.63336944580078, |
|
"loss": 0.0141, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9373904466629028, |
|
"rewards/margins": 8.481929779052734, |
|
"rewards/rejected": -9.419321060180664, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 7.033898305084746, |
|
"grad_norm": 1.5469113812840338, |
|
"learning_rate": 2.1966596313300362e-08, |
|
"logits/chosen": -0.5671955943107605, |
|
"logits/rejected": -0.5605946779251099, |
|
"logps/chosen": -27.200397491455078, |
|
"logps/rejected": -47.673065185546875, |
|
"loss": 0.0158, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.48190128803253174, |
|
"rewards/margins": 6.851955413818359, |
|
"rewards/rejected": -7.33385705947876, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 7.0508474576271185, |
|
"grad_norm": 1.2508305219444056, |
|
"learning_rate": 2.1213595689245384e-08, |
|
"logits/chosen": -0.3499354422092438, |
|
"logits/rejected": -0.3194410800933838, |
|
"logps/chosen": -24.055994033813477, |
|
"logps/rejected": -44.12571716308594, |
|
"loss": 0.0084, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.39127033948898315, |
|
"rewards/margins": 6.136053085327148, |
|
"rewards/rejected": -6.527322769165039, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 7.067796610169491, |
|
"grad_norm": 1.6178572230856318, |
|
"learning_rate": 2.0473155256609363e-08, |
|
"logits/chosen": -0.4242691993713379, |
|
"logits/rejected": -0.4158502221107483, |
|
"logps/chosen": -27.026355743408203, |
|
"logps/rejected": -50.12807083129883, |
|
"loss": 0.02, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.9110782146453857, |
|
"rewards/margins": 6.59192419052124, |
|
"rewards/rejected": -7.503002643585205, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 7.084745762711864, |
|
"grad_norm": 1.4383810028427624, |
|
"learning_rate": 1.9745315664982277e-08, |
|
"logits/chosen": -0.452391117811203, |
|
"logits/rejected": -0.42704349756240845, |
|
"logps/chosen": -21.179094314575195, |
|
"logps/rejected": -45.48261260986328, |
|
"loss": 0.0111, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6325095891952515, |
|
"rewards/margins": 7.283842086791992, |
|
"rewards/rejected": -7.916351318359375, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 7.101694915254237, |
|
"grad_norm": 1.4230254989220643, |
|
"learning_rate": 1.9030116872178314e-08, |
|
"logits/chosen": -0.4608815014362335, |
|
"logits/rejected": -0.4329046905040741, |
|
"logps/chosen": -25.20135498046875, |
|
"logps/rejected": -48.06166458129883, |
|
"loss": 0.009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5603317022323608, |
|
"rewards/margins": 6.86769962310791, |
|
"rewards/rejected": -7.428031921386719, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 7.11864406779661, |
|
"grad_norm": 1.7148759769374629, |
|
"learning_rate": 1.8327598142041656e-08, |
|
"logits/chosen": -0.06360499560832977, |
|
"logits/rejected": -0.02670701965689659, |
|
"logps/chosen": -38.62311935424805, |
|
"logps/rejected": -69.75874328613281, |
|
"loss": 0.0138, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8160368800163269, |
|
"rewards/margins": 8.83346176147461, |
|
"rewards/rejected": -9.649497032165527, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 7.135593220338983, |
|
"grad_norm": 1.6040424998385574, |
|
"learning_rate": 1.7637798042291125e-08, |
|
"logits/chosen": -0.40952420234680176, |
|
"logits/rejected": -0.4033817648887634, |
|
"logps/chosen": -31.083284378051758, |
|
"logps/rejected": -49.10109329223633, |
|
"loss": 0.0081, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0806251764297485, |
|
"rewards/margins": 6.7387261390686035, |
|
"rewards/rejected": -7.819350719451904, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 7.1525423728813555, |
|
"grad_norm": 1.7913863319457528, |
|
"learning_rate": 1.696075444240305e-08, |
|
"logits/chosen": -0.3758937120437622, |
|
"logits/rejected": -0.33229541778564453, |
|
"logps/chosen": -22.26805877685547, |
|
"logps/rejected": -49.145355224609375, |
|
"loss": 0.0102, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9506510496139526, |
|
"rewards/margins": 6.685871601104736, |
|
"rewards/rejected": -7.636523246765137, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 7.169491525423728, |
|
"grad_norm": 1.8102037954527834, |
|
"learning_rate": 1.6296504511531834e-08, |
|
"logits/chosen": -0.43989044427871704, |
|
"logits/rejected": -0.44529837369918823, |
|
"logps/chosen": -27.95105743408203, |
|
"logps/rejected": -56.04852294921875, |
|
"loss": 0.0115, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9269706010818481, |
|
"rewards/margins": 7.820449352264404, |
|
"rewards/rejected": -8.747420310974121, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 7.186440677966102, |
|
"grad_norm": 1.5701815802054835, |
|
"learning_rate": 1.5645084716469776e-08, |
|
"logits/chosen": -0.4497320353984833, |
|
"logits/rejected": -0.41386500000953674, |
|
"logps/chosen": -33.39286804199219, |
|
"logps/rejected": -56.49354934692383, |
|
"loss": 0.0113, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3042948246002197, |
|
"rewards/margins": 8.047407150268555, |
|
"rewards/rejected": -9.351702690124512, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 7.203389830508475, |
|
"grad_norm": 1.3727625339462444, |
|
"learning_rate": 1.5006530819644923e-08, |
|
"logits/chosen": -0.2934183180332184, |
|
"logits/rejected": -0.3134685158729553, |
|
"logps/chosen": -31.583393096923828, |
|
"logps/rejected": -53.04698944091797, |
|
"loss": 0.0081, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4079774618148804, |
|
"rewards/margins": 7.08270263671875, |
|
"rewards/rejected": -8.490680694580078, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 7.220338983050848, |
|
"grad_norm": 1.2271197429060396, |
|
"learning_rate": 1.4380877877157832e-08, |
|
"logits/chosen": -0.3554607629776001, |
|
"logits/rejected": -0.365239679813385, |
|
"logps/chosen": -28.676355361938477, |
|
"logps/rejected": -57.925479888916016, |
|
"loss": 0.0066, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0384249687194824, |
|
"rewards/margins": 7.682095527648926, |
|
"rewards/rejected": -8.720520973205566, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 7.237288135593221, |
|
"grad_norm": 0.9777815105599793, |
|
"learning_rate": 1.3768160236856674e-08, |
|
"logits/chosen": -0.3703988194465637, |
|
"logits/rejected": -0.3820286691188812, |
|
"logps/chosen": -29.876129150390625, |
|
"logps/rejected": -59.59389114379883, |
|
"loss": 0.0048, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.152010202407837, |
|
"rewards/margins": 7.627685070037842, |
|
"rewards/rejected": -8.779695510864258, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 7.254237288135593, |
|
"grad_norm": 1.626489536598415, |
|
"learning_rate": 1.316841153645215e-08, |
|
"logits/chosen": -0.4109363853931427, |
|
"logits/rejected": -0.34275108575820923, |
|
"logps/chosen": -29.276466369628906, |
|
"logps/rejected": -53.1593132019043, |
|
"loss": 0.0081, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.102203369140625, |
|
"rewards/margins": 7.080381870269775, |
|
"rewards/rejected": -8.182584762573242, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 7.271186440677966, |
|
"grad_norm": 1.6657284917403243, |
|
"learning_rate": 1.2581664701670296e-08, |
|
"logits/chosen": -0.44309279322624207, |
|
"logits/rejected": -0.3362104892730713, |
|
"logps/chosen": -29.43478012084961, |
|
"logps/rejected": -52.261634826660156, |
|
"loss": 0.0172, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.8790161609649658, |
|
"rewards/margins": 7.74953031539917, |
|
"rewards/rejected": -9.628546714782715, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 7.288135593220339, |
|
"grad_norm": 1.5026292891085353, |
|
"learning_rate": 1.2007951944445121e-08, |
|
"logits/chosen": -0.3713536262512207, |
|
"logits/rejected": -0.3408533036708832, |
|
"logps/chosen": -21.457298278808594, |
|
"logps/rejected": -46.37548065185547, |
|
"loss": 0.008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2886759042739868, |
|
"rewards/margins": 6.526200771331787, |
|
"rewards/rejected": -6.814876556396484, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 7.305084745762712, |
|
"grad_norm": 1.639671277812395, |
|
"learning_rate": 1.144730476115019e-08, |
|
"logits/chosen": -0.4143469035625458, |
|
"logits/rejected": -0.4372211694717407, |
|
"logps/chosen": -27.68434715270996, |
|
"logps/rejected": -61.32562255859375, |
|
"loss": 0.0149, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6124215126037598, |
|
"rewards/margins": 7.509528636932373, |
|
"rewards/rejected": -9.121950149536133, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 7.322033898305085, |
|
"grad_norm": 1.3049845757020513, |
|
"learning_rate": 1.0899753930869394e-08, |
|
"logits/chosen": -0.4528166949748993, |
|
"logits/rejected": -0.4254574775695801, |
|
"logps/chosen": -26.2335147857666, |
|
"logps/rejected": -55.94972229003906, |
|
"loss": 0.0079, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2019011974334717, |
|
"rewards/margins": 8.213329315185547, |
|
"rewards/rejected": -9.415230751037598, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 7.338983050847458, |
|
"grad_norm": 1.6327474891460472, |
|
"learning_rate": 1.036532951370736e-08, |
|
"logits/chosen": -0.41717565059661865, |
|
"logits/rejected": -0.3538900911808014, |
|
"logps/chosen": -28.74737548828125, |
|
"logps/rejected": -59.961647033691406, |
|
"loss": 0.0159, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.015626758337020874, |
|
"rewards/margins": 8.777100563049316, |
|
"rewards/rejected": -8.792726516723633, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 7.3559322033898304, |
|
"grad_norm": 1.5187852224535574, |
|
"learning_rate": 9.844060849138997e-09, |
|
"logits/chosen": -0.4029984474182129, |
|
"logits/rejected": -0.38465699553489685, |
|
"logps/chosen": -21.391469955444336, |
|
"logps/rejected": -47.680503845214844, |
|
"loss": 0.0153, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.46827206015586853, |
|
"rewards/margins": 7.35407829284668, |
|
"rewards/rejected": -7.822350025177002, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 7.372881355932203, |
|
"grad_norm": 1.3171463040757392, |
|
"learning_rate": 9.335976554398912e-09, |
|
"logits/chosen": -0.5152924060821533, |
|
"logits/rejected": -0.43552643060684204, |
|
"logps/chosen": -28.43988800048828, |
|
"logps/rejected": -45.27113342285156, |
|
"loss": 0.0071, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8594914674758911, |
|
"rewards/margins": 6.36436128616333, |
|
"rewards/rejected": -7.223852157592773, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 7.389830508474576, |
|
"grad_norm": 1.352843609066883, |
|
"learning_rate": 8.841104522910342e-09, |
|
"logits/chosen": -0.3669931888580322, |
|
"logits/rejected": -0.34047171473503113, |
|
"logps/chosen": -35.265140533447266, |
|
"logps/rejected": -61.45825958251953, |
|
"loss": 0.0074, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.776789665222168, |
|
"rewards/margins": 8.250631332397461, |
|
"rewards/rejected": -10.027421951293945, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 7.406779661016949, |
|
"grad_norm": 1.8098418726298369, |
|
"learning_rate": 8.359471922753714e-09, |
|
"logits/chosen": -0.355437695980072, |
|
"logits/rejected": -0.3270444869995117, |
|
"logps/chosen": -29.349018096923828, |
|
"logps/rejected": -60.3387336730957, |
|
"loss": 0.0136, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6025338768959045, |
|
"rewards/margins": 8.68097972869873, |
|
"rewards/rejected": -9.283513069152832, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 7.423728813559322, |
|
"grad_norm": 1.8786140797643052, |
|
"learning_rate": 7.891105195175356e-09, |
|
"logits/chosen": -0.41774412989616394, |
|
"logits/rejected": -0.379474401473999, |
|
"logps/chosen": -31.24578094482422, |
|
"logps/rejected": -48.23630905151367, |
|
"loss": 0.0109, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4310747385025024, |
|
"rewards/margins": 6.719178676605225, |
|
"rewards/rejected": -8.150252342224121, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 7.440677966101695, |
|
"grad_norm": 1.607505847132714, |
|
"learning_rate": 7.4360300531355894e-09, |
|
"logits/chosen": -0.2568835914134979, |
|
"logits/rejected": -0.22977690398693085, |
|
"logps/chosen": -32.3515625, |
|
"logps/rejected": -66.43345642089844, |
|
"loss": 0.0093, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2904284000396729, |
|
"rewards/margins": 8.297982215881348, |
|
"rewards/rejected": -9.588411331176758, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 7.4576271186440675, |
|
"grad_norm": 1.3035469316341721, |
|
"learning_rate": 6.994271479897313e-09, |
|
"logits/chosen": -0.43775883316993713, |
|
"logits/rejected": -0.4212513267993927, |
|
"logps/chosen": -22.23575210571289, |
|
"logps/rejected": -42.02503967285156, |
|
"loss": 0.0075, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.44508761167526245, |
|
"rewards/margins": 6.237364768981934, |
|
"rewards/rejected": -6.68245267868042, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 7.47457627118644, |
|
"grad_norm": 1.366151434834416, |
|
"learning_rate": 6.565853727654502e-09, |
|
"logits/chosen": -0.5031697154045105, |
|
"logits/rejected": -0.5177669525146484, |
|
"logps/chosen": -31.38254737854004, |
|
"logps/rejected": -55.45163345336914, |
|
"loss": 0.0111, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3961818218231201, |
|
"rewards/margins": 7.067687034606934, |
|
"rewards/rejected": -8.463868141174316, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 7.491525423728813, |
|
"grad_norm": 1.4529924037385114, |
|
"learning_rate": 6.150800316200605e-09, |
|
"logits/chosen": -0.44623109698295593, |
|
"logits/rejected": -0.42772334814071655, |
|
"logps/chosen": -26.20204734802246, |
|
"logps/rejected": -44.2577018737793, |
|
"loss": 0.0091, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4152941405773163, |
|
"rewards/margins": 7.550034046173096, |
|
"rewards/rejected": -7.965329170227051, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 7.508474576271187, |
|
"grad_norm": 1.7067297640882242, |
|
"learning_rate": 5.7491340316373485e-09, |
|
"logits/chosen": -0.291814386844635, |
|
"logits/rejected": -0.2504284083843231, |
|
"logps/chosen": -27.000459671020508, |
|
"logps/rejected": -54.978851318359375, |
|
"loss": 0.0109, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1894947290420532, |
|
"rewards/margins": 7.88712739944458, |
|
"rewards/rejected": -9.076622009277344, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 7.52542372881356, |
|
"grad_norm": 1.8852004572462866, |
|
"learning_rate": 5.360876925123992e-09, |
|
"logits/chosen": -0.4699954390525818, |
|
"logits/rejected": -0.4355739653110504, |
|
"logps/chosen": -34.63423156738281, |
|
"logps/rejected": -65.0200424194336, |
|
"loss": 0.0093, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3088154792785645, |
|
"rewards/margins": 8.483866691589355, |
|
"rewards/rejected": -9.792682647705078, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 7.5423728813559325, |
|
"grad_norm": 1.9525606885122415, |
|
"learning_rate": 4.9860503116665176e-09, |
|
"logits/chosen": -0.580295205116272, |
|
"logits/rejected": -0.5321290493011475, |
|
"logps/chosen": -26.729717254638672, |
|
"logps/rejected": -52.8782958984375, |
|
"loss": 0.0159, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3733120858669281, |
|
"rewards/margins": 6.56894588470459, |
|
"rewards/rejected": -6.942258358001709, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 7.559322033898305, |
|
"grad_norm": 1.9545216316177383, |
|
"learning_rate": 4.624674768947484e-09, |
|
"logits/chosen": -0.47320348024368286, |
|
"logits/rejected": -0.42538437247276306, |
|
"logps/chosen": -27.289257049560547, |
|
"logps/rejected": -52.25529479980469, |
|
"loss": 0.0169, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5522884130477905, |
|
"rewards/margins": 6.996241092681885, |
|
"rewards/rejected": -7.548530578613281, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 7.576271186440678, |
|
"grad_norm": 1.884083633370762, |
|
"learning_rate": 4.2767701361964835e-09, |
|
"logits/chosen": -0.31534552574157715, |
|
"logits/rejected": -0.2909752428531647, |
|
"logps/chosen": -37.41596221923828, |
|
"logps/rejected": -55.43064880371094, |
|
"loss": 0.0142, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.208031415939331, |
|
"rewards/margins": 6.201999664306641, |
|
"rewards/rejected": -8.41003131866455, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 7.593220338983051, |
|
"grad_norm": 1.6257233201960972, |
|
"learning_rate": 3.942355513100792e-09, |
|
"logits/chosen": -0.40161648392677307, |
|
"logits/rejected": -0.4106261134147644, |
|
"logps/chosen": -26.018543243408203, |
|
"logps/rejected": -62.679073333740234, |
|
"loss": 0.0092, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1849199533462524, |
|
"rewards/margins": 8.389888763427734, |
|
"rewards/rejected": -9.574809074401855, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 7.610169491525424, |
|
"grad_norm": 1.7831606635295467, |
|
"learning_rate": 3.6214492587569313e-09, |
|
"logits/chosen": -0.3574334383010864, |
|
"logits/rejected": -0.350351482629776, |
|
"logps/chosen": -33.58333206176758, |
|
"logps/rejected": -48.65354537963867, |
|
"loss": 0.0124, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2194223403930664, |
|
"rewards/margins": 7.209741592407227, |
|
"rewards/rejected": -8.429162979125977, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 7.627118644067797, |
|
"grad_norm": 1.9827926138744145, |
|
"learning_rate": 3.314068990662805e-09, |
|
"logits/chosen": -0.5334146022796631, |
|
"logits/rejected": -0.4546634256839752, |
|
"logps/chosen": -25.30044174194336, |
|
"logps/rejected": -49.781150817871094, |
|
"loss": 0.016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5673917531967163, |
|
"rewards/margins": 7.860011577606201, |
|
"rewards/rejected": -8.427403450012207, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 7.6440677966101696, |
|
"grad_norm": 1.5345319966235849, |
|
"learning_rate": 3.0202315837502545e-09, |
|
"logits/chosen": -0.41027843952178955, |
|
"logits/rejected": -0.36624419689178467, |
|
"logps/chosen": -29.778715133666992, |
|
"logps/rejected": -46.88585662841797, |
|
"loss": 0.0096, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.713862657546997, |
|
"rewards/margins": 5.918593406677246, |
|
"rewards/rejected": -7.632455825805664, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 7.661016949152542, |
|
"grad_norm": 1.45498172133433, |
|
"learning_rate": 2.7399531694589917e-09, |
|
"logits/chosen": -0.49980151653289795, |
|
"logits/rejected": -0.5059882998466492, |
|
"logps/chosen": -27.507404327392578, |
|
"logps/rejected": -53.86846923828125, |
|
"loss": 0.0079, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6134958267211914, |
|
"rewards/margins": 7.079102039337158, |
|
"rewards/rejected": -8.692597389221191, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 7.677966101694915, |
|
"grad_norm": 1.3296441933194811, |
|
"learning_rate": 2.473249134850808e-09, |
|
"logits/chosen": -0.3527723550796509, |
|
"logits/rejected": -0.31979426741600037, |
|
"logps/chosen": -22.46451187133789, |
|
"logps/rejected": -50.37282180786133, |
|
"loss": 0.0083, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8375161290168762, |
|
"rewards/margins": 7.392008304595947, |
|
"rewards/rejected": -8.229524612426758, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 7.694915254237288, |
|
"grad_norm": 1.9704674503284925, |
|
"learning_rate": 2.220134121764833e-09, |
|
"logits/chosen": -0.43200796842575073, |
|
"logits/rejected": -0.4080568850040436, |
|
"logps/chosen": -15.70004940032959, |
|
"logps/rejected": -44.24908447265625, |
|
"loss": 0.0192, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.03188416361808777, |
|
"rewards/margins": 7.455360412597656, |
|
"rewards/rejected": -7.423476219177246, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 7.711864406779661, |
|
"grad_norm": 1.58493742628634, |
|
"learning_rate": 1.9806220260137065e-09, |
|
"logits/chosen": -0.4422493577003479, |
|
"logits/rejected": -0.37290158867836, |
|
"logps/chosen": -30.651966094970703, |
|
"logps/rejected": -55.6935920715332, |
|
"loss": 0.0246, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4701915681362152, |
|
"rewards/margins": 8.02414321899414, |
|
"rewards/rejected": -8.494333267211914, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 7.728813559322034, |
|
"grad_norm": 1.309516904226872, |
|
"learning_rate": 1.7547259966207705e-09, |
|
"logits/chosen": -0.5261704325675964, |
|
"logits/rejected": -0.49233362078666687, |
|
"logps/chosen": -27.28386116027832, |
|
"logps/rejected": -53.776641845703125, |
|
"loss": 0.0072, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0615800619125366, |
|
"rewards/margins": 8.335307121276855, |
|
"rewards/rejected": -9.396886825561523, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 7.745762711864407, |
|
"grad_norm": 1.7021291625550554, |
|
"learning_rate": 1.5424584350981485e-09, |
|
"logits/chosen": -0.3087800443172455, |
|
"logits/rejected": -0.3105306923389435, |
|
"logps/chosen": -24.47256851196289, |
|
"logps/rejected": -50.43601989746094, |
|
"loss": 0.0103, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5138031244277954, |
|
"rewards/margins": 7.677865505218506, |
|
"rewards/rejected": -8.191668510437012, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 7.762711864406779, |
|
"grad_norm": 1.476829222148452, |
|
"learning_rate": 1.343830994765982e-09, |
|
"logits/chosen": -0.44474345445632935, |
|
"logits/rejected": -0.42049241065979004, |
|
"logps/chosen": -23.719074249267578, |
|
"logps/rejected": -62.17032241821289, |
|
"loss": 0.0196, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6281836032867432, |
|
"rewards/margins": 9.182320594787598, |
|
"rewards/rejected": -9.810504913330078, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 7.779661016949152, |
|
"grad_norm": 1.350716594904905, |
|
"learning_rate": 1.1588545801125837e-09, |
|
"logits/chosen": -0.5191625356674194, |
|
"logits/rejected": -0.4718668460845947, |
|
"logps/chosen": -35.341068267822266, |
|
"logps/rejected": -59.45354461669922, |
|
"loss": 0.0123, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2722684144973755, |
|
"rewards/margins": 7.540390968322754, |
|
"rewards/rejected": -8.81265926361084, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 7.796610169491525, |
|
"grad_norm": 1.4635314314598586, |
|
"learning_rate": 9.87539346195776e-10, |
|
"logits/chosen": -0.3168594241142273, |
|
"logits/rejected": -0.2879508435726166, |
|
"logps/chosen": -28.040536880493164, |
|
"logps/rejected": -43.86100387573242, |
|
"loss": 0.0096, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7127920389175415, |
|
"rewards/margins": 6.108259201049805, |
|
"rewards/rejected": -6.821051120758057, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 7.813559322033898, |
|
"grad_norm": 1.5494249427881754, |
|
"learning_rate": 8.298946980855315e-10, |
|
"logits/chosen": -0.4457828998565674, |
|
"logits/rejected": -0.3980650007724762, |
|
"logps/chosen": -25.963443756103516, |
|
"logps/rejected": -45.423763275146484, |
|
"loss": 0.0087, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5499848127365112, |
|
"rewards/margins": 6.5887556076049805, |
|
"rewards/rejected": -7.138739585876465, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 7.830508474576272, |
|
"grad_norm": 1.4534702698382904, |
|
"learning_rate": 6.8592929034747e-10, |
|
"logits/chosen": -0.35777002573013306, |
|
"logits/rejected": -0.3949616849422455, |
|
"logps/chosen": -28.67134666442871, |
|
"logps/rejected": -57.564937591552734, |
|
"loss": 0.0087, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1654642820358276, |
|
"rewards/margins": 6.651158332824707, |
|
"rewards/rejected": -7.816622257232666, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 7.847457627118644, |
|
"grad_norm": 1.032270598397053, |
|
"learning_rate": 5.556510265678771e-10, |
|
"logits/chosen": -0.4886370003223419, |
|
"logits/rejected": -0.5037115216255188, |
|
"logps/chosen": -20.78964614868164, |
|
"logps/rejected": -48.41303253173828, |
|
"loss": 0.0107, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6196805834770203, |
|
"rewards/margins": 7.310200214385986, |
|
"rewards/rejected": -7.9298810958862305, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 7.864406779661017, |
|
"grad_norm": 1.2578929925717066, |
|
"learning_rate": 4.390670589196621e-10, |
|
"logits/chosen": -0.2916780114173889, |
|
"logits/rejected": -0.2758171856403351, |
|
"logps/chosen": -25.246580123901367, |
|
"logps/rejected": -56.34712219238281, |
|
"loss": 0.0083, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3181313276290894, |
|
"rewards/margins": 8.5591402053833, |
|
"rewards/rejected": -9.877272605895996, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 7.88135593220339, |
|
"grad_norm": 1.718937820229263, |
|
"learning_rate": 3.3618378776981147e-10, |
|
"logits/chosen": -0.2728620767593384, |
|
"logits/rejected": -0.25526100397109985, |
|
"logps/chosen": -27.420053482055664, |
|
"logps/rejected": -48.728145599365234, |
|
"loss": 0.0099, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.45923471450805664, |
|
"rewards/margins": 6.748472213745117, |
|
"rewards/rejected": -7.207706928253174, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 7.898305084745763, |
|
"grad_norm": 1.3557652621850438, |
|
"learning_rate": 2.4700686132803075e-10, |
|
"logits/chosen": -0.3592544496059418, |
|
"logits/rejected": -0.37164703011512756, |
|
"logps/chosen": -28.122146606445312, |
|
"logps/rejected": -53.134910583496094, |
|
"loss": 0.0139, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5242568254470825, |
|
"rewards/margins": 7.785172939300537, |
|
"rewards/rejected": -8.309430122375488, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 7.915254237288136, |
|
"grad_norm": 1.2354179862035723, |
|
"learning_rate": 1.715411753365481e-10, |
|
"logits/chosen": -0.5242431163787842, |
|
"logits/rejected": -0.4909352958202362, |
|
"logps/chosen": -26.54534912109375, |
|
"logps/rejected": -55.37403106689453, |
|
"loss": 0.0128, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6659201383590698, |
|
"rewards/margins": 7.7830328941345215, |
|
"rewards/rejected": -9.448952674865723, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 7.932203389830509, |
|
"grad_norm": 1.6240885957181501, |
|
"learning_rate": 1.0979087280141297e-10, |
|
"logits/chosen": -0.32274141907691956, |
|
"logits/rejected": -0.32932335138320923, |
|
"logps/chosen": -20.59052085876465, |
|
"logps/rejected": -46.62628173828125, |
|
"loss": 0.0089, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.706177294254303, |
|
"rewards/margins": 7.163034439086914, |
|
"rewards/rejected": -7.8692121505737305, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 7.9491525423728815, |
|
"grad_norm": 1.495293278605491, |
|
"learning_rate": 6.175934376509429e-11, |
|
"logits/chosen": -0.272166907787323, |
|
"logits/rejected": -0.29551127552986145, |
|
"logps/chosen": -27.317262649536133, |
|
"logps/rejected": -73.80632781982422, |
|
"loss": 0.0084, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1428358554840088, |
|
"rewards/margins": 10.341331481933594, |
|
"rewards/rejected": -11.484167098999023, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 7.966101694915254, |
|
"grad_norm": 1.4312305917111094, |
|
"learning_rate": 2.7449225120268482e-11, |
|
"logits/chosen": -0.2674176096916199, |
|
"logits/rejected": -0.25498396158218384, |
|
"logps/chosen": -26.586524963378906, |
|
"logps/rejected": -52.65561294555664, |
|
"loss": 0.0109, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.174375295639038, |
|
"rewards/margins": 6.976294040679932, |
|
"rewards/rejected": -8.150670051574707, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 7.983050847457627, |
|
"grad_norm": 1.5506645838575677, |
|
"learning_rate": 6.862400465157403e-12, |
|
"logits/chosen": -0.22756405174732208, |
|
"logits/rejected": -0.21930274367332458, |
|
"logps/chosen": -35.59461212158203, |
|
"logps/rejected": -42.98273849487305, |
|
"loss": 0.0099, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.217814564704895, |
|
"rewards/margins": 5.8322625160217285, |
|
"rewards/rejected": -7.050076484680176, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 1.608716199113347, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -0.49069491028785706, |
|
"logits/rejected": -0.4363957941532135, |
|
"logps/chosen": -31.856151580810547, |
|
"logps/rejected": -46.5308837890625, |
|
"loss": 0.0096, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7556713223457336, |
|
"rewards/margins": 6.963629245758057, |
|
"rewards/rejected": -7.719299793243408, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"step": 472, |
|
"total_flos": 0.0, |
|
"train_loss": 0.11897581996064696, |
|
"train_runtime": 99870.155, |
|
"train_samples_per_second": 0.605, |
|
"train_steps_per_second": 0.005 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 472, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 8, |
|
"save_steps": 400, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|