|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 3821, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.3054830287206268e-08, |
|
"logits/chosen": 0.9550814628601074, |
|
"logits/rejected": 1.0664727687835693, |
|
"logps/chosen": -190.47879028320312, |
|
"logps/rejected": -177.6958770751953, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.3054830287206266e-07, |
|
"logits/chosen": 1.0215020179748535, |
|
"logits/rejected": 1.073843240737915, |
|
"logps/chosen": -277.8812561035156, |
|
"logps/rejected": -268.32220458984375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4097222089767456, |
|
"rewards/chosen": 0.00012883776798844337, |
|
"rewards/margins": 0.0005512596690095961, |
|
"rewards/rejected": -0.00042242190102115273, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.610966057441253e-07, |
|
"logits/chosen": 1.054603099822998, |
|
"logits/rejected": 1.0355112552642822, |
|
"logps/chosen": -258.0608215332031, |
|
"logps/rejected": -219.5281219482422, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.0007742593297734857, |
|
"rewards/margins": -0.0006406827596947551, |
|
"rewards/rejected": -0.00013357654097490013, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.9164490861618804e-07, |
|
"logits/chosen": 0.9782761335372925, |
|
"logits/rejected": 0.9957435727119446, |
|
"logps/chosen": -234.4627685546875, |
|
"logps/rejected": -216.38687133789062, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.0007474010926671326, |
|
"rewards/margins": 0.00048109880299307406, |
|
"rewards/rejected": -0.0012284999247640371, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.221932114882506e-07, |
|
"logits/chosen": 1.060667634010315, |
|
"logits/rejected": 1.0616825819015503, |
|
"logps/chosen": -269.3723449707031, |
|
"logps/rejected": -236.52392578125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -0.0005697375163435936, |
|
"rewards/margins": -0.00030056110699661076, |
|
"rewards/rejected": -0.0002691763802431524, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.527415143603135e-07, |
|
"logits/chosen": 1.0123419761657715, |
|
"logits/rejected": 1.0498076677322388, |
|
"logps/chosen": -245.165771484375, |
|
"logps/rejected": -241.9037322998047, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.0007764647016301751, |
|
"rewards/margins": -0.00020735012367367744, |
|
"rewards/rejected": -0.0005691145197488368, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.832898172323761e-07, |
|
"logits/chosen": 0.9758418202400208, |
|
"logits/rejected": 1.0930532217025757, |
|
"logps/chosen": -283.73480224609375, |
|
"logps/rejected": -234.12576293945312, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -4.083226667717099e-05, |
|
"rewards/margins": -0.00016547185077797621, |
|
"rewards/rejected": 0.00012463955499697477, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.138381201044387e-07, |
|
"logits/chosen": 1.005788803100586, |
|
"logits/rejected": 1.0820422172546387, |
|
"logps/chosen": -271.99468994140625, |
|
"logps/rejected": -231.10446166992188, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 4.1856186726363376e-05, |
|
"rewards/margins": 0.0007076783804222941, |
|
"rewards/rejected": -0.0006658221827819943, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0443864229765013e-06, |
|
"logits/chosen": 1.0230119228363037, |
|
"logits/rejected": 1.0622894763946533, |
|
"logps/chosen": -283.88946533203125, |
|
"logps/rejected": -261.6778869628906, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.000257449341006577, |
|
"rewards/margins": -8.570156205678359e-05, |
|
"rewards/rejected": -0.00017174780077766627, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.1749347258485642e-06, |
|
"logits/chosen": 1.043121337890625, |
|
"logits/rejected": 1.0927269458770752, |
|
"logps/chosen": -278.48974609375, |
|
"logps/rejected": -235.72775268554688, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.3687500059604645, |
|
"rewards/chosen": -0.0013169237645342946, |
|
"rewards/margins": -0.0013213430065661669, |
|
"rewards/rejected": 4.419172000780236e-06, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.305483028720627e-06, |
|
"logits/chosen": 0.991162121295929, |
|
"logits/rejected": 1.0666497945785522, |
|
"logps/chosen": -237.25302124023438, |
|
"logps/rejected": -218.53671264648438, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.00023656387929804623, |
|
"rewards/margins": 0.0011323514627292752, |
|
"rewards/rejected": -0.0008957876125350595, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_logits/chosen": 0.9709606766700745, |
|
"eval_logits/rejected": 1.0631499290466309, |
|
"eval_logps/chosen": -277.5757751464844, |
|
"eval_logps/rejected": -243.87449645996094, |
|
"eval_loss": 0.6931320428848267, |
|
"eval_rewards/accuracies": 0.4514999985694885, |
|
"eval_rewards/chosen": -0.00028967749676667154, |
|
"eval_rewards/margins": 0.00029621709836646914, |
|
"eval_rewards/rejected": -0.0005858945660293102, |
|
"eval_runtime": 539.9229, |
|
"eval_samples_per_second": 3.704, |
|
"eval_steps_per_second": 0.926, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.4360313315926894e-06, |
|
"logits/chosen": 0.9960800409317017, |
|
"logits/rejected": 1.081016182899475, |
|
"logps/chosen": -283.5791015625, |
|
"logps/rejected": -250.1773681640625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.0004133354814257473, |
|
"rewards/margins": 0.0019074681913480163, |
|
"rewards/rejected": -0.0014941326808184385, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5665796344647521e-06, |
|
"logits/chosen": 1.0297129154205322, |
|
"logits/rejected": 1.075050950050354, |
|
"logps/chosen": -227.7956085205078, |
|
"logps/rejected": -234.15097045898438, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.00013152281462680548, |
|
"rewards/margins": 0.0006020874716341496, |
|
"rewards/rejected": -0.00047056470066308975, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.6971279373368146e-06, |
|
"logits/chosen": 1.0477956533432007, |
|
"logits/rejected": 1.093898892402649, |
|
"logps/chosen": -282.6871643066406, |
|
"logps/rejected": -239.27749633789062, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.00015316384087782353, |
|
"rewards/margins": 0.0016984669491648674, |
|
"rewards/rejected": -0.0015453032683581114, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.8276762402088774e-06, |
|
"logits/chosen": 1.026609182357788, |
|
"logits/rejected": 1.0265687704086304, |
|
"logps/chosen": -263.9731750488281, |
|
"logps/rejected": -237.11538696289062, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.0007792095420882106, |
|
"rewards/margins": 0.0024368534795939922, |
|
"rewards/rejected": -0.0016576439375057817, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9582245430809403e-06, |
|
"logits/chosen": 1.007411241531372, |
|
"logits/rejected": 1.0271480083465576, |
|
"logps/chosen": -262.67718505859375, |
|
"logps/rejected": -235.0521240234375, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.00037160428473725915, |
|
"rewards/margins": 0.002256640698760748, |
|
"rewards/rejected": -0.0018850360065698624, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0887728459530026e-06, |
|
"logits/chosen": 0.9555414915084839, |
|
"logits/rejected": 1.0848571062088013, |
|
"logps/chosen": -258.272705078125, |
|
"logps/rejected": -240.0525360107422, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": 0.00037167343543842435, |
|
"rewards/margins": 0.0017876753117889166, |
|
"rewards/rejected": -0.0014160019345581532, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.2193211488250653e-06, |
|
"logits/chosen": 0.9961720705032349, |
|
"logits/rejected": 1.0719153881072998, |
|
"logps/chosen": -268.48541259765625, |
|
"logps/rejected": -218.2855987548828, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.0007700158166699111, |
|
"rewards/margins": 0.0027975619304925203, |
|
"rewards/rejected": -0.0020275460556149483, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.3498694516971284e-06, |
|
"logits/chosen": 0.9853116869926453, |
|
"logits/rejected": 1.033042073249817, |
|
"logps/chosen": -272.58697509765625, |
|
"logps/rejected": -237.8570556640625, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.000967078551184386, |
|
"rewards/margins": 0.0026892449241131544, |
|
"rewards/rejected": -0.0017221663147211075, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.4804177545691907e-06, |
|
"logits/chosen": 0.9925041198730469, |
|
"logits/rejected": 1.0097901821136475, |
|
"logps/chosen": -269.4550476074219, |
|
"logps/rejected": -235.5220489501953, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.0016971088480204344, |
|
"rewards/margins": 0.0036631212569773197, |
|
"rewards/rejected": -0.0019660124089568853, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.610966057441254e-06, |
|
"logits/chosen": 1.0215815305709839, |
|
"logits/rejected": 1.1321773529052734, |
|
"logps/chosen": -278.18402099609375, |
|
"logps/rejected": -249.7351531982422, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.003087539691478014, |
|
"rewards/margins": 0.004473397042602301, |
|
"rewards/rejected": -0.0013858575839549303, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_logits/chosen": 0.9704993367195129, |
|
"eval_logits/rejected": 1.0632352828979492, |
|
"eval_logps/chosen": -277.26611328125, |
|
"eval_logps/rejected": -243.9904022216797, |
|
"eval_loss": 0.6929376721382141, |
|
"eval_rewards/accuracies": 0.5885000228881836, |
|
"eval_rewards/chosen": 0.0028066388331353664, |
|
"eval_rewards/margins": 0.004551402758806944, |
|
"eval_rewards/rejected": -0.0017447640420868993, |
|
"eval_runtime": 540.2748, |
|
"eval_samples_per_second": 3.702, |
|
"eval_steps_per_second": 0.925, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.741514360313316e-06, |
|
"logits/chosen": 1.0122153759002686, |
|
"logits/rejected": 1.091338038444519, |
|
"logps/chosen": -260.8949279785156, |
|
"logps/rejected": -233.3127899169922, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0018235554452985525, |
|
"rewards/margins": 0.004228769801557064, |
|
"rewards/rejected": -0.0024052143562585115, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.872062663185379e-06, |
|
"logits/chosen": 1.041244387626648, |
|
"logits/rejected": 1.1119762659072876, |
|
"logps/chosen": -277.6720886230469, |
|
"logps/rejected": -243.11624145507812, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.00286296010017395, |
|
"rewards/margins": 0.004079398699104786, |
|
"rewards/rejected": -0.0012164388317614794, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.0026109660574416e-06, |
|
"logits/chosen": 1.0305936336517334, |
|
"logits/rejected": 1.0384341478347778, |
|
"logps/chosen": -268.91888427734375, |
|
"logps/rejected": -275.10919189453125, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.004722142592072487, |
|
"rewards/margins": 0.005156674422323704, |
|
"rewards/rejected": -0.0004345317429397255, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1331592689295043e-06, |
|
"logits/chosen": 1.0291810035705566, |
|
"logits/rejected": 1.046808123588562, |
|
"logps/chosen": -272.0259704589844, |
|
"logps/rejected": -231.6946258544922, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.007824023254215717, |
|
"rewards/margins": 0.009780755266547203, |
|
"rewards/rejected": -0.001956732477992773, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.263707571801567e-06, |
|
"logits/chosen": 0.9261485934257507, |
|
"logits/rejected": 1.0824263095855713, |
|
"logps/chosen": -262.42010498046875, |
|
"logps/rejected": -207.1998291015625, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.004594790283590555, |
|
"rewards/margins": 0.006828789599239826, |
|
"rewards/rejected": -0.0022339997813105583, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.3942558746736293e-06, |
|
"logits/chosen": 1.0177654027938843, |
|
"logits/rejected": 1.0079872608184814, |
|
"logps/chosen": -255.89602661132812, |
|
"logps/rejected": -249.2772979736328, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.004748777486383915, |
|
"rewards/margins": 0.007689561694860458, |
|
"rewards/rejected": -0.0029407842084765434, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.524804177545692e-06, |
|
"logits/chosen": 0.9201077222824097, |
|
"logits/rejected": 1.0668845176696777, |
|
"logps/chosen": -250.4193115234375, |
|
"logps/rejected": -225.25808715820312, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.00446278415620327, |
|
"rewards/margins": 0.009258858859539032, |
|
"rewards/rejected": -0.004796075168997049, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.6553524804177547e-06, |
|
"logits/chosen": 0.9566753506660461, |
|
"logits/rejected": 0.9955002069473267, |
|
"logps/chosen": -262.57159423828125, |
|
"logps/rejected": -244.9207763671875, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.007930249907076359, |
|
"rewards/margins": 0.012794476933777332, |
|
"rewards/rejected": -0.0048642284236848354, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.7859007832898174e-06, |
|
"logits/chosen": 0.9608441591262817, |
|
"logits/rejected": 1.0510733127593994, |
|
"logps/chosen": -258.68780517578125, |
|
"logps/rejected": -229.01083374023438, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.008841058239340782, |
|
"rewards/margins": 0.01509904582053423, |
|
"rewards/rejected": -0.006257989443838596, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.9164490861618806e-06, |
|
"logits/chosen": 0.927672266960144, |
|
"logits/rejected": 1.0490531921386719, |
|
"logps/chosen": -257.1780090332031, |
|
"logps/rejected": -227.1814422607422, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.008853832259774208, |
|
"rewards/margins": 0.014651511795818806, |
|
"rewards/rejected": -0.00579767907038331, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_logits/chosen": 0.9545422196388245, |
|
"eval_logits/rejected": 1.0488145351409912, |
|
"eval_logps/chosen": -276.5484924316406, |
|
"eval_logps/rejected": -244.36415100097656, |
|
"eval_loss": 0.6924864649772644, |
|
"eval_rewards/accuracies": 0.6259999871253967, |
|
"eval_rewards/chosen": 0.009982902556657791, |
|
"eval_rewards/margins": 0.015465173870325089, |
|
"eval_rewards/rejected": -0.005482269451022148, |
|
"eval_runtime": 540.3904, |
|
"eval_samples_per_second": 3.701, |
|
"eval_steps_per_second": 0.925, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.046997389033943e-06, |
|
"logits/chosen": 0.9173529744148254, |
|
"logits/rejected": 1.0776686668395996, |
|
"logps/chosen": -256.60821533203125, |
|
"logps/rejected": -226.8739013671875, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.012835139408707619, |
|
"rewards/margins": 0.01847982034087181, |
|
"rewards/rejected": -0.005644683726131916, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.177545691906005e-06, |
|
"logits/chosen": 0.8929821252822876, |
|
"logits/rejected": 1.021347165107727, |
|
"logps/chosen": -282.7056579589844, |
|
"logps/rejected": -256.8482360839844, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.010254921391606331, |
|
"rewards/margins": 0.0137600377202034, |
|
"rewards/rejected": -0.003505116328597069, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.308093994778068e-06, |
|
"logits/chosen": 0.9728350639343262, |
|
"logits/rejected": 1.0812106132507324, |
|
"logps/chosen": -278.6542663574219, |
|
"logps/rejected": -243.5943603515625, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.005831174552440643, |
|
"rewards/margins": 0.016604231670498848, |
|
"rewards/rejected": -0.01077305804938078, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.4386422976501306e-06, |
|
"logits/chosen": 0.9642572402954102, |
|
"logits/rejected": 0.9961212873458862, |
|
"logps/chosen": -273.8861389160156, |
|
"logps/rejected": -268.4089050292969, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.007403290830552578, |
|
"rewards/margins": 0.017763126641511917, |
|
"rewards/rejected": -0.010359834879636765, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.569190600522193e-06, |
|
"logits/chosen": 0.9777683019638062, |
|
"logits/rejected": 0.9659522771835327, |
|
"logps/chosen": -284.44354248046875, |
|
"logps/rejected": -250.377685546875, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.009272046387195587, |
|
"rewards/margins": 0.019789326936006546, |
|
"rewards/rejected": -0.010517279617488384, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.699738903394257e-06, |
|
"logits/chosen": 0.926777184009552, |
|
"logits/rejected": 1.0163743495941162, |
|
"logps/chosen": -301.6736755371094, |
|
"logps/rejected": -258.0189514160156, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.009693610481917858, |
|
"rewards/margins": 0.02282491698861122, |
|
"rewards/rejected": -0.013131308369338512, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.8302872062663196e-06, |
|
"logits/chosen": 0.9656028747558594, |
|
"logits/rejected": 0.953553318977356, |
|
"logps/chosen": -304.98760986328125, |
|
"logps/rejected": -260.2730407714844, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.00983491726219654, |
|
"rewards/margins": 0.022675124928355217, |
|
"rewards/rejected": -0.012840206734836102, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.9608355091383814e-06, |
|
"logits/chosen": 0.942939281463623, |
|
"logits/rejected": 1.044382929801941, |
|
"logps/chosen": -255.0367431640625, |
|
"logps/rejected": -223.09664916992188, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.012080366723239422, |
|
"rewards/margins": 0.02917185053229332, |
|
"rewards/rejected": -0.017091484740376472, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.9999488562447675e-06, |
|
"logits/chosen": 0.9582707285881042, |
|
"logits/rejected": 0.9803364872932434, |
|
"logps/chosen": -298.5652770996094, |
|
"logps/rejected": -255.8463592529297, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.01266313623636961, |
|
"rewards/margins": 0.030872434377670288, |
|
"rewards/rejected": -0.018209297209978104, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999698361256577e-06, |
|
"logits/chosen": 0.9647480249404907, |
|
"logits/rejected": 0.9665771722793579, |
|
"logps/chosen": -278.61871337890625, |
|
"logps/rejected": -262.8523254394531, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.0069937086664140224, |
|
"rewards/margins": 0.03327130153775215, |
|
"rewards/rejected": -0.02627759613096714, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_logits/chosen": 0.8978338837623596, |
|
"eval_logits/rejected": 0.992965817451477, |
|
"eval_logps/chosen": -276.977783203125, |
|
"eval_logps/rejected": -246.2157440185547, |
|
"eval_loss": 0.6920285820960999, |
|
"eval_rewards/accuracies": 0.6340000033378601, |
|
"eval_rewards/chosen": 0.005690301302820444, |
|
"eval_rewards/margins": 0.029688764363527298, |
|
"eval_rewards/rejected": -0.02399846352636814, |
|
"eval_runtime": 540.0105, |
|
"eval_samples_per_second": 3.704, |
|
"eval_steps_per_second": 0.926, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.999239142174581e-06, |
|
"logits/chosen": 0.8835350871086121, |
|
"logits/rejected": 0.8935839533805847, |
|
"logps/chosen": -293.0201721191406, |
|
"logps/rejected": -244.9386444091797, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.005041834898293018, |
|
"rewards/margins": 0.02935781143605709, |
|
"rewards/rejected": -0.024315981194376945, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.99857123734344e-06, |
|
"logits/chosen": 0.9205878376960754, |
|
"logits/rejected": 1.0264567136764526, |
|
"logps/chosen": -278.33697509765625, |
|
"logps/rejected": -245.725341796875, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.0019535294268280268, |
|
"rewards/margins": 0.029373669996857643, |
|
"rewards/rejected": -0.02742014452815056, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.997694702533016e-06, |
|
"logits/chosen": 0.8263881802558899, |
|
"logits/rejected": 0.9619684219360352, |
|
"logps/chosen": -264.7477111816406, |
|
"logps/rejected": -259.3697509765625, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.009586657397449017, |
|
"rewards/margins": 0.028161650523543358, |
|
"rewards/rejected": -0.0377482995390892, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.996609610933713e-06, |
|
"logits/chosen": 0.9048721194267273, |
|
"logits/rejected": 0.8793436288833618, |
|
"logps/chosen": -283.28668212890625, |
|
"logps/rejected": -261.3004150390625, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.013652992434799671, |
|
"rewards/margins": 0.024908630177378654, |
|
"rewards/rejected": -0.0385616198182106, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.995316053150366e-06, |
|
"logits/chosen": 0.8724738359451294, |
|
"logits/rejected": 0.9701471328735352, |
|
"logps/chosen": -262.9942321777344, |
|
"logps/rejected": -236.6386260986328, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.015233024954795837, |
|
"rewards/margins": 0.0235325675457716, |
|
"rewards/rejected": -0.038765594363212585, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.9938141371946815e-06, |
|
"logits/chosen": 0.910510241985321, |
|
"logits/rejected": 1.033053994178772, |
|
"logps/chosen": -252.5099334716797, |
|
"logps/rejected": -252.48379516601562, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.006133816204965115, |
|
"rewards/margins": 0.03705804795026779, |
|
"rewards/rejected": -0.04319187253713608, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.992103988476206e-06, |
|
"logits/chosen": 0.8955548405647278, |
|
"logits/rejected": 0.9851361513137817, |
|
"logps/chosen": -296.1971435546875, |
|
"logps/rejected": -249.4320526123047, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.00848749466240406, |
|
"rewards/margins": 0.03199051320552826, |
|
"rewards/rejected": -0.04047800973057747, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.990185749791866e-06, |
|
"logits/chosen": 0.8434032201766968, |
|
"logits/rejected": 0.9481671452522278, |
|
"logps/chosen": -243.55819702148438, |
|
"logps/rejected": -213.0118865966797, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.011666789650917053, |
|
"rewards/margins": 0.036795832216739655, |
|
"rewards/rejected": -0.04846261814236641, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9880595813140395e-06, |
|
"logits/chosen": 0.8387807011604309, |
|
"logits/rejected": 0.9744217991828918, |
|
"logps/chosen": -276.4521484375, |
|
"logps/rejected": -238.837890625, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.016785232350230217, |
|
"rewards/margins": 0.03324298933148384, |
|
"rewards/rejected": -0.05002821609377861, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.985725660577184e-06, |
|
"logits/chosen": 0.9428791999816895, |
|
"logits/rejected": 0.9261376261711121, |
|
"logps/chosen": -245.7830047607422, |
|
"logps/rejected": -235.5664825439453, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.022965628653764725, |
|
"rewards/margins": 0.034700777381658554, |
|
"rewards/rejected": -0.05766640976071358, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_logits/chosen": 0.8238934874534607, |
|
"eval_logits/rejected": 0.9187954068183899, |
|
"eval_logps/chosen": -280.7515869140625, |
|
"eval_logps/rejected": -250.6851348876953, |
|
"eval_loss": 0.6917084455490112, |
|
"eval_rewards/accuracies": 0.6309999823570251, |
|
"eval_rewards/chosen": -0.03204774856567383, |
|
"eval_rewards/margins": 0.03664441406726837, |
|
"eval_rewards/rejected": -0.0686921626329422, |
|
"eval_runtime": 540.183, |
|
"eval_samples_per_second": 3.702, |
|
"eval_steps_per_second": 0.926, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.983184182463009e-06, |
|
"logits/chosen": 0.8044353723526001, |
|
"logits/rejected": 0.970533549785614, |
|
"logps/chosen": -285.99761962890625, |
|
"logps/rejected": -224.1236114501953, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.026267999783158302, |
|
"rewards/margins": 0.038414839655160904, |
|
"rewards/rejected": -0.06468284130096436, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.980435359184203e-06, |
|
"logits/chosen": 0.8804060220718384, |
|
"logits/rejected": 0.9580115079879761, |
|
"logps/chosen": -290.8780822753906, |
|
"logps/rejected": -256.63543701171875, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.026573490351438522, |
|
"rewards/margins": 0.04471993073821068, |
|
"rewards/rejected": -0.0712934285402298, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9774794202667236e-06, |
|
"logits/chosen": 0.863343358039856, |
|
"logits/rejected": 0.9443706274032593, |
|
"logps/chosen": -268.26751708984375, |
|
"logps/rejected": -238.9235076904297, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.022545838728547096, |
|
"rewards/margins": 0.04438090696930885, |
|
"rewards/rejected": -0.0669267401099205, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.974316612530615e-06, |
|
"logits/chosen": 0.8318734169006348, |
|
"logits/rejected": 0.8924848437309265, |
|
"logps/chosen": -244.3855438232422, |
|
"logps/rejected": -226.19100952148438, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.028137261047959328, |
|
"rewards/margins": 0.04323118180036545, |
|
"rewards/rejected": -0.07136844098567963, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.970947200069416e-06, |
|
"logits/chosen": 0.8746344447135925, |
|
"logits/rejected": 0.9145771861076355, |
|
"logps/chosen": -266.9718322753906, |
|
"logps/rejected": -231.31741333007812, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.04218477010726929, |
|
"rewards/margins": 0.05346935987472534, |
|
"rewards/rejected": -0.09565412998199463, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.967371464228096e-06, |
|
"logits/chosen": 0.9077315330505371, |
|
"logits/rejected": 0.9803347587585449, |
|
"logps/chosen": -288.12908935546875, |
|
"logps/rejected": -266.33343505859375, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.044233765453100204, |
|
"rewards/margins": 0.0387597382068634, |
|
"rewards/rejected": -0.0829935073852539, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.963589703579569e-06, |
|
"logits/chosen": 0.8836727142333984, |
|
"logits/rejected": 1.0670572519302368, |
|
"logps/chosen": -271.2341003417969, |
|
"logps/rejected": -226.1123046875, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.03294859081506729, |
|
"rewards/margins": 0.06488404422998428, |
|
"rewards/rejected": -0.09783263504505157, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.9596022338997615e-06, |
|
"logits/chosen": 0.9106415510177612, |
|
"logits/rejected": 0.8857673406600952, |
|
"logps/chosen": -260.3927307128906, |
|
"logps/rejected": -232.7233428955078, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.0389188714325428, |
|
"rewards/margins": 0.03832856938242912, |
|
"rewards/rejected": -0.07724744826555252, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.955409388141243e-06, |
|
"logits/chosen": 0.9676392674446106, |
|
"logits/rejected": 0.9093373417854309, |
|
"logps/chosen": -271.92193603515625, |
|
"logps/rejected": -235.1454620361328, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.04100045561790466, |
|
"rewards/margins": 0.03780464082956314, |
|
"rewards/rejected": -0.0788050964474678, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.951011516405429e-06, |
|
"logits/chosen": 0.9376009702682495, |
|
"logits/rejected": 0.9335840344429016, |
|
"logps/chosen": -242.3965301513672, |
|
"logps/rejected": -265.5517578125, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.0576152577996254, |
|
"rewards/margins": 0.03630609065294266, |
|
"rewards/rejected": -0.09392134845256805, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_logits/chosen": 0.8586252927780151, |
|
"eval_logits/rejected": 0.9506573677062988, |
|
"eval_logps/chosen": -283.59686279296875, |
|
"eval_logps/rejected": -254.26144409179688, |
|
"eval_loss": 0.6914932131767273, |
|
"eval_rewards/accuracies": 0.6215000152587891, |
|
"eval_rewards/chosen": -0.06050081178545952, |
|
"eval_rewards/margins": 0.0439542680978775, |
|
"eval_rewards/rejected": -0.10445508360862732, |
|
"eval_runtime": 539.8865, |
|
"eval_samples_per_second": 3.704, |
|
"eval_steps_per_second": 0.926, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.946408985913344e-06, |
|
"logits/chosen": 0.9270865321159363, |
|
"logits/rejected": 0.946628212928772, |
|
"logps/chosen": -250.1840057373047, |
|
"logps/rejected": -228.7745361328125, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.06987909972667694, |
|
"rewards/margins": 0.03838449344038963, |
|
"rewards/rejected": -0.10826359689235687, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.941602180974958e-06, |
|
"logits/chosen": 0.9000824093818665, |
|
"logits/rejected": 0.9378656148910522, |
|
"logps/chosen": -292.17010498046875, |
|
"logps/rejected": -237.016357421875, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.06269699335098267, |
|
"rewards/margins": 0.043883226811885834, |
|
"rewards/rejected": -0.1065802201628685, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.936591502957101e-06, |
|
"logits/chosen": 0.7903264760971069, |
|
"logits/rejected": 0.9245864152908325, |
|
"logps/chosen": -301.8384704589844, |
|
"logps/rejected": -256.8984069824219, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.04840216785669327, |
|
"rewards/margins": 0.0328446589410305, |
|
"rewards/rejected": -0.08124681562185287, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.931377370249946e-06, |
|
"logits/chosen": 0.8983148336410522, |
|
"logits/rejected": 0.9722617268562317, |
|
"logps/chosen": -278.6772155761719, |
|
"logps/rejected": -235.2743377685547, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.04875911772251129, |
|
"rewards/margins": 0.03910985589027405, |
|
"rewards/rejected": -0.08786897361278534, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.925960218232073e-06, |
|
"logits/chosen": 0.9069989323616028, |
|
"logits/rejected": 1.027419924736023, |
|
"logps/chosen": -308.1993103027344, |
|
"logps/rejected": -285.44903564453125, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.05254577472805977, |
|
"rewards/margins": 0.04926187917590141, |
|
"rewards/rejected": -0.10180766880512238, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.920340499234116e-06, |
|
"logits/chosen": 0.8621240854263306, |
|
"logits/rejected": 0.9766268730163574, |
|
"logps/chosen": -288.5992431640625, |
|
"logps/rejected": -216.592041015625, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.0491311252117157, |
|
"rewards/margins": 0.03728429228067398, |
|
"rewards/rejected": -0.08641541749238968, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.914518682500995e-06, |
|
"logits/chosen": 0.9439069032669067, |
|
"logits/rejected": 0.9264806509017944, |
|
"logps/chosen": -282.1919250488281, |
|
"logps/rejected": -278.38037109375, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.05234139412641525, |
|
"rewards/margins": 0.04387987032532692, |
|
"rewards/rejected": -0.09622126072645187, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.9084952541527315e-06, |
|
"logits/chosen": 0.8950139880180359, |
|
"logits/rejected": 0.9192444682121277, |
|
"logps/chosen": -258.887939453125, |
|
"logps/rejected": -246.53256225585938, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.03944612294435501, |
|
"rewards/margins": 0.04930661618709564, |
|
"rewards/rejected": -0.08875273913145065, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.902270717143858e-06, |
|
"logits/chosen": 0.8779371380805969, |
|
"logits/rejected": 0.9450112581253052, |
|
"logps/chosen": -288.85736083984375, |
|
"logps/rejected": -251.94943237304688, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.032757535576820374, |
|
"rewards/margins": 0.05075981095433235, |
|
"rewards/rejected": -0.08351735770702362, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.895845591221427e-06, |
|
"logits/chosen": 0.7992750406265259, |
|
"logits/rejected": 0.9617929458618164, |
|
"logps/chosen": -251.58767700195312, |
|
"logps/rejected": -232.5711669921875, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.04521554335951805, |
|
"rewards/margins": 0.0445394404232502, |
|
"rewards/rejected": -0.08975498378276825, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_logits/chosen": 0.8818045854568481, |
|
"eval_logits/rejected": 0.9764631390571594, |
|
"eval_logps/chosen": -281.1485595703125, |
|
"eval_logps/rejected": -251.7943878173828, |
|
"eval_loss": 0.6914020776748657, |
|
"eval_rewards/accuracies": 0.6259999871253967, |
|
"eval_rewards/chosen": -0.03601725026965141, |
|
"eval_rewards/margins": 0.04376746341586113, |
|
"eval_rewards/rejected": -0.07978471368551254, |
|
"eval_runtime": 539.8363, |
|
"eval_samples_per_second": 3.705, |
|
"eval_steps_per_second": 0.926, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.8892204128816e-06, |
|
"logits/chosen": 0.9707541465759277, |
|
"logits/rejected": 1.0011179447174072, |
|
"logps/chosen": -286.44696044921875, |
|
"logps/rejected": -283.7610778808594, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.0358760841190815, |
|
"rewards/margins": 0.044750213623046875, |
|
"rewards/rejected": -0.08062629401683807, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.882395735324864e-06, |
|
"logits/chosen": 0.8825947046279907, |
|
"logits/rejected": 0.8657267689704895, |
|
"logps/chosen": -280.93768310546875, |
|
"logps/rejected": -255.46212768554688, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.026189198717474937, |
|
"rewards/margins": 0.043071091175079346, |
|
"rewards/rejected": -0.06926029175519943, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.87537212840983e-06, |
|
"logits/chosen": 0.9044111371040344, |
|
"logits/rejected": 0.9175283312797546, |
|
"logps/chosen": -301.9758605957031, |
|
"logps/rejected": -256.6850280761719, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.024754587560892105, |
|
"rewards/margins": 0.04776642844080925, |
|
"rewards/rejected": -0.07252101600170135, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.8681501786056545e-06, |
|
"logits/chosen": 0.8817905187606812, |
|
"logits/rejected": 0.9648914337158203, |
|
"logps/chosen": -273.3789367675781, |
|
"logps/rejected": -221.35986328125, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.03455664590001106, |
|
"rewards/margins": 0.03378739953041077, |
|
"rewards/rejected": -0.06834404170513153, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.860730488943068e-06, |
|
"logits/chosen": 0.9233236312866211, |
|
"logits/rejected": 0.9309977293014526, |
|
"logps/chosen": -278.74212646484375, |
|
"logps/rejected": -244.9453582763672, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.013511193916201591, |
|
"rewards/margins": 0.05613558739423752, |
|
"rewards/rejected": -0.06964678317308426, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.853113678964022e-06, |
|
"logits/chosen": 0.9726675152778625, |
|
"logits/rejected": 0.9908739924430847, |
|
"logps/chosen": -249.2692108154297, |
|
"logps/rejected": -234.45663452148438, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.023977819830179214, |
|
"rewards/margins": 0.054052967578172684, |
|
"rewards/rejected": -0.0780307799577713, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.845300384669958e-06, |
|
"logits/chosen": 0.9504178166389465, |
|
"logits/rejected": 0.9623494148254395, |
|
"logps/chosen": -265.9614562988281, |
|
"logps/rejected": -223.534423828125, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.033301644027233124, |
|
"rewards/margins": 0.033827196806669235, |
|
"rewards/rejected": -0.06712885200977325, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.837291258468701e-06, |
|
"logits/chosen": 0.8907458186149597, |
|
"logits/rejected": 0.9131177663803101, |
|
"logps/chosen": -279.34661865234375, |
|
"logps/rejected": -248.4618377685547, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.04006670415401459, |
|
"rewards/margins": 0.05590524524450302, |
|
"rewards/rejected": -0.09597194939851761, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.829086969119984e-06, |
|
"logits/chosen": 0.8844520449638367, |
|
"logits/rejected": 0.9553192257881165, |
|
"logps/chosen": -248.71533203125, |
|
"logps/rejected": -241.25350952148438, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.039020974189043045, |
|
"rewards/margins": 0.04393316060304642, |
|
"rewards/rejected": -0.08295414596796036, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.820688201679605e-06, |
|
"logits/chosen": 0.9063314199447632, |
|
"logits/rejected": 0.9573804140090942, |
|
"logps/chosen": -261.8778381347656, |
|
"logps/rejected": -234.04788208007812, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.04035869985818863, |
|
"rewards/margins": 0.04505294933915138, |
|
"rewards/rejected": -0.08541164547204971, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": 0.9022059440612793, |
|
"eval_logits/rejected": 0.9965441226959229, |
|
"eval_logps/chosen": -281.8776550292969, |
|
"eval_logps/rejected": -252.8778533935547, |
|
"eval_loss": 0.6913056373596191, |
|
"eval_rewards/accuracies": 0.6240000128746033, |
|
"eval_rewards/chosen": -0.04330845922231674, |
|
"eval_rewards/margins": 0.04731076583266258, |
|
"eval_rewards/rejected": -0.09061922132968903, |
|
"eval_runtime": 540.1978, |
|
"eval_samples_per_second": 3.702, |
|
"eval_steps_per_second": 0.926, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.8120956574422315e-06, |
|
"logits/chosen": 0.9879693984985352, |
|
"logits/rejected": 0.9654221534729004, |
|
"logps/chosen": -284.70416259765625, |
|
"logps/rejected": -282.3924255371094, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.04387697950005531, |
|
"rewards/margins": 0.05213203281164169, |
|
"rewards/rejected": -0.0960090160369873, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.803310053882831e-06, |
|
"logits/chosen": 0.9151199460029602, |
|
"logits/rejected": 0.9490255117416382, |
|
"logps/chosen": -214.4105987548828, |
|
"logps/rejected": -204.9471435546875, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.04438359662890434, |
|
"rewards/margins": 0.04097510129213333, |
|
"rewards/rejected": -0.08535870164632797, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.794332124596775e-06, |
|
"logits/chosen": 0.910225510597229, |
|
"logits/rejected": 0.9843104481697083, |
|
"logps/chosen": -279.525634765625, |
|
"logps/rejected": -255.632568359375, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.060014404356479645, |
|
"rewards/margins": 0.03720748424530029, |
|
"rewards/rejected": -0.09722188115119934, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.785162619238575e-06, |
|
"logits/chosen": 0.9961544275283813, |
|
"logits/rejected": 1.0286822319030762, |
|
"logps/chosen": -268.55023193359375, |
|
"logps/rejected": -252.0181121826172, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.07715798169374466, |
|
"rewards/margins": 0.04793631657958031, |
|
"rewards/rejected": -0.12509429454803467, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.775802303459288e-06, |
|
"logits/chosen": 0.9325528144836426, |
|
"logits/rejected": 1.0585540533065796, |
|
"logps/chosen": -295.1412658691406, |
|
"logps/rejected": -265.9173889160156, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.0818694531917572, |
|
"rewards/margins": 0.049801088869571686, |
|
"rewards/rejected": -0.13167054951190948, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.766251958842589e-06, |
|
"logits/chosen": 0.9676458239555359, |
|
"logits/rejected": 1.0127732753753662, |
|
"logps/chosen": -307.82403564453125, |
|
"logps/rejected": -271.78009033203125, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.0727236270904541, |
|
"rewards/margins": 0.0467851385474205, |
|
"rewards/rejected": -0.1195087656378746, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7565123828395066e-06, |
|
"logits/chosen": 0.9034944772720337, |
|
"logits/rejected": 1.0843673944473267, |
|
"logps/chosen": -318.55096435546875, |
|
"logps/rejected": -294.28131103515625, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.050767261534929276, |
|
"rewards/margins": 0.045433562248945236, |
|
"rewards/rejected": -0.09620082378387451, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.746584388701831e-06, |
|
"logits/chosen": 0.990381121635437, |
|
"logits/rejected": 1.1196520328521729, |
|
"logps/chosen": -261.2209777832031, |
|
"logps/rejected": -227.89700317382812, |
|
"loss": 0.6888, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.05034313350915909, |
|
"rewards/margins": 0.06942330300807953, |
|
"rewards/rejected": -0.11976643651723862, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.736468805414218e-06, |
|
"logits/chosen": 0.9858494997024536, |
|
"logits/rejected": 1.1457973718643188, |
|
"logps/chosen": -296.84906005859375, |
|
"logps/rejected": -270.0372009277344, |
|
"loss": 0.6899, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.04343515634536743, |
|
"rewards/margins": 0.07933736592531204, |
|
"rewards/rejected": -0.12277251482009888, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.7261664776249595e-06, |
|
"logits/chosen": 0.9974620938301086, |
|
"logits/rejected": 0.9908777475357056, |
|
"logps/chosen": -273.75848388671875, |
|
"logps/rejected": -256.10833740234375, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.05218503996729851, |
|
"rewards/margins": 0.057306624948978424, |
|
"rewards/rejected": -0.10949166119098663, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_logits/chosen": 0.9266276955604553, |
|
"eval_logits/rejected": 1.0206482410430908, |
|
"eval_logps/chosen": -282.83209228515625, |
|
"eval_logps/rejected": -254.36526489257812, |
|
"eval_loss": 0.6912428140640259, |
|
"eval_rewards/accuracies": 0.6244999766349792, |
|
"eval_rewards/chosen": -0.052852813154459, |
|
"eval_rewards/margins": 0.052640702575445175, |
|
"eval_rewards/rejected": -0.10549352318048477, |
|
"eval_runtime": 539.8724, |
|
"eval_samples_per_second": 3.705, |
|
"eval_steps_per_second": 0.926, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.715678265575463e-06, |
|
"logits/chosen": 1.0043308734893799, |
|
"logits/rejected": 1.0745335817337036, |
|
"logps/chosen": -312.68194580078125, |
|
"logps/rejected": -295.7378845214844, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.05200989916920662, |
|
"rewards/margins": 0.03657294064760208, |
|
"rewards/rejected": -0.088582843542099, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.705005045028415e-06, |
|
"logits/chosen": 0.9822233319282532, |
|
"logits/rejected": 1.0417166948318481, |
|
"logps/chosen": -336.60919189453125, |
|
"logps/rejected": -289.87689208984375, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.04931178689002991, |
|
"rewards/margins": 0.06655998528003693, |
|
"rewards/rejected": -0.11587176471948624, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.694147707194659e-06, |
|
"logits/chosen": 0.9825722575187683, |
|
"logits/rejected": 1.0669372081756592, |
|
"logps/chosen": -291.48394775390625, |
|
"logps/rejected": -246.3392333984375, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.06181565672159195, |
|
"rewards/margins": 0.05421454459428787, |
|
"rewards/rejected": -0.11603017896413803, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.683107158658782e-06, |
|
"logits/chosen": 0.9451677203178406, |
|
"logits/rejected": 0.9427906274795532, |
|
"logps/chosen": -267.2729797363281, |
|
"logps/rejected": -239.00460815429688, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.062487781047821045, |
|
"rewards/margins": 0.05825795605778694, |
|
"rewards/rejected": -0.12074574083089828, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.671884321303407e-06, |
|
"logits/chosen": 0.9309576749801636, |
|
"logits/rejected": 0.9632508158683777, |
|
"logps/chosen": -292.92864990234375, |
|
"logps/rejected": -279.8366394042969, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.08061058819293976, |
|
"rewards/margins": 0.05397840216755867, |
|
"rewards/rejected": -0.13458898663520813, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.660480132232224e-06, |
|
"logits/chosen": 0.8813568353652954, |
|
"logits/rejected": 0.9471953511238098, |
|
"logps/chosen": -284.23333740234375, |
|
"logps/rejected": -256.72662353515625, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.08392615616321564, |
|
"rewards/margins": 0.04842451959848404, |
|
"rewards/rejected": -0.13235066831111908, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.6488955436917414e-06, |
|
"logits/chosen": 0.9631437063217163, |
|
"logits/rejected": 1.0291635990142822, |
|
"logps/chosen": -284.9555358886719, |
|
"logps/rejected": -221.7299041748047, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.07446320354938507, |
|
"rewards/margins": 0.03399290144443512, |
|
"rewards/rejected": -0.10845611244440079, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.6371315229917644e-06, |
|
"logits/chosen": 0.9003992080688477, |
|
"logits/rejected": 0.9652150273323059, |
|
"logps/chosen": -267.9747619628906, |
|
"logps/rejected": -250.9159393310547, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.05370105430483818, |
|
"rewards/margins": 0.05308745428919792, |
|
"rewards/rejected": -0.1067885160446167, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.625189052424638e-06, |
|
"logits/chosen": 0.9419862627983093, |
|
"logits/rejected": 1.0019476413726807, |
|
"logps/chosen": -249.2467803955078, |
|
"logps/rejected": -216.28857421875, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.04485908895730972, |
|
"rewards/margins": 0.03853844106197357, |
|
"rewards/rejected": -0.0833975300192833, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.613069129183218e-06, |
|
"logits/chosen": 0.9215824007987976, |
|
"logits/rejected": 0.9359531402587891, |
|
"logps/chosen": -265.0127868652344, |
|
"logps/rejected": -257.2760314941406, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.047946006059646606, |
|
"rewards/margins": 0.042474087327718735, |
|
"rewards/rejected": -0.09042008966207504, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": 0.9215968251228333, |
|
"eval_logits/rejected": 1.01704740524292, |
|
"eval_logps/chosen": -281.5216064453125, |
|
"eval_logps/rejected": -252.86395263671875, |
|
"eval_loss": 0.6912136673927307, |
|
"eval_rewards/accuracies": 0.6290000081062317, |
|
"eval_rewards/chosen": -0.03974788263440132, |
|
"eval_rewards/margins": 0.0507323183119297, |
|
"eval_rewards/rejected": -0.09048020094633102, |
|
"eval_runtime": 539.8968, |
|
"eval_samples_per_second": 3.704, |
|
"eval_steps_per_second": 0.926, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.600772765277607e-06, |
|
"logits/chosen": 0.9124513864517212, |
|
"logits/rejected": 1.0127254724502563, |
|
"logps/chosen": -300.19659423828125, |
|
"logps/rejected": -253.8302764892578, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.04729009419679642, |
|
"rewards/margins": 0.055154770612716675, |
|
"rewards/rejected": -0.1024448499083519, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.588300987450652e-06, |
|
"logits/chosen": 0.9808125495910645, |
|
"logits/rejected": 1.0599421262741089, |
|
"logps/chosen": -282.46038818359375, |
|
"logps/rejected": -267.24188232421875, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.05007319524884224, |
|
"rewards/margins": 0.05987462401390076, |
|
"rewards/rejected": -0.1099478155374527, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5756548370922136e-06, |
|
"logits/chosen": 0.9697202444076538, |
|
"logits/rejected": 1.0168477296829224, |
|
"logps/chosen": -282.2762145996094, |
|
"logps/rejected": -255.68844604492188, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.044876083731651306, |
|
"rewards/margins": 0.03766594082117081, |
|
"rewards/rejected": -0.08254201710224152, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.562835370152206e-06, |
|
"logits/chosen": 0.9049477577209473, |
|
"logits/rejected": 1.021236538887024, |
|
"logps/chosen": -253.36471557617188, |
|
"logps/rejected": -239.316650390625, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.038831353187561035, |
|
"rewards/margins": 0.035190094262361526, |
|
"rewards/rejected": -0.07402144372463226, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.54984365705243e-06, |
|
"logits/chosen": 1.0183782577514648, |
|
"logits/rejected": 1.024595022201538, |
|
"logps/chosen": -254.61691284179688, |
|
"logps/rejected": -231.1280975341797, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.027654901146888733, |
|
"rewards/margins": 0.042562730610370636, |
|
"rewards/rejected": -0.07021763920783997, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.536680782597191e-06, |
|
"logits/chosen": 0.9354788661003113, |
|
"logits/rejected": 1.0331499576568604, |
|
"logps/chosen": -297.9024963378906, |
|
"logps/rejected": -269.0444030761719, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.016384651884436607, |
|
"rewards/margins": 0.05180732160806656, |
|
"rewards/rejected": -0.06819198280572891, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.523347845882718e-06, |
|
"logits/chosen": 1.014983057975769, |
|
"logits/rejected": 1.0802810192108154, |
|
"logps/chosen": -258.166015625, |
|
"logps/rejected": -207.4542999267578, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.028195038437843323, |
|
"rewards/margins": 0.05233887955546379, |
|
"rewards/rejected": -0.08053391426801682, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.50984596020539e-06, |
|
"logits/chosen": 0.9456683397293091, |
|
"logits/rejected": 1.018128752708435, |
|
"logps/chosen": -300.35211181640625, |
|
"logps/rejected": -264.9364318847656, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.030218075960874557, |
|
"rewards/margins": 0.046960409730672836, |
|
"rewards/rejected": -0.0771784856915474, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.4961762529687745e-06, |
|
"logits/chosen": 0.9062995910644531, |
|
"logits/rejected": 1.0201383829116821, |
|
"logps/chosen": -305.39776611328125, |
|
"logps/rejected": -233.7451171875, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.04302235692739487, |
|
"rewards/margins": 0.04250651225447655, |
|
"rewards/rejected": -0.08552887290716171, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.482339865589492e-06, |
|
"logits/chosen": 0.8466746211051941, |
|
"logits/rejected": 1.021051049232483, |
|
"logps/chosen": -266.606689453125, |
|
"logps/rejected": -235.42123413085938, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.04704426974058151, |
|
"rewards/margins": 0.05677234008908272, |
|
"rewards/rejected": -0.10381660610437393, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_logits/chosen": 0.9243915677070618, |
|
"eval_logits/rejected": 1.018973708152771, |
|
"eval_logps/chosen": -283.05096435546875, |
|
"eval_logps/rejected": -253.97817993164062, |
|
"eval_loss": 0.6911666989326477, |
|
"eval_rewards/accuracies": 0.625, |
|
"eval_rewards/chosen": -0.05504164099693298, |
|
"eval_rewards/margins": 0.046581096947193146, |
|
"eval_rewards/rejected": -0.10162272304296494, |
|
"eval_runtime": 539.981, |
|
"eval_samples_per_second": 3.704, |
|
"eval_steps_per_second": 0.926, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.468337953401909e-06, |
|
"logits/chosen": 0.9825204610824585, |
|
"logits/rejected": 1.0412126779556274, |
|
"logps/chosen": -299.93157958984375, |
|
"logps/rejected": -247.61044311523438, |
|
"loss": 0.6906, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.04361564666032791, |
|
"rewards/margins": 0.049379851669073105, |
|
"rewards/rejected": -0.09299550205469131, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.45417168556166e-06, |
|
"logits/chosen": 0.9352057576179504, |
|
"logits/rejected": 1.0108729600906372, |
|
"logps/chosen": -261.39959716796875, |
|
"logps/rejected": -262.29864501953125, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.03935537114739418, |
|
"rewards/margins": 0.043966446071863174, |
|
"rewards/rejected": -0.08332181721925735, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.439842244948036e-06, |
|
"logits/chosen": 0.9064655303955078, |
|
"logits/rejected": 0.9808236956596375, |
|
"logps/chosen": -262.05462646484375, |
|
"logps/rejected": -245.01651000976562, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.042961589992046356, |
|
"rewards/margins": 0.04068412259221077, |
|
"rewards/rejected": -0.08364571630954742, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.425350828065204e-06, |
|
"logits/chosen": 0.9776175618171692, |
|
"logits/rejected": 0.9820224642753601, |
|
"logps/chosen": -282.891357421875, |
|
"logps/rejected": -266.565673828125, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.034526508301496506, |
|
"rewards/margins": 0.04868306592106819, |
|
"rewards/rejected": -0.0832095816731453, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.410698644942303e-06, |
|
"logits/chosen": 0.9672806859016418, |
|
"logits/rejected": 1.0234858989715576, |
|
"logps/chosen": -281.4849548339844, |
|
"logps/rejected": -285.44293212890625, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.032487623393535614, |
|
"rewards/margins": 0.037405431270599365, |
|
"rewards/rejected": -0.06989306956529617, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.395886919032406e-06, |
|
"logits/chosen": 0.9714674949645996, |
|
"logits/rejected": 1.0344150066375732, |
|
"logps/chosen": -274.4117126464844, |
|
"logps/rejected": -232.0000762939453, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.031604982912540436, |
|
"rewards/margins": 0.046002503484487534, |
|
"rewards/rejected": -0.07760748267173767, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.380916887110366e-06, |
|
"logits/chosen": 0.9162635803222656, |
|
"logits/rejected": 1.0360467433929443, |
|
"logps/chosen": -251.49404907226562, |
|
"logps/rejected": -223.4456329345703, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.031815849244594574, |
|
"rewards/margins": 0.059083469212055206, |
|
"rewards/rejected": -0.09089931845664978, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.365789799169539e-06, |
|
"logits/chosen": 0.977371096611023, |
|
"logits/rejected": 0.9612863659858704, |
|
"logps/chosen": -313.4932556152344, |
|
"logps/rejected": -262.97113037109375, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.05341174453496933, |
|
"rewards/margins": 0.05502920225262642, |
|
"rewards/rejected": -0.10844095051288605, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.350506918317416e-06, |
|
"logits/chosen": 0.9038007855415344, |
|
"logits/rejected": 0.8652862310409546, |
|
"logps/chosen": -306.91912841796875, |
|
"logps/rejected": -275.84429931640625, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.041279759258031845, |
|
"rewards/margins": 0.039026908576488495, |
|
"rewards/rejected": -0.08030666410923004, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.335069520670149e-06, |
|
"logits/chosen": 0.9440900683403015, |
|
"logits/rejected": 1.0484023094177246, |
|
"logps/chosen": -281.1288757324219, |
|
"logps/rejected": -216.98269653320312, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.047745514661073685, |
|
"rewards/margins": 0.06614203751087189, |
|
"rewards/rejected": -0.11388754844665527, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_logits/chosen": 0.9163612127304077, |
|
"eval_logits/rejected": 1.0101075172424316, |
|
"eval_logps/chosen": -283.2487487792969, |
|
"eval_logps/rejected": -254.8289031982422, |
|
"eval_loss": 0.6911502480506897, |
|
"eval_rewards/accuracies": 0.6230000257492065, |
|
"eval_rewards/chosen": -0.057019300758838654, |
|
"eval_rewards/margins": 0.0531105101108551, |
|
"eval_rewards/rejected": -0.11012981832027435, |
|
"eval_runtime": 539.827, |
|
"eval_samples_per_second": 3.705, |
|
"eval_steps_per_second": 0.926, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.319478895246e-06, |
|
"logits/chosen": 0.9531642198562622, |
|
"logits/rejected": 0.9094236493110657, |
|
"logps/chosen": -278.7569274902344, |
|
"logps/rejected": -242.00259399414062, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.04623215273022652, |
|
"rewards/margins": 0.04663626849651337, |
|
"rewards/rejected": -0.09286841005086899, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.303736343857704e-06, |
|
"logits/chosen": 1.053362488746643, |
|
"logits/rejected": 1.00510573387146, |
|
"logps/chosen": -268.8019714355469, |
|
"logps/rejected": -252.3562774658203, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.047048769891262054, |
|
"rewards/margins": 0.0653461441397667, |
|
"rewards/rejected": -0.11239492893218994, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.287843181003772e-06, |
|
"logits/chosen": 0.9648410677909851, |
|
"logits/rejected": 1.080945611000061, |
|
"logps/chosen": -280.63470458984375, |
|
"logps/rejected": -238.6513214111328, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.029626190662384033, |
|
"rewards/margins": 0.06667675822973251, |
|
"rewards/rejected": -0.09630295634269714, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.27180073375873e-06, |
|
"logits/chosen": 0.9461795687675476, |
|
"logits/rejected": 0.9696234464645386, |
|
"logps/chosen": -280.750244140625, |
|
"logps/rejected": -253.75149536132812, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.02992023155093193, |
|
"rewards/margins": 0.06105700135231018, |
|
"rewards/rejected": -0.09097723662853241, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.255610341662304e-06, |
|
"logits/chosen": 0.9404398798942566, |
|
"logits/rejected": 1.006503701210022, |
|
"logps/chosen": -306.0481262207031, |
|
"logps/rejected": -267.99591064453125, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.015811875462532043, |
|
"rewards/margins": 0.061067551374435425, |
|
"rewards/rejected": -0.07687942683696747, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.2392733566075764e-06, |
|
"logits/chosen": 0.9584512710571289, |
|
"logits/rejected": 1.0292441844940186, |
|
"logps/chosen": -243.0128173828125, |
|
"logps/rejected": -224.4785614013672, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.021735265851020813, |
|
"rewards/margins": 0.06055579334497452, |
|
"rewards/rejected": -0.08229105174541473, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.2227911427280975e-06, |
|
"logits/chosen": 0.9586073160171509, |
|
"logits/rejected": 1.008643388748169, |
|
"logps/chosen": -286.1580505371094, |
|
"logps/rejected": -270.8837585449219, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.023701880127191544, |
|
"rewards/margins": 0.04824981838464737, |
|
"rewards/rejected": -0.07195170223712921, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.206165076283983e-06, |
|
"logits/chosen": 0.9629222750663757, |
|
"logits/rejected": 1.0080350637435913, |
|
"logps/chosen": -238.8936309814453, |
|
"logps/rejected": -228.6975860595703, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.022201109677553177, |
|
"rewards/margins": 0.04362647980451584, |
|
"rewards/rejected": -0.06582758575677872, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.189396545546995e-06, |
|
"logits/chosen": 0.9939113855361938, |
|
"logits/rejected": 1.0935704708099365, |
|
"logps/chosen": -289.91900634765625, |
|
"logps/rejected": -280.5440673828125, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.020857712253928185, |
|
"rewards/margins": 0.0328826978802681, |
|
"rewards/rejected": -0.05374041199684143, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.172486950684627e-06, |
|
"logits/chosen": 1.0020853281021118, |
|
"logits/rejected": 1.0316828489303589, |
|
"logps/chosen": -279.2798767089844, |
|
"logps/rejected": -262.09442138671875, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.027787720784544945, |
|
"rewards/margins": 0.048239342868328094, |
|
"rewards/rejected": -0.07602706551551819, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_logits/chosen": 0.9400979280471802, |
|
"eval_logits/rejected": 1.0356999635696411, |
|
"eval_logps/chosen": -279.8863830566406, |
|
"eval_logps/rejected": -251.13418579101562, |
|
"eval_loss": 0.6911138296127319, |
|
"eval_rewards/accuracies": 0.6129999756813049, |
|
"eval_rewards/chosen": -0.023395679891109467, |
|
"eval_rewards/margins": 0.04978705570101738, |
|
"eval_rewards/rejected": -0.07318273931741714, |
|
"eval_runtime": 540.0054, |
|
"eval_samples_per_second": 3.704, |
|
"eval_steps_per_second": 0.926, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.155437703643182e-06, |
|
"logits/chosen": 0.8979179263114929, |
|
"logits/rejected": 0.9356774091720581, |
|
"logps/chosen": -284.64422607421875, |
|
"logps/rejected": -251.070556640625, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.017777901142835617, |
|
"rewards/margins": 0.06165579706430435, |
|
"rewards/rejected": -0.07943369448184967, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.138250228029882e-06, |
|
"logits/chosen": 0.9831737279891968, |
|
"logits/rejected": 1.103745460510254, |
|
"logps/chosen": -233.90310668945312, |
|
"logps/rejected": -216.5464324951172, |
|
"loss": 0.6901, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.01819593645632267, |
|
"rewards/margins": 0.0664977878332138, |
|
"rewards/rejected": -0.08469371497631073, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.120925958993994e-06, |
|
"logits/chosen": 0.948932945728302, |
|
"logits/rejected": 1.0110113620758057, |
|
"logps/chosen": -296.0926818847656, |
|
"logps/rejected": -249.8265380859375, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.02898983657360077, |
|
"rewards/margins": 0.047562919557094574, |
|
"rewards/rejected": -0.07655275613069534, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.103466343106999e-06, |
|
"logits/chosen": 0.9645043611526489, |
|
"logits/rejected": 0.9807823896408081, |
|
"logps/chosen": -274.97869873046875, |
|
"logps/rejected": -243.57064819335938, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.024762103334069252, |
|
"rewards/margins": 0.04700572043657303, |
|
"rewards/rejected": -0.07176782190799713, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.085872838241797e-06, |
|
"logits/chosen": 0.9856332540512085, |
|
"logits/rejected": 1.0186711549758911, |
|
"logps/chosen": -286.65740966796875, |
|
"logps/rejected": -285.44451904296875, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.015045429579913616, |
|
"rewards/margins": 0.0600103922188282, |
|
"rewards/rejected": -0.0750558152794838, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.06814691345098e-06, |
|
"logits/chosen": 0.9584442973136902, |
|
"logits/rejected": 1.0773932933807373, |
|
"logps/chosen": -303.4087829589844, |
|
"logps/rejected": -257.34381103515625, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.03773612901568413, |
|
"rewards/margins": 0.043736010789871216, |
|
"rewards/rejected": -0.08147214353084564, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.050290048844171e-06, |
|
"logits/chosen": 0.9791196584701538, |
|
"logits/rejected": 1.037467360496521, |
|
"logps/chosen": -256.17120361328125, |
|
"logps/rejected": -225.8929443359375, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.015562976710498333, |
|
"rewards/margins": 0.03945142775774002, |
|
"rewards/rejected": -0.05501440912485123, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.032303735464422e-06, |
|
"logits/chosen": 0.9337407946586609, |
|
"logits/rejected": 1.0309889316558838, |
|
"logps/chosen": -289.6443176269531, |
|
"logps/rejected": -257.22784423828125, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.005114209838211536, |
|
"rewards/margins": 0.05096329376101494, |
|
"rewards/rejected": -0.0560775101184845, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.014189475163727e-06, |
|
"logits/chosen": 0.9751637578010559, |
|
"logits/rejected": 1.0514873266220093, |
|
"logps/chosen": -266.8908386230469, |
|
"logps/rejected": -240.3486328125, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.006348415277898312, |
|
"rewards/margins": 0.04528724402189255, |
|
"rewards/rejected": -0.05163566395640373, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.995948780477605e-06, |
|
"logits/chosen": 1.0069153308868408, |
|
"logits/rejected": 1.0419646501541138, |
|
"logps/chosen": -287.21966552734375, |
|
"logps/rejected": -246.9211883544922, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.008892977610230446, |
|
"rewards/margins": 0.038406871259212494, |
|
"rewards/rejected": -0.04729985073208809, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_logits/chosen": 0.9341767430305481, |
|
"eval_logits/rejected": 1.0311079025268555, |
|
"eval_logps/chosen": -279.1179504394531, |
|
"eval_logps/rejected": -250.15402221679688, |
|
"eval_loss": 0.6911120414733887, |
|
"eval_rewards/accuracies": 0.6294999718666077, |
|
"eval_rewards/chosen": -0.015711043030023575, |
|
"eval_rewards/margins": 0.04766979068517685, |
|
"eval_rewards/rejected": -0.06338082998991013, |
|
"eval_runtime": 540.2273, |
|
"eval_samples_per_second": 3.702, |
|
"eval_steps_per_second": 0.926, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.977583174498816e-06, |
|
"logits/chosen": 0.9394723773002625, |
|
"logits/rejected": 0.9873741269111633, |
|
"logps/chosen": -274.50396728515625, |
|
"logps/rejected": -242.7906951904297, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.014722615480422974, |
|
"rewards/margins": 0.039573125541210175, |
|
"rewards/rejected": -0.05429573729634285, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.959094190750172e-06, |
|
"logits/chosen": 0.9101311564445496, |
|
"logits/rejected": 1.0188195705413818, |
|
"logps/chosen": -288.55426025390625, |
|
"logps/rejected": -259.5224304199219, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.029792049899697304, |
|
"rewards/margins": 0.036666542291641235, |
|
"rewards/rejected": -0.06645859032869339, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.9404833730564975e-06, |
|
"logits/chosen": 0.9995678663253784, |
|
"logits/rejected": 1.0337575674057007, |
|
"logps/chosen": -249.91671752929688, |
|
"logps/rejected": -249.8981475830078, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.0360335037112236, |
|
"rewards/margins": 0.040712870657444, |
|
"rewards/rejected": -0.076746366918087, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.921752275415712e-06, |
|
"logits/chosen": 0.9858711361885071, |
|
"logits/rejected": 1.0380220413208008, |
|
"logps/chosen": -242.677490234375, |
|
"logps/rejected": -215.031494140625, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.04031269997358322, |
|
"rewards/margins": 0.04829539731144905, |
|
"rewards/rejected": -0.08860810101032257, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.902902461869079e-06, |
|
"logits/chosen": 0.9379655122756958, |
|
"logits/rejected": 1.0792181491851807, |
|
"logps/chosen": -273.37957763671875, |
|
"logps/rejected": -246.2140350341797, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.047497231513261795, |
|
"rewards/margins": 0.0587293803691864, |
|
"rewards/rejected": -0.1062266081571579, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.883935506370605e-06, |
|
"logits/chosen": 0.916599452495575, |
|
"logits/rejected": 0.9806827306747437, |
|
"logps/chosen": -281.9441833496094, |
|
"logps/rejected": -244.51806640625, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.051647938787937164, |
|
"rewards/margins": 0.05136079713702202, |
|
"rewards/rejected": -0.10300873219966888, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.864852992655617e-06, |
|
"logits/chosen": 0.8767641186714172, |
|
"logits/rejected": 0.9864808320999146, |
|
"logps/chosen": -270.2128601074219, |
|
"logps/rejected": -259.9897766113281, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.042821671813726425, |
|
"rewards/margins": 0.05382692068815231, |
|
"rewards/rejected": -0.09664861112833023, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.845656514108516e-06, |
|
"logits/chosen": 0.8953625559806824, |
|
"logits/rejected": 0.9520618319511414, |
|
"logps/chosen": -297.1680603027344, |
|
"logps/rejected": -249.801025390625, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.038018837571144104, |
|
"rewards/margins": 0.06626948714256287, |
|
"rewards/rejected": -0.10428832471370697, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.826347673629738e-06, |
|
"logits/chosen": 0.9500657320022583, |
|
"logits/rejected": 0.9770669937133789, |
|
"logps/chosen": -247.0959930419922, |
|
"logps/rejected": -231.38626098632812, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.04545672982931137, |
|
"rewards/margins": 0.05727598816156387, |
|
"rewards/rejected": -0.10273271799087524, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.8069280835019062e-06, |
|
"logits/chosen": 0.9221125841140747, |
|
"logits/rejected": 1.0069925785064697, |
|
"logps/chosen": -325.3990783691406, |
|
"logps/rejected": -256.74163818359375, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.03280999884009361, |
|
"rewards/margins": 0.04576558619737625, |
|
"rewards/rejected": -0.07857557386159897, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_logits/chosen": 0.9161292314529419, |
|
"eval_logits/rejected": 1.013729453086853, |
|
"eval_logps/chosen": -282.56494140625, |
|
"eval_logps/rejected": -254.04412841796875, |
|
"eval_loss": 0.6910136938095093, |
|
"eval_rewards/accuracies": 0.6320000290870667, |
|
"eval_rewards/chosen": -0.05018109828233719, |
|
"eval_rewards/margins": 0.052100956439971924, |
|
"eval_rewards/rejected": -0.10228205472230911, |
|
"eval_runtime": 540.0139, |
|
"eval_samples_per_second": 3.704, |
|
"eval_steps_per_second": 0.926, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7873993652552077e-06, |
|
"logits/chosen": 0.9163872003555298, |
|
"logits/rejected": 1.0111920833587646, |
|
"logps/chosen": -263.4795227050781, |
|
"logps/rejected": -241.57888793945312, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.05680041387677193, |
|
"rewards/margins": 0.021093839779496193, |
|
"rewards/rejected": -0.07789425551891327, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7677631495319953e-06, |
|
"logits/chosen": 0.9174981117248535, |
|
"logits/rejected": 0.9849978685379028, |
|
"logps/chosen": -263.1864929199219, |
|
"logps/rejected": -217.2829132080078, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.049250274896621704, |
|
"rewards/margins": 0.0348433181643486, |
|
"rewards/rejected": -0.0840936005115509, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.748021075950633e-06, |
|
"logits/chosen": 1.0062494277954102, |
|
"logits/rejected": 0.9941271543502808, |
|
"logps/chosen": -307.7798767089844, |
|
"logps/rejected": -280.7508239746094, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.05701036378741264, |
|
"rewards/margins": 0.05211354047060013, |
|
"rewards/rejected": -0.10912390798330307, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7281747929685824e-06, |
|
"logits/chosen": 0.926976203918457, |
|
"logits/rejected": 1.0702403783798218, |
|
"logps/chosen": -292.91351318359375, |
|
"logps/rejected": -256.81536865234375, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.04857570677995682, |
|
"rewards/margins": 0.041417308151721954, |
|
"rewards/rejected": -0.08999301493167877, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.7082259577447604e-06, |
|
"logits/chosen": 0.9064332842826843, |
|
"logits/rejected": 1.022146224975586, |
|
"logps/chosen": -324.94122314453125, |
|
"logps/rejected": -255.82431030273438, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.0545024499297142, |
|
"rewards/margins": 0.05221433565020561, |
|
"rewards/rejected": -0.10671677440404892, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.6881762360011688e-06, |
|
"logits/chosen": 0.9837790727615356, |
|
"logits/rejected": 1.0159826278686523, |
|
"logps/chosen": -292.13531494140625, |
|
"logps/rejected": -249.9795684814453, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.061875708401203156, |
|
"rewards/margins": 0.05055435746908188, |
|
"rewards/rejected": -0.11243007332086563, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.668027301883802e-06, |
|
"logits/chosen": 0.9538768529891968, |
|
"logits/rejected": 1.0130136013031006, |
|
"logps/chosen": -266.67266845703125, |
|
"logps/rejected": -224.70468139648438, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.052473802119493484, |
|
"rewards/margins": 0.05191361904144287, |
|
"rewards/rejected": -0.10438741743564606, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.64778083782286e-06, |
|
"logits/chosen": 0.9861133694648743, |
|
"logits/rejected": 1.0575048923492432, |
|
"logps/chosen": -270.86236572265625, |
|
"logps/rejected": -278.5152893066406, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.04235236719250679, |
|
"rewards/margins": 0.05154258757829666, |
|
"rewards/rejected": -0.09389495849609375, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.627438534392268e-06, |
|
"logits/chosen": 0.9495855569839478, |
|
"logits/rejected": 0.9809403419494629, |
|
"logps/chosen": -264.71380615234375, |
|
"logps/rejected": -214.28939819335938, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.03805701807141304, |
|
"rewards/margins": 0.04229050129652023, |
|
"rewards/rejected": -0.08034752309322357, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.607002090168506e-06, |
|
"logits/chosen": 0.9524203538894653, |
|
"logits/rejected": 1.011755108833313, |
|
"logps/chosen": -262.1990661621094, |
|
"logps/rejected": -238.88052368164062, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.04073198884725571, |
|
"rewards/margins": 0.039406824856996536, |
|
"rewards/rejected": -0.08013881742954254, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": 0.9319766759872437, |
|
"eval_logits/rejected": 1.0314892530441284, |
|
"eval_logps/chosen": -281.04010009765625, |
|
"eval_logps/rejected": -252.43983459472656, |
|
"eval_loss": 0.6909967660903931, |
|
"eval_rewards/accuracies": 0.6320000290870667, |
|
"eval_rewards/chosen": -0.03493276238441467, |
|
"eval_rewards/margins": 0.051306504756212234, |
|
"eval_rewards/rejected": -0.08623925596475601, |
|
"eval_runtime": 539.9382, |
|
"eval_samples_per_second": 3.704, |
|
"eval_steps_per_second": 0.926, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.586473211588787e-06, |
|
"logits/chosen": 0.9360347986221313, |
|
"logits/rejected": 1.0636814832687378, |
|
"logps/chosen": -301.8039245605469, |
|
"logps/rejected": -245.0256805419922, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.02840288355946541, |
|
"rewards/margins": 0.06062694638967514, |
|
"rewards/rejected": -0.08902983367443085, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.5658536128085623e-06, |
|
"logits/chosen": 0.9372593760490417, |
|
"logits/rejected": 1.0011626482009888, |
|
"logps/chosen": -253.13583374023438, |
|
"logps/rejected": -247.22116088867188, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.03672366216778755, |
|
"rewards/margins": 0.055417824536561966, |
|
"rewards/rejected": -0.09214149415493011, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.545145015558399e-06, |
|
"logits/chosen": 0.9559675455093384, |
|
"logits/rejected": 1.0077977180480957, |
|
"logps/chosen": -301.99700927734375, |
|
"logps/rejected": -291.1852111816406, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.03201790526509285, |
|
"rewards/margins": 0.047422006726264954, |
|
"rewards/rejected": -0.0794399082660675, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5243491490002056e-06, |
|
"logits/chosen": 0.9405361413955688, |
|
"logits/rejected": 0.972905158996582, |
|
"logps/chosen": -277.42578125, |
|
"logps/rejected": -244.67495727539062, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.035732053220272064, |
|
"rewards/margins": 0.041307561099529266, |
|
"rewards/rejected": -0.07703961431980133, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.503467749582857e-06, |
|
"logits/chosen": 0.9268352389335632, |
|
"logits/rejected": 0.9519475102424622, |
|
"logps/chosen": -251.6887664794922, |
|
"logps/rejected": -207.09823608398438, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.03763822466135025, |
|
"rewards/margins": 0.05269993096590042, |
|
"rewards/rejected": -0.09033815562725067, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.4825025608971947e-06, |
|
"logits/chosen": 0.8857590556144714, |
|
"logits/rejected": 1.0210973024368286, |
|
"logps/chosen": -318.9454650878906, |
|
"logps/rejected": -253.1227569580078, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.041044797748327255, |
|
"rewards/margins": 0.05614280700683594, |
|
"rewards/rejected": -0.0971876010298729, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4614553335304407e-06, |
|
"logits/chosen": 0.9426645040512085, |
|
"logits/rejected": 1.06788969039917, |
|
"logps/chosen": -280.87506103515625, |
|
"logps/rejected": -242.68856811523438, |
|
"loss": 0.6899, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.04844700172543526, |
|
"rewards/margins": 0.06784703582525253, |
|
"rewards/rejected": -0.11629404127597809, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4403278249200222e-06, |
|
"logits/chosen": 0.8899833559989929, |
|
"logits/rejected": 0.981528639793396, |
|
"logps/chosen": -261.93939208984375, |
|
"logps/rejected": -244.01416015625, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.048227887600660324, |
|
"rewards/margins": 0.04917442053556442, |
|
"rewards/rejected": -0.09740231186151505, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4191217992068293e-06, |
|
"logits/chosen": 0.9601287841796875, |
|
"logits/rejected": 0.9461712837219238, |
|
"logps/chosen": -262.20184326171875, |
|
"logps/rejected": -260.2802429199219, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.06563547253608704, |
|
"rewards/margins": 0.05082215741276741, |
|
"rewards/rejected": -0.11645762622356415, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.3978390270879056e-06, |
|
"logits/chosen": 0.9125478863716125, |
|
"logits/rejected": 1.015061616897583, |
|
"logps/chosen": -299.32171630859375, |
|
"logps/rejected": -256.58953857421875, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.04695742204785347, |
|
"rewards/margins": 0.05506708472967148, |
|
"rewards/rejected": -0.10202451795339584, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_logits/chosen": 0.9100068211555481, |
|
"eval_logits/rejected": 1.0087957382202148, |
|
"eval_logps/chosen": -282.8432922363281, |
|
"eval_logps/rejected": -254.7029571533203, |
|
"eval_loss": 0.69096839427948, |
|
"eval_rewards/accuracies": 0.6324999928474426, |
|
"eval_rewards/chosen": -0.052964530885219574, |
|
"eval_rewards/margins": 0.05590558797121048, |
|
"eval_rewards/rejected": -0.10887012630701065, |
|
"eval_runtime": 540.5566, |
|
"eval_samples_per_second": 3.7, |
|
"eval_steps_per_second": 0.925, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3764812856685995e-06, |
|
"logits/chosen": 0.944310188293457, |
|
"logits/rejected": 0.9725696444511414, |
|
"logps/chosen": -294.048095703125, |
|
"logps/rejected": -265.73284912109375, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.04054544121026993, |
|
"rewards/margins": 0.06353868544101715, |
|
"rewards/rejected": -0.10408411920070648, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3550503583141726e-06, |
|
"logits/chosen": 0.9270051717758179, |
|
"logits/rejected": 0.9650331735610962, |
|
"logps/chosen": -327.187744140625, |
|
"logps/rejected": -308.1183166503906, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.045968521386384964, |
|
"rewards/margins": 0.062069911509752274, |
|
"rewards/rejected": -0.10803844034671783, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3335480345008907e-06, |
|
"logits/chosen": 0.9014407992362976, |
|
"logits/rejected": 0.9876381754875183, |
|
"logps/chosen": -266.403564453125, |
|
"logps/rejected": -229.73080444335938, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.04243239760398865, |
|
"rewards/margins": 0.06432008743286133, |
|
"rewards/rejected": -0.10675249993801117, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.3119761096666055e-06, |
|
"logits/chosen": 1.0035514831542969, |
|
"logits/rejected": 0.9860905408859253, |
|
"logps/chosen": -317.1201171875, |
|
"logps/rejected": -265.30499267578125, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.06333385407924652, |
|
"rewards/margins": 0.047281377017498016, |
|
"rewards/rejected": -0.11061523109674454, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.290336385060832e-06, |
|
"logits/chosen": 0.9631811380386353, |
|
"logits/rejected": 0.9473791122436523, |
|
"logps/chosen": -288.9410095214844, |
|
"logps/rejected": -267.47454833984375, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.049429457634687424, |
|
"rewards/margins": 0.06482435762882233, |
|
"rewards/rejected": -0.11425381898880005, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.268630667594348e-06, |
|
"logits/chosen": 0.9454625844955444, |
|
"logits/rejected": 1.029585838317871, |
|
"logps/chosen": -284.25531005859375, |
|
"logps/rejected": -266.0755920410156, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.05219440534710884, |
|
"rewards/margins": 0.05088215321302414, |
|
"rewards/rejected": -0.10307655483484268, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2468607696883147e-06, |
|
"logits/chosen": 0.9423101544380188, |
|
"logits/rejected": 1.0101561546325684, |
|
"logps/chosen": -302.5003356933594, |
|
"logps/rejected": -258.0522155761719, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.05451619625091553, |
|
"rewards/margins": 0.06200449541211128, |
|
"rewards/rejected": -0.11652068793773651, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.225028509122944e-06, |
|
"logits/chosen": 0.9246463775634766, |
|
"logits/rejected": 1.057897925376892, |
|
"logps/chosen": -307.1044921875, |
|
"logps/rejected": -260.0653076171875, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.025903914123773575, |
|
"rewards/margins": 0.06586969643831253, |
|
"rewards/rejected": -0.0917736142873764, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.2031357088857083e-06, |
|
"logits/chosen": 0.9973335266113281, |
|
"logits/rejected": 1.0170247554779053, |
|
"logps/chosen": -275.74005126953125, |
|
"logps/rejected": -261.8125915527344, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.047602783888578415, |
|
"rewards/margins": 0.05812246724963188, |
|
"rewards/rejected": -0.1057252511382103, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.181184197019127e-06, |
|
"logits/chosen": 1.0013173818588257, |
|
"logits/rejected": 0.9759401082992554, |
|
"logps/chosen": -283.0948791503906, |
|
"logps/rejected": -236.4706268310547, |
|
"loss": 0.6901, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.04060991853475571, |
|
"rewards/margins": 0.06453616917133331, |
|
"rewards/rejected": -0.10514608770608902, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_logits/chosen": 0.9323698282241821, |
|
"eval_logits/rejected": 1.0313962697982788, |
|
"eval_logps/chosen": -281.6338195800781, |
|
"eval_logps/rejected": -253.65231323242188, |
|
"eval_loss": 0.6909690499305725, |
|
"eval_rewards/accuracies": 0.6225000023841858, |
|
"eval_rewards/chosen": -0.0408700592815876, |
|
"eval_rewards/margins": 0.057493917644023895, |
|
"eval_rewards/rejected": -0.0983639732003212, |
|
"eval_runtime": 539.9632, |
|
"eval_samples_per_second": 3.704, |
|
"eval_steps_per_second": 0.926, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.159175806468126e-06, |
|
"logits/chosen": 0.9391202926635742, |
|
"logits/rejected": 1.0263153314590454, |
|
"logps/chosen": -284.8543395996094, |
|
"logps/rejected": -277.1647644042969, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.03517382591962814, |
|
"rewards/margins": 0.05179664492607117, |
|
"rewards/rejected": -0.08697047084569931, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.1371123749269804e-06, |
|
"logits/chosen": 0.9998887777328491, |
|
"logits/rejected": 1.085648536682129, |
|
"logps/chosen": -282.2349548339844, |
|
"logps/rejected": -252.2509765625, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.029559914022684097, |
|
"rewards/margins": 0.06976927816867828, |
|
"rewards/rejected": -0.09932918846607208, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.114995744685877e-06, |
|
"logits/chosen": 0.9946446418762207, |
|
"logits/rejected": 0.9664584398269653, |
|
"logps/chosen": -258.7174072265625, |
|
"logps/rejected": -240.670654296875, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.02138775773346424, |
|
"rewards/margins": 0.07551835477352142, |
|
"rewards/rejected": -0.09690611809492111, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.0928277624770743e-06, |
|
"logits/chosen": 0.9557411074638367, |
|
"logits/rejected": 1.0262891054153442, |
|
"logps/chosen": -286.01458740234375, |
|
"logps/rejected": -232.46139526367188, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.01390999834984541, |
|
"rewards/margins": 0.07762787491083145, |
|
"rewards/rejected": -0.09153787791728973, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.070610279320708e-06, |
|
"logits/chosen": 0.9258459806442261, |
|
"logits/rejected": 1.0731465816497803, |
|
"logps/chosen": -286.8509826660156, |
|
"logps/rejected": -233.0972137451172, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.032274287194013596, |
|
"rewards/margins": 0.0481577143073082, |
|
"rewards/rejected": -0.0804319903254509, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0483451503702264e-06, |
|
"logits/chosen": 0.9777040481567383, |
|
"logits/rejected": 1.0403920412063599, |
|
"logps/chosen": -288.07568359375, |
|
"logps/rejected": -264.2755432128906, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.023307165130972862, |
|
"rewards/margins": 0.048550479114055634, |
|
"rewards/rejected": -0.07185763865709305, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0260342347574916e-06, |
|
"logits/chosen": 0.9937931895256042, |
|
"logits/rejected": 1.0553154945373535, |
|
"logps/chosen": -299.058837890625, |
|
"logps/rejected": -248.8879852294922, |
|
"loss": 0.6899, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.036436520516872406, |
|
"rewards/margins": 0.07167023420333862, |
|
"rewards/rejected": -0.10810675472021103, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0036793954375358e-06, |
|
"logits/chosen": 0.9339237213134766, |
|
"logits/rejected": 1.0194135904312134, |
|
"logps/chosen": -279.79034423828125, |
|
"logps/rejected": -242.7287139892578, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.015874430537223816, |
|
"rewards/margins": 0.0718928873538971, |
|
"rewards/rejected": -0.08776732534170151, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.981282499033009e-06, |
|
"logits/chosen": 0.9355725049972534, |
|
"logits/rejected": 0.9969936609268188, |
|
"logps/chosen": -294.20538330078125, |
|
"logps/rejected": -244.11160278320312, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.026097718626260757, |
|
"rewards/margins": 0.05047178268432617, |
|
"rewards/rejected": -0.07656950503587723, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9588454156783163e-06, |
|
"logits/chosen": 1.0147713422775269, |
|
"logits/rejected": 0.9884480237960815, |
|
"logps/chosen": -278.02313232421875, |
|
"logps/rejected": -251.70596313476562, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.02444344013929367, |
|
"rewards/margins": 0.054150182753801346, |
|
"rewards/rejected": -0.07859362661838531, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_logits/chosen": 0.9226279258728027, |
|
"eval_logits/rejected": 1.0212137699127197, |
|
"eval_logps/chosen": -280.8078308105469, |
|
"eval_logps/rejected": -252.7657470703125, |
|
"eval_loss": 0.6909632682800293, |
|
"eval_rewards/accuracies": 0.6215000152587891, |
|
"eval_rewards/chosen": -0.03261038661003113, |
|
"eval_rewards/margins": 0.05688786879181862, |
|
"eval_rewards/rejected": -0.08949825167655945, |
|
"eval_runtime": 539.8982, |
|
"eval_samples_per_second": 3.704, |
|
"eval_steps_per_second": 0.926, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9363700188634597e-06, |
|
"logits/chosen": 0.9406150579452515, |
|
"logits/rejected": 1.0022201538085938, |
|
"logps/chosen": -272.09759521484375, |
|
"logps/rejected": -249.77279663085938, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.0340249240398407, |
|
"rewards/margins": 0.041387807577848434, |
|
"rewards/rejected": -0.07541273534297943, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9138581852776053e-06, |
|
"logits/chosen": 0.9434909820556641, |
|
"logits/rejected": 1.056489109992981, |
|
"logps/chosen": -277.2883605957031, |
|
"logps/rejected": -262.8563537597656, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.031123792752623558, |
|
"rewards/margins": 0.06892909109592438, |
|
"rewards/rejected": -0.10005287826061249, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8913117946523805e-06, |
|
"logits/chosen": 1.0239412784576416, |
|
"logits/rejected": 0.993172824382782, |
|
"logps/chosen": -261.37689208984375, |
|
"logps/rejected": -221.52090454101562, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.042845502495765686, |
|
"rewards/margins": 0.056726813316345215, |
|
"rewards/rejected": -0.0995723158121109, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8687327296049126e-06, |
|
"logits/chosen": 0.9209731817245483, |
|
"logits/rejected": 1.0005519390106201, |
|
"logps/chosen": -281.11822509765625, |
|
"logps/rejected": -259.76007080078125, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.028859639540314674, |
|
"rewards/margins": 0.047795943915843964, |
|
"rewards/rejected": -0.07665558159351349, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8461228754806376e-06, |
|
"logits/chosen": 0.958030104637146, |
|
"logits/rejected": 1.0400612354278564, |
|
"logps/chosen": -250.44058227539062, |
|
"logps/rejected": -242.2317352294922, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.0336209237575531, |
|
"rewards/margins": 0.05756276845932007, |
|
"rewards/rejected": -0.09118369966745377, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.823484120195865e-06, |
|
"logits/chosen": 0.9849356412887573, |
|
"logits/rejected": 0.9542206525802612, |
|
"logps/chosen": -209.1316375732422, |
|
"logps/rejected": -223.1927947998047, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.030019324272871017, |
|
"rewards/margins": 0.057300496846437454, |
|
"rewards/rejected": -0.08731982111930847, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.8008183540801486e-06, |
|
"logits/chosen": 0.8806187510490417, |
|
"logits/rejected": 0.982339084148407, |
|
"logps/chosen": -288.1579895019531, |
|
"logps/rejected": -268.0417785644531, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.03183884546160698, |
|
"rewards/margins": 0.042079776525497437, |
|
"rewards/rejected": -0.07391862571239471, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7781274697184353e-06, |
|
"logits/chosen": 0.9701333045959473, |
|
"logits/rejected": 1.0392358303070068, |
|
"logps/chosen": -274.5201110839844, |
|
"logps/rejected": -255.364501953125, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.03297718986868858, |
|
"rewards/margins": 0.0656973272562027, |
|
"rewards/rejected": -0.09867450594902039, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7554133617930397e-06, |
|
"logits/chosen": 0.9770027995109558, |
|
"logits/rejected": 0.9862260818481445, |
|
"logps/chosen": -272.98602294921875, |
|
"logps/rejected": -268.11175537109375, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.037066780030727386, |
|
"rewards/margins": 0.03600457310676575, |
|
"rewards/rejected": -0.07307135313749313, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7326779269254363e-06, |
|
"logits/chosen": 0.9106483459472656, |
|
"logits/rejected": 1.0019176006317139, |
|
"logps/chosen": -254.740234375, |
|
"logps/rejected": -229.75634765625, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.026663145050406456, |
|
"rewards/margins": 0.03192012384533882, |
|
"rewards/rejected": -0.05858327075839043, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": 0.9258546829223633, |
|
"eval_logits/rejected": 1.025207757949829, |
|
"eval_logps/chosen": -279.9319763183594, |
|
"eval_logps/rejected": -251.49105834960938, |
|
"eval_loss": 0.6909723281860352, |
|
"eval_rewards/accuracies": 0.6274999976158142, |
|
"eval_rewards/chosen": -0.02385157160460949, |
|
"eval_rewards/margins": 0.05289952829480171, |
|
"eval_rewards/rejected": -0.07675110548734665, |
|
"eval_runtime": 540.2889, |
|
"eval_samples_per_second": 3.702, |
|
"eval_steps_per_second": 0.925, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.7099230635178954e-06, |
|
"logits/chosen": 0.9473625421524048, |
|
"logits/rejected": 1.0636024475097656, |
|
"logps/chosen": -250.43350219726562, |
|
"logps/rejected": -213.73043823242188, |
|
"loss": 0.6897, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.016012001782655716, |
|
"rewards/margins": 0.0778764933347702, |
|
"rewards/rejected": -0.09388849884271622, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6871506715949608e-06, |
|
"logits/chosen": 1.0584747791290283, |
|
"logits/rejected": 1.0787056684494019, |
|
"logps/chosen": -277.7005615234375, |
|
"logps/rejected": -263.6285400390625, |
|
"loss": 0.6897, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.028144020587205887, |
|
"rewards/margins": 0.07585910707712173, |
|
"rewards/rejected": -0.10400311648845673, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6643626526448063e-06, |
|
"logits/chosen": 0.9738826751708984, |
|
"logits/rejected": 1.0668303966522217, |
|
"logps/chosen": -245.468994140625, |
|
"logps/rejected": -252.04458618164062, |
|
"loss": 0.6901, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.026749875396490097, |
|
"rewards/margins": 0.06751126796007156, |
|
"rewards/rejected": -0.09426114708185196, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6415609094604562e-06, |
|
"logits/chosen": 0.9280556440353394, |
|
"logits/rejected": 1.0651742219924927, |
|
"logps/chosen": -304.1370849609375, |
|
"logps/rejected": -259.7930603027344, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.03546469286084175, |
|
"rewards/margins": 0.06755216419696808, |
|
"rewards/rejected": -0.10301685333251953, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.618747345980904e-06, |
|
"logits/chosen": 0.908758282661438, |
|
"logits/rejected": 1.0054810047149658, |
|
"logps/chosen": -289.89154052734375, |
|
"logps/rejected": -250.53231811523438, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.028398990631103516, |
|
"rewards/margins": 0.06537959724664688, |
|
"rewards/rejected": -0.093778595328331, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.595923867132136e-06, |
|
"logits/chosen": 1.0136375427246094, |
|
"logits/rejected": 1.071046233177185, |
|
"logps/chosen": -227.84829711914062, |
|
"logps/rejected": -231.29788208007812, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.025034338235855103, |
|
"rewards/margins": 0.06069794297218323, |
|
"rewards/rejected": -0.08573228865861893, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5730923786680672e-06, |
|
"logits/chosen": 0.9761725664138794, |
|
"logits/rejected": 1.0631458759307861, |
|
"logps/chosen": -275.0975341796875, |
|
"logps/rejected": -230.28170776367188, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.03186946362257004, |
|
"rewards/margins": 0.056202489882707596, |
|
"rewards/rejected": -0.08807194232940674, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5502547870114137e-06, |
|
"logits/chosen": 0.9701007604598999, |
|
"logits/rejected": 1.0082242488861084, |
|
"logps/chosen": -231.7493133544922, |
|
"logps/rejected": -252.67343139648438, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.02439451590180397, |
|
"rewards/margins": 0.04928570240736008, |
|
"rewards/rejected": -0.07368021458387375, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.527412999094507e-06, |
|
"logits/chosen": 0.9784774780273438, |
|
"logits/rejected": 1.0191878080368042, |
|
"logps/chosen": -283.6234130859375, |
|
"logps/rejected": -256.2926940917969, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.03525594249367714, |
|
"rewards/margins": 0.05471722036600113, |
|
"rewards/rejected": -0.08997315913438797, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.504568922200064e-06, |
|
"logits/chosen": 0.9242954254150391, |
|
"logits/rejected": 0.9784539937973022, |
|
"logps/chosen": -265.85198974609375, |
|
"logps/rejected": -248.41354370117188, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.03961525112390518, |
|
"rewards/margins": 0.04367566108703613, |
|
"rewards/rejected": -0.08329091221094131, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_logits/chosen": 0.9476714730262756, |
|
"eval_logits/rejected": 1.0475825071334839, |
|
"eval_logps/chosen": -281.3605651855469, |
|
"eval_logps/rejected": -253.0793914794922, |
|
"eval_loss": 0.690941333770752, |
|
"eval_rewards/accuracies": 0.6345000267028809, |
|
"eval_rewards/chosen": -0.03813740611076355, |
|
"eval_rewards/margins": 0.054497238248586655, |
|
"eval_rewards/rejected": -0.0926346406340599, |
|
"eval_runtime": 539.933, |
|
"eval_samples_per_second": 3.704, |
|
"eval_steps_per_second": 0.926, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4817244638019333e-06, |
|
"logits/chosen": 0.9688129425048828, |
|
"logits/rejected": 1.0826255083084106, |
|
"logps/chosen": -259.4082336425781, |
|
"logps/rejected": -252.29434204101562, |
|
"loss": 0.6898, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.03624454885721207, |
|
"rewards/margins": 0.06441988795995712, |
|
"rewards/rejected": -0.10066443681716919, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4588815314058155e-06, |
|
"logits/chosen": 0.9535354375839233, |
|
"logits/rejected": 0.9888173937797546, |
|
"logps/chosen": -308.61053466796875, |
|
"logps/rejected": -290.3708801269531, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.033887576311826706, |
|
"rewards/margins": 0.038659535348415375, |
|
"rewards/rejected": -0.07254711538553238, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4360420323899922e-06, |
|
"logits/chosen": 0.9049111604690552, |
|
"logits/rejected": 1.0025997161865234, |
|
"logps/chosen": -300.1152648925781, |
|
"logps/rejected": -246.1627197265625, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.03963657096028328, |
|
"rewards/margins": 0.07257360219955444, |
|
"rewards/rejected": -0.11221016943454742, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4132078738460585e-06, |
|
"logits/chosen": 0.985696017742157, |
|
"logits/rejected": 0.9788693189620972, |
|
"logps/chosen": -273.555419921875, |
|
"logps/rejected": -260.4004211425781, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.04467379301786423, |
|
"rewards/margins": 0.04357798025012016, |
|
"rewards/rejected": -0.08825178444385529, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.3903809624196826e-06, |
|
"logits/chosen": 0.989804744720459, |
|
"logits/rejected": 1.0961401462554932, |
|
"logps/chosen": -260.736328125, |
|
"logps/rejected": -231.63491821289062, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.051245905458927155, |
|
"rewards/margins": 0.05609399080276489, |
|
"rewards/rejected": -0.10733989626169205, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3675632041513978e-06, |
|
"logits/chosen": 1.0626736879348755, |
|
"logits/rejected": 1.1053330898284912, |
|
"logps/chosen": -243.4201202392578, |
|
"logps/rejected": -225.9866180419922, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.052421581000089645, |
|
"rewards/margins": 0.037004150450229645, |
|
"rewards/rejected": -0.08942572772502899, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3447565043174533e-06, |
|
"logits/chosen": 0.9754394292831421, |
|
"logits/rejected": 0.9671268463134766, |
|
"logps/chosen": -273.8443603515625, |
|
"logps/rejected": -249.9600372314453, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.043098267167806625, |
|
"rewards/margins": 0.04836183041334152, |
|
"rewards/rejected": -0.09146009385585785, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.321962767270724e-06, |
|
"logits/chosen": 1.0004395246505737, |
|
"logits/rejected": 1.0548702478408813, |
|
"logps/chosen": -258.23388671875, |
|
"logps/rejected": -254.6455841064453, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.042513586580753326, |
|
"rewards/margins": 0.06627936661243439, |
|
"rewards/rejected": -0.10879294574260712, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.299183896281692e-06, |
|
"logits/chosen": 0.932357907295227, |
|
"logits/rejected": 1.1255356073379517, |
|
"logps/chosen": -272.53533935546875, |
|
"logps/rejected": -232.40127563476562, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.040888115763664246, |
|
"rewards/margins": 0.07338190823793411, |
|
"rewards/rejected": -0.11427001655101776, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2764217933795297e-06, |
|
"logits/chosen": 0.9959739446640015, |
|
"logits/rejected": 0.9297063946723938, |
|
"logps/chosen": -290.0731201171875, |
|
"logps/rejected": -259.72259521484375, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.048611197620630264, |
|
"rewards/margins": 0.04705143719911575, |
|
"rewards/rejected": -0.09566263109445572, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_logits/chosen": 0.9399436116218567, |
|
"eval_logits/rejected": 1.0406934022903442, |
|
"eval_logps/chosen": -281.7611389160156, |
|
"eval_logps/rejected": -253.66929626464844, |
|
"eval_loss": 0.690939724445343, |
|
"eval_rewards/accuracies": 0.6324999928474426, |
|
"eval_rewards/chosen": -0.042143091559410095, |
|
"eval_rewards/margins": 0.05639072135090828, |
|
"eval_rewards/rejected": -0.09853381663560867, |
|
"eval_runtime": 539.9748, |
|
"eval_samples_per_second": 3.704, |
|
"eval_steps_per_second": 0.926, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2536783591932786e-06, |
|
"logits/chosen": 1.089950680732727, |
|
"logits/rejected": 0.9905519485473633, |
|
"logps/chosen": -274.2284851074219, |
|
"logps/rejected": -254.03823852539062, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.03807093948125839, |
|
"rewards/margins": 0.06290793418884277, |
|
"rewards/rejected": -0.10097887367010117, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.230955492793149e-06, |
|
"logits/chosen": 0.9911603927612305, |
|
"logits/rejected": 1.117432951927185, |
|
"logps/chosen": -274.63616943359375, |
|
"logps/rejected": -236.0727996826172, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.029746342450380325, |
|
"rewards/margins": 0.05460263043642044, |
|
"rewards/rejected": -0.08434897661209106, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.208255091531947e-06, |
|
"logits/chosen": 0.9844030141830444, |
|
"logits/rejected": 1.0057882070541382, |
|
"logps/chosen": -267.25860595703125, |
|
"logps/rejected": -221.1670684814453, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.02886761724948883, |
|
"rewards/margins": 0.06318513303995132, |
|
"rewards/rejected": -0.09205274283885956, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1855790508866435e-06, |
|
"logits/chosen": 0.9558299779891968, |
|
"logits/rejected": 1.0252538919448853, |
|
"logps/chosen": -292.6085510253906, |
|
"logps/rejected": -274.9212951660156, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.02827531099319458, |
|
"rewards/margins": 0.041763827204704285, |
|
"rewards/rejected": -0.07003913819789886, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.162929264300107e-06, |
|
"logits/chosen": 1.0085655450820923, |
|
"logits/rejected": 1.0989354848861694, |
|
"logps/chosen": -255.2216796875, |
|
"logps/rejected": -226.0516815185547, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.036384742707014084, |
|
"rewards/margins": 0.05588601902127266, |
|
"rewards/rejected": -0.09227076172828674, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1403076230230006e-06, |
|
"logits/chosen": 0.9903377294540405, |
|
"logits/rejected": 1.0552234649658203, |
|
"logps/chosen": -270.6020202636719, |
|
"logps/rejected": -208.5309295654297, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.04310330003499985, |
|
"rewards/margins": 0.04129331558942795, |
|
"rewards/rejected": -0.0843966156244278, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.11771601595586e-06, |
|
"logits/chosen": 0.9834343791007996, |
|
"logits/rejected": 1.0014784336090088, |
|
"logps/chosen": -259.8085632324219, |
|
"logps/rejected": -261.5582580566406, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.023330455645918846, |
|
"rewards/margins": 0.04660937935113907, |
|
"rewards/rejected": -0.06993982940912247, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0951563294913737e-06, |
|
"logits/chosen": 0.9085767865180969, |
|
"logits/rejected": 1.0599794387817383, |
|
"logps/chosen": -264.39208984375, |
|
"logps/rejected": -222.37948608398438, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.03771457076072693, |
|
"rewards/margins": 0.04855852574110031, |
|
"rewards/rejected": -0.08627309650182724, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0726304473568693e-06, |
|
"logits/chosen": 0.9768549799919128, |
|
"logits/rejected": 1.0272681713104248, |
|
"logps/chosen": -250.63037109375, |
|
"logps/rejected": -270.17938232421875, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.03126218914985657, |
|
"rewards/margins": 0.057791270315647125, |
|
"rewards/rejected": -0.08905345946550369, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.050140250457023e-06, |
|
"logits/chosen": 0.9690292477607727, |
|
"logits/rejected": 0.9458838701248169, |
|
"logps/chosen": -250.7799072265625, |
|
"logps/rejected": -240.507568359375, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.026477226987481117, |
|
"rewards/margins": 0.053064655512571335, |
|
"rewards/rejected": -0.079541876912117, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_logits/chosen": 0.9399006366729736, |
|
"eval_logits/rejected": 1.0408267974853516, |
|
"eval_logps/chosen": -280.7285461425781, |
|
"eval_logps/rejected": -252.42715454101562, |
|
"eval_loss": 0.6909221410751343, |
|
"eval_rewards/accuracies": 0.6334999799728394, |
|
"eval_rewards/chosen": -0.03181701526045799, |
|
"eval_rewards/margins": 0.05429535731673241, |
|
"eval_rewards/rejected": -0.0861123651266098, |
|
"eval_runtime": 540.0931, |
|
"eval_samples_per_second": 3.703, |
|
"eval_steps_per_second": 0.926, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0276876167168042e-06, |
|
"logits/chosen": 0.9707075357437134, |
|
"logits/rejected": 1.1204993724822998, |
|
"logps/chosen": -285.9052734375, |
|
"logps/rejected": -265.517333984375, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.02710430696606636, |
|
"rewards/margins": 0.0629369467496872, |
|
"rewards/rejected": -0.09004124999046326, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.0052744209246682e-06, |
|
"logits/chosen": 0.9497090578079224, |
|
"logits/rejected": 1.0253454446792603, |
|
"logps/chosen": -259.4183044433594, |
|
"logps/rejected": -253.4176483154297, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.03496091067790985, |
|
"rewards/margins": 0.045159030705690384, |
|
"rewards/rejected": -0.08011993765830994, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9829025345760127e-06, |
|
"logits/chosen": 0.9671627283096313, |
|
"logits/rejected": 1.0244706869125366, |
|
"logps/chosen": -316.0784912109375, |
|
"logps/rejected": -272.86224365234375, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.03136713430285454, |
|
"rewards/margins": 0.05638208985328674, |
|
"rewards/rejected": -0.08774922788143158, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9605738257169115e-06, |
|
"logits/chosen": 1.038586974143982, |
|
"logits/rejected": 1.0072470903396606, |
|
"logps/chosen": -250.32870483398438, |
|
"logps/rejected": -251.5189971923828, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.04340856149792671, |
|
"rewards/margins": 0.0459757074713707, |
|
"rewards/rejected": -0.08938425779342651, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9382901587881275e-06, |
|
"logits/chosen": 0.9293440580368042, |
|
"logits/rejected": 1.0626415014266968, |
|
"logps/chosen": -260.1195373535156, |
|
"logps/rejected": -246.67117309570312, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.03792440518736839, |
|
"rewards/margins": 0.0486772395670414, |
|
"rewards/rejected": -0.08660164475440979, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.916053394469437e-06, |
|
"logits/chosen": 0.9554189443588257, |
|
"logits/rejected": 1.0219160318374634, |
|
"logps/chosen": -227.74856567382812, |
|
"logps/rejected": -232.154296875, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.03027189150452614, |
|
"rewards/margins": 0.05248479172587395, |
|
"rewards/rejected": -0.08275668323040009, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8938653895242604e-06, |
|
"logits/chosen": 0.9694328308105469, |
|
"logits/rejected": 0.998115062713623, |
|
"logps/chosen": -242.61392211914062, |
|
"logps/rejected": -225.9537811279297, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.01270161010324955, |
|
"rewards/margins": 0.06611243635416031, |
|
"rewards/rejected": -0.07881404459476471, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8717279966446267e-06, |
|
"logits/chosen": 0.939893901348114, |
|
"logits/rejected": 1.0379140377044678, |
|
"logps/chosen": -271.5564270019531, |
|
"logps/rejected": -256.105712890625, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.024293258786201477, |
|
"rewards/margins": 0.05607147887349129, |
|
"rewards/rejected": -0.08036474138498306, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8496430642964698e-06, |
|
"logits/chosen": 0.9116802215576172, |
|
"logits/rejected": 1.035742998123169, |
|
"logps/chosen": -255.07760620117188, |
|
"logps/rejected": -239.1924285888672, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.019214514642953873, |
|
"rewards/margins": 0.05538605526089668, |
|
"rewards/rejected": -0.07460056990385056, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.827612436565286e-06, |
|
"logits/chosen": 0.926386833190918, |
|
"logits/rejected": 0.9954707026481628, |
|
"logps/chosen": -254.49209594726562, |
|
"logps/rejected": -233.5257568359375, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.027828719466924667, |
|
"rewards/margins": 0.059567712247371674, |
|
"rewards/rejected": -0.08739643543958664, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": 0.9198330044746399, |
|
"eval_logits/rejected": 1.021889328956604, |
|
"eval_logps/chosen": -280.510009765625, |
|
"eval_logps/rejected": -252.31210327148438, |
|
"eval_loss": 0.6909388303756714, |
|
"eval_rewards/accuracies": 0.6359999775886536, |
|
"eval_rewards/chosen": -0.029631877318024635, |
|
"eval_rewards/margins": 0.05532996356487274, |
|
"eval_rewards/rejected": -0.08496184647083282, |
|
"eval_runtime": 539.9384, |
|
"eval_samples_per_second": 3.704, |
|
"eval_steps_per_second": 0.926, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8056379530021492e-06, |
|
"logits/chosen": 0.8990974426269531, |
|
"logits/rejected": 1.0346615314483643, |
|
"logps/chosen": -256.46343994140625, |
|
"logps/rejected": -225.152099609375, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.029439201578497887, |
|
"rewards/margins": 0.060266874730587006, |
|
"rewards/rejected": -0.08970607817173004, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.7837214484701154e-06, |
|
"logits/chosen": 0.9923946261405945, |
|
"logits/rejected": 1.0339380502700806, |
|
"logps/chosen": -273.4414367675781, |
|
"logps/rejected": -264.6556091308594, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.029256407171487808, |
|
"rewards/margins": 0.07676726579666138, |
|
"rewards/rejected": -0.10602366924285889, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7618647529910043e-06, |
|
"logits/chosen": 0.9024986028671265, |
|
"logits/rejected": 1.0799829959869385, |
|
"logps/chosen": -260.97979736328125, |
|
"logps/rejected": -240.7161102294922, |
|
"loss": 0.6897, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.025985723361372948, |
|
"rewards/margins": 0.06577527523040771, |
|
"rewards/rejected": -0.09176099300384521, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7400696915925996e-06, |
|
"logits/chosen": 0.9433887600898743, |
|
"logits/rejected": 1.0706889629364014, |
|
"logps/chosen": -278.13543701171875, |
|
"logps/rejected": -270.07415771484375, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.01627534069120884, |
|
"rewards/margins": 0.07549260556697845, |
|
"rewards/rejected": -0.09176793694496155, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.718338084156254e-06, |
|
"logits/chosen": 0.9400017857551575, |
|
"logits/rejected": 0.9848964810371399, |
|
"logps/chosen": -277.2012634277344, |
|
"logps/rejected": -248.524169921875, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.023953277617692947, |
|
"rewards/margins": 0.05445709824562073, |
|
"rewards/rejected": -0.07841037213802338, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.6966717452649372e-06, |
|
"logits/chosen": 0.9121321439743042, |
|
"logits/rejected": 0.9821128845214844, |
|
"logps/chosen": -280.4564208984375, |
|
"logps/rejected": -272.4418640136719, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.022658145055174828, |
|
"rewards/margins": 0.058780424296855927, |
|
"rewards/rejected": -0.0814385712146759, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6750724840517103e-06, |
|
"logits/chosen": 0.9408855438232422, |
|
"logits/rejected": 1.041475534439087, |
|
"logps/chosen": -295.4039306640625, |
|
"logps/rejected": -253.6959686279297, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.02255399525165558, |
|
"rewards/margins": 0.0683935284614563, |
|
"rewards/rejected": -0.09094752371311188, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6535421040486686e-06, |
|
"logits/chosen": 0.9445545077323914, |
|
"logits/rejected": 0.9898989796638489, |
|
"logps/chosen": -257.03387451171875, |
|
"logps/rejected": -232.8082275390625, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.022778628394007683, |
|
"rewards/margins": 0.05287964269518852, |
|
"rewards/rejected": -0.07565827667713165, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6320824030363458e-06, |
|
"logits/chosen": 0.8453947901725769, |
|
"logits/rejected": 1.0863592624664307, |
|
"logps/chosen": -268.630615234375, |
|
"logps/rejected": -246.2313690185547, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.030399400740861893, |
|
"rewards/margins": 0.0645337849855423, |
|
"rewards/rejected": -0.09493318945169449, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6106951728936028e-06, |
|
"logits/chosen": 0.9887116551399231, |
|
"logits/rejected": 1.091596007347107, |
|
"logps/chosen": -232.3385772705078, |
|
"logps/rejected": -221.32968139648438, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.03866693750023842, |
|
"rewards/margins": 0.05171254277229309, |
|
"rewards/rejected": -0.09037948399782181, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_logits/chosen": 0.9195509552955627, |
|
"eval_logits/rejected": 1.0213452577590942, |
|
"eval_logps/chosen": -281.2753601074219, |
|
"eval_logps/rejected": -253.40109252929688, |
|
"eval_loss": 0.6909087896347046, |
|
"eval_rewards/accuracies": 0.6330000162124634, |
|
"eval_rewards/chosen": -0.037285856902599335, |
|
"eval_rewards/margins": 0.05856594070792198, |
|
"eval_rewards/rejected": -0.09585181623697281, |
|
"eval_runtime": 539.985, |
|
"eval_samples_per_second": 3.704, |
|
"eval_steps_per_second": 0.926, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5893821994479996e-06, |
|
"logits/chosen": 1.0217519998550415, |
|
"logits/rejected": 1.1035264730453491, |
|
"logps/chosen": -280.2012939453125, |
|
"logps/rejected": -262.41033935546875, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.03267248719930649, |
|
"rewards/margins": 0.07917577773332596, |
|
"rewards/rejected": -0.11184825003147125, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5681452623266868e-06, |
|
"logits/chosen": 0.9456014633178711, |
|
"logits/rejected": 1.0302622318267822, |
|
"logps/chosen": -300.8179931640625, |
|
"logps/rejected": -262.069580078125, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.03674257546663284, |
|
"rewards/margins": 0.056660883128643036, |
|
"rewards/rejected": -0.09340345859527588, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5469861348078014e-06, |
|
"logits/chosen": 0.9554840326309204, |
|
"logits/rejected": 0.9924384951591492, |
|
"logps/chosen": -268.2906188964844, |
|
"logps/rejected": -242.49649047851562, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.032142072916030884, |
|
"rewards/margins": 0.056136567145586014, |
|
"rewards/rejected": -0.0882786363363266, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5259065836724035e-06, |
|
"logits/chosen": 0.9138981103897095, |
|
"logits/rejected": 0.9927932620048523, |
|
"logps/chosen": -269.4898986816406, |
|
"logps/rejected": -257.8581848144531, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.026234816759824753, |
|
"rewards/margins": 0.050769805908203125, |
|
"rewards/rejected": -0.07700462639331818, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.5049083690569456e-06, |
|
"logits/chosen": 0.8760209083557129, |
|
"logits/rejected": 0.9947643280029297, |
|
"logps/chosen": -277.1868591308594, |
|
"logps/rejected": -236.7860870361328, |
|
"loss": 0.6896, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.045319218188524246, |
|
"rewards/margins": 0.06617378443479538, |
|
"rewards/rejected": -0.11149300634860992, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4839932443063057e-06, |
|
"logits/chosen": 1.0274193286895752, |
|
"logits/rejected": 1.0255438089370728, |
|
"logps/chosen": -241.68191528320312, |
|
"logps/rejected": -215.37057495117188, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.046610768884420395, |
|
"rewards/margins": 0.05322499945759773, |
|
"rewards/rejected": -0.09983576834201813, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4631629558273803e-06, |
|
"logits/chosen": 0.9781502485275269, |
|
"logits/rejected": 1.0266082286834717, |
|
"logps/chosen": -291.5887756347656, |
|
"logps/rejected": -256.538818359375, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.028579484671354294, |
|
"rewards/margins": 0.07505873590707779, |
|
"rewards/rejected": -0.10363821685314178, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4424192429432657e-06, |
|
"logits/chosen": 0.9894530177116394, |
|
"logits/rejected": 1.038171648979187, |
|
"logps/chosen": -308.27691650390625, |
|
"logps/rejected": -225.60122680664062, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.03209725767374039, |
|
"rewards/margins": 0.06519778817892075, |
|
"rewards/rejected": -0.09729506075382233, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.421763837748016e-06, |
|
"logits/chosen": 0.9771683812141418, |
|
"logits/rejected": 1.058801531791687, |
|
"logps/chosen": -268.15179443359375, |
|
"logps/rejected": -273.706298828125, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.05141326040029526, |
|
"rewards/margins": 0.058643460273742676, |
|
"rewards/rejected": -0.11005672067403793, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.401198464962021e-06, |
|
"logits/chosen": 1.0130704641342163, |
|
"logits/rejected": 0.9349973797798157, |
|
"logps/chosen": -273.2847900390625, |
|
"logps/rejected": -240.21829223632812, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.0360480472445488, |
|
"rewards/margins": 0.05611228942871094, |
|
"rewards/rejected": -0.09216034412384033, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_logits/chosen": 0.9161260724067688, |
|
"eval_logits/rejected": 1.017266035079956, |
|
"eval_logps/chosen": -281.78839111328125, |
|
"eval_logps/rejected": -254.0473175048828, |
|
"eval_loss": 0.6909086108207703, |
|
"eval_rewards/accuracies": 0.6294999718666077, |
|
"eval_rewards/chosen": -0.042415801435709, |
|
"eval_rewards/margins": 0.05989806354045868, |
|
"eval_rewards/rejected": -0.10231386125087738, |
|
"eval_runtime": 540.1872, |
|
"eval_samples_per_second": 3.702, |
|
"eval_steps_per_second": 0.926, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.3807248417879896e-06, |
|
"logits/chosen": 0.932266354560852, |
|
"logits/rejected": 0.9696345329284668, |
|
"logps/chosen": -259.15472412109375, |
|
"logps/rejected": -220.0342559814453, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.02964337170124054, |
|
"rewards/margins": 0.05693807080388069, |
|
"rewards/rejected": -0.08658144623041153, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3603446777675665e-06, |
|
"logits/chosen": 0.9445293545722961, |
|
"logits/rejected": 0.9707993268966675, |
|
"logps/chosen": -260.50518798828125, |
|
"logps/rejected": -241.0626983642578, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.038475148379802704, |
|
"rewards/margins": 0.05124124884605408, |
|
"rewards/rejected": -0.08971639722585678, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3400596746385817e-06, |
|
"logits/chosen": 0.9341602325439453, |
|
"logits/rejected": 0.9812172055244446, |
|
"logps/chosen": -291.263671875, |
|
"logps/rejected": -256.05816650390625, |
|
"loss": 0.6898, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.03991129249334335, |
|
"rewards/margins": 0.06936169415712357, |
|
"rewards/rejected": -0.10927299410104752, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3198715261929587e-06, |
|
"logits/chosen": 0.9518648386001587, |
|
"logits/rejected": 1.0701024532318115, |
|
"logps/chosen": -322.59869384765625, |
|
"logps/rejected": -246.1614532470703, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.032660938799381256, |
|
"rewards/margins": 0.045336611568927765, |
|
"rewards/rejected": -0.07799754291772842, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.2997819181352823e-06, |
|
"logits/chosen": 0.9637743234634399, |
|
"logits/rejected": 0.9643661379814148, |
|
"logps/chosen": -299.9573669433594, |
|
"logps/rejected": -267.3519592285156, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.03847068175673485, |
|
"rewards/margins": 0.05295687913894653, |
|
"rewards/rejected": -0.09142756462097168, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2797925279420454e-06, |
|
"logits/chosen": 0.9999778866767883, |
|
"logits/rejected": 1.0599424839019775, |
|
"logps/chosen": -291.417724609375, |
|
"logps/rejected": -258.858642578125, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.04292257875204086, |
|
"rewards/margins": 0.045674268156290054, |
|
"rewards/rejected": -0.08859684318304062, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2599050247215764e-06, |
|
"logits/chosen": 0.9801284670829773, |
|
"logits/rejected": 1.0555846691131592, |
|
"logps/chosen": -286.49017333984375, |
|
"logps/rejected": -262.8829650878906, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.02869715727865696, |
|
"rewards/margins": 0.07526004314422607, |
|
"rewards/rejected": -0.10395719856023788, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2401210690746705e-06, |
|
"logits/chosen": 0.9294122457504272, |
|
"logits/rejected": 1.0954596996307373, |
|
"logps/chosen": -264.6773986816406, |
|
"logps/rejected": -241.242431640625, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.03240332007408142, |
|
"rewards/margins": 0.04624287039041519, |
|
"rewards/rejected": -0.07864619046449661, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2204423129559306e-06, |
|
"logits/chosen": 0.8595544695854187, |
|
"logits/rejected": 1.0383957624435425, |
|
"logps/chosen": -296.6278076171875, |
|
"logps/rejected": -270.6478576660156, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.030149826779961586, |
|
"rewards/margins": 0.045799605548381805, |
|
"rewards/rejected": -0.07594943791627884, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.20087039953583e-06, |
|
"logits/chosen": 0.9707738757133484, |
|
"logits/rejected": 0.9931126832962036, |
|
"logps/chosen": -281.4847412109375, |
|
"logps/rejected": -259.38934326171875, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.023116273805499077, |
|
"rewards/margins": 0.05793018266558647, |
|
"rewards/rejected": -0.0810464546084404, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_logits/chosen": 0.9119435548782349, |
|
"eval_logits/rejected": 1.0139191150665283, |
|
"eval_logps/chosen": -281.0736083984375, |
|
"eval_logps/rejected": -253.19638061523438, |
|
"eval_loss": 0.6908898949623108, |
|
"eval_rewards/accuracies": 0.6309999823570251, |
|
"eval_rewards/chosen": -0.03526770696043968, |
|
"eval_rewards/margins": 0.05853661522269249, |
|
"eval_rewards/rejected": -0.09380432963371277, |
|
"eval_runtime": 540.0121, |
|
"eval_samples_per_second": 3.704, |
|
"eval_steps_per_second": 0.926, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.181406963063507e-06, |
|
"logits/chosen": 0.9695969820022583, |
|
"logits/rejected": 0.9412325024604797, |
|
"logps/chosen": -277.9027099609375, |
|
"logps/rejected": -233.14767456054688, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.029822606593370438, |
|
"rewards/margins": 0.05514361336827278, |
|
"rewards/rejected": -0.08496621251106262, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1620536287303052e-06, |
|
"logits/chosen": 0.9639630317687988, |
|
"logits/rejected": 1.0453459024429321, |
|
"logps/chosen": -261.4854736328125, |
|
"logps/rejected": -243.4965057373047, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.037622541189193726, |
|
"rewards/margins": 0.05851644277572632, |
|
"rewards/rejected": -0.09613899141550064, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1428120125340717e-06, |
|
"logits/chosen": 1.0106990337371826, |
|
"logits/rejected": 0.9288986325263977, |
|
"logps/chosen": -273.11346435546875, |
|
"logps/rejected": -243.2933807373047, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.03193587437272072, |
|
"rewards/margins": 0.055615246295928955, |
|
"rewards/rejected": -0.08755112439393997, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.123683721144223e-06, |
|
"logits/chosen": 0.9593019485473633, |
|
"logits/rejected": 0.9824401140213013, |
|
"logps/chosen": -273.22357177734375, |
|
"logps/rejected": -241.69149780273438, |
|
"loss": 0.6893, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.03742024675011635, |
|
"rewards/margins": 0.07019098103046417, |
|
"rewards/rejected": -0.10761122405529022, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1046703517675848e-06, |
|
"logits/chosen": 0.9543115496635437, |
|
"logits/rejected": 0.9525305032730103, |
|
"logps/chosen": -247.79141235351562, |
|
"logps/rejected": -250.76815795898438, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.03152688220143318, |
|
"rewards/margins": 0.05581844598054886, |
|
"rewards/rejected": -0.08734532445669174, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.085773492015028e-06, |
|
"logits/chosen": 0.9415119290351868, |
|
"logits/rejected": 1.0495812892913818, |
|
"logps/chosen": -288.77117919921875, |
|
"logps/rejected": -236.2522735595703, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.034685637801885605, |
|
"rewards/margins": 0.07457654178142548, |
|
"rewards/rejected": -0.10926218330860138, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.0669947197689034e-06, |
|
"logits/chosen": 0.9325952529907227, |
|
"logits/rejected": 0.9842671155929565, |
|
"logps/chosen": -230.1918487548828, |
|
"logps/rejected": -243.35086059570312, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.04897845536470413, |
|
"rewards/margins": 0.04100048914551735, |
|
"rewards/rejected": -0.08997894823551178, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.048335603051291e-06, |
|
"logits/chosen": 0.8943045735359192, |
|
"logits/rejected": 1.0183279514312744, |
|
"logps/chosen": -272.97314453125, |
|
"logps/rejected": -255.63125610351562, |
|
"loss": 0.6906, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.03204040229320526, |
|
"rewards/margins": 0.0708232969045639, |
|
"rewards/rejected": -0.10286370664834976, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0297976998930665e-06, |
|
"logits/chosen": 0.897107720375061, |
|
"logits/rejected": 0.9336503744125366, |
|
"logps/chosen": -278.9725341796875, |
|
"logps/rejected": -243.04238891601562, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.027060145512223244, |
|
"rewards/margins": 0.05337075516581535, |
|
"rewards/rejected": -0.08043090254068375, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0113825582038078e-06, |
|
"logits/chosen": 0.9530097246170044, |
|
"logits/rejected": 1.0400608777999878, |
|
"logps/chosen": -271.3522033691406, |
|
"logps/rejected": -225.7040557861328, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.03952925279736519, |
|
"rewards/margins": 0.05476094409823418, |
|
"rewards/rejected": -0.09429020434617996, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_logits/chosen": 0.9141185283660889, |
|
"eval_logits/rejected": 1.0163326263427734, |
|
"eval_logps/chosen": -280.8155517578125, |
|
"eval_logps/rejected": -252.75259399414062, |
|
"eval_loss": 0.6908916234970093, |
|
"eval_rewards/accuracies": 0.6305000185966492, |
|
"eval_rewards/chosen": -0.0326874740421772, |
|
"eval_rewards/margins": 0.05667929723858833, |
|
"eval_rewards/rejected": -0.08936676383018494, |
|
"eval_runtime": 540.3199, |
|
"eval_samples_per_second": 3.702, |
|
"eval_steps_per_second": 0.925, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.930917156425477e-07, |
|
"logits/chosen": 0.9315062761306763, |
|
"logits/rejected": 1.0539835691452026, |
|
"logps/chosen": -247.2057342529297, |
|
"logps/rejected": -225.90493774414062, |
|
"loss": 0.6906, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.02216990664601326, |
|
"rewards/margins": 0.07055743038654327, |
|
"rewards/rejected": -0.09272731840610504, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.749266994893756e-07, |
|
"logits/chosen": 0.9194700121879578, |
|
"logits/rejected": 1.008080244064331, |
|
"logps/chosen": -242.8909454345703, |
|
"logps/rejected": -255.8279266357422, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.033027276396751404, |
|
"rewards/margins": 0.043912582099437714, |
|
"rewards/rejected": -0.07693986594676971, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.56889026517913e-07, |
|
"logits/chosen": 0.95648193359375, |
|
"logits/rejected": 1.0707709789276123, |
|
"logps/chosen": -272.9548645019531, |
|
"logps/rejected": -241.94235229492188, |
|
"loss": 0.6899, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.030579229816794395, |
|
"rewards/margins": 0.06354820728302002, |
|
"rewards/rejected": -0.09412743896245956, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.389802028686617e-07, |
|
"logits/chosen": 0.9589303135871887, |
|
"logits/rejected": 0.9868467450141907, |
|
"logps/chosen": -219.3035125732422, |
|
"logps/rejected": -192.64849853515625, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.0318605974316597, |
|
"rewards/margins": 0.05495452880859375, |
|
"rewards/rejected": -0.08681513369083405, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.212017239232427e-07, |
|
"logits/chosen": 0.9364684820175171, |
|
"logits/rejected": 1.0579065084457397, |
|
"logps/chosen": -272.2196350097656, |
|
"logps/rejected": -280.43341064453125, |
|
"loss": 0.6894, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.020410938188433647, |
|
"rewards/margins": 0.0782955139875412, |
|
"rewards/rejected": -0.0987064465880394, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.03555074179533e-07, |
|
"logits/chosen": 0.9313532114028931, |
|
"logits/rejected": 1.04340398311615, |
|
"logps/chosen": -267.8077087402344, |
|
"logps/rejected": -246.677001953125, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.024608857929706573, |
|
"rewards/margins": 0.06871498376131058, |
|
"rewards/rejected": -0.09332384169101715, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.860417271277067e-07, |
|
"logits/chosen": 0.9149921536445618, |
|
"logits/rejected": 1.0074148178100586, |
|
"logps/chosen": -243.07144165039062, |
|
"logps/rejected": -241.79171752929688, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.029673978686332703, |
|
"rewards/margins": 0.0581536665558815, |
|
"rewards/rejected": -0.0878276452422142, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.686631451272029e-07, |
|
"logits/chosen": 0.9447315335273743, |
|
"logits/rejected": 0.9529320001602173, |
|
"logps/chosen": -269.72869873046875, |
|
"logps/rejected": -222.9635467529297, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.03748570382595062, |
|
"rewards/margins": 0.05406556650996208, |
|
"rewards/rejected": -0.0915512815117836, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.514207792846168e-07, |
|
"logits/chosen": 1.0064308643341064, |
|
"logits/rejected": 1.0328409671783447, |
|
"logps/chosen": -253.03274536132812, |
|
"logps/rejected": -257.581298828125, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.03226761519908905, |
|
"rewards/margins": 0.06126902252435684, |
|
"rewards/rejected": -0.09353663772344589, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.343160693325356e-07, |
|
"logits/chosen": 0.9581842422485352, |
|
"logits/rejected": 1.0024080276489258, |
|
"logps/chosen": -233.0767364501953, |
|
"logps/rejected": -227.9841766357422, |
|
"loss": 0.6906, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.035527393221855164, |
|
"rewards/margins": 0.056871771812438965, |
|
"rewards/rejected": -0.09239916503429413, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_logits/chosen": 0.9098308682441711, |
|
"eval_logits/rejected": 1.0122586488723755, |
|
"eval_logps/chosen": -280.8845520019531, |
|
"eval_logps/rejected": -252.85272216796875, |
|
"eval_loss": 0.6908985376358032, |
|
"eval_rewards/accuracies": 0.6294999718666077, |
|
"eval_rewards/chosen": -0.033377815037965775, |
|
"eval_rewards/margins": 0.0569901280105114, |
|
"eval_rewards/rejected": -0.09036794304847717, |
|
"eval_runtime": 540.038, |
|
"eval_samples_per_second": 3.703, |
|
"eval_steps_per_second": 0.926, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.173504435093174e-07, |
|
"logits/chosen": 0.9396654963493347, |
|
"logits/rejected": 0.9836663007736206, |
|
"logps/chosen": -286.25970458984375, |
|
"logps/rejected": -269.1986083984375, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.03581177443265915, |
|
"rewards/margins": 0.04660804197192192, |
|
"rewards/rejected": -0.08241982758045197, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.00525318439836e-07, |
|
"logits/chosen": 0.9897071123123169, |
|
"logits/rejected": 1.0295822620391846, |
|
"logps/chosen": -272.0080871582031, |
|
"logps/rejected": -232.94650268554688, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.035867296159267426, |
|
"rewards/margins": 0.05984731763601303, |
|
"rewards/rejected": -0.09571461379528046, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.838420990171927e-07, |
|
"logits/chosen": 0.9099165797233582, |
|
"logits/rejected": 1.017539381980896, |
|
"logps/chosen": -313.5694885253906, |
|
"logps/rejected": -268.1796569824219, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.026468079537153244, |
|
"rewards/margins": 0.05932525545358658, |
|
"rewards/rejected": -0.08579333126544952, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.673021782854084e-07, |
|
"logits/chosen": 0.9644147753715515, |
|
"logits/rejected": 0.9656912088394165, |
|
"logps/chosen": -250.9320831298828, |
|
"logps/rejected": -264.95233154296875, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.03290482237935066, |
|
"rewards/margins": 0.0584709532558918, |
|
"rewards/rejected": -0.09137578308582306, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.509069373231039e-07, |
|
"logits/chosen": 0.9676412343978882, |
|
"logits/rejected": 0.9627419710159302, |
|
"logps/chosen": -294.781982421875, |
|
"logps/rejected": -271.5765380859375, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.022627348080277443, |
|
"rewards/margins": 0.06841419637203217, |
|
"rewards/rejected": -0.09104155004024506, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.346577451281822e-07, |
|
"logits/chosen": 0.9224090576171875, |
|
"logits/rejected": 1.0375105142593384, |
|
"logps/chosen": -251.53451538085938, |
|
"logps/rejected": -233.12393188476562, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.04297469183802605, |
|
"rewards/margins": 0.05884693190455437, |
|
"rewards/rejected": -0.10182162374258041, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.185559585035138e-07, |
|
"logits/chosen": 0.9037634134292603, |
|
"logits/rejected": 0.9682399034500122, |
|
"logps/chosen": -286.7387390136719, |
|
"logps/rejected": -260.4252014160156, |
|
"loss": 0.6901, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.023535221815109253, |
|
"rewards/margins": 0.057094089686870575, |
|
"rewards/rejected": -0.08062931150197983, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.026029219436504e-07, |
|
"logits/chosen": 0.9559614062309265, |
|
"logits/rejected": 1.007653832435608, |
|
"logps/chosen": -306.8362121582031, |
|
"logps/rejected": -272.37249755859375, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.029121745377779007, |
|
"rewards/margins": 0.0643700435757637, |
|
"rewards/rejected": -0.09349179267883301, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.867999675225523e-07, |
|
"logits/chosen": 0.9615996479988098, |
|
"logits/rejected": 0.9802320599555969, |
|
"logps/chosen": -297.98504638671875, |
|
"logps/rejected": -250.20980834960938, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.02488793060183525, |
|
"rewards/margins": 0.05079926922917366, |
|
"rewards/rejected": -0.07568720728158951, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.711484147823663e-07, |
|
"logits/chosen": 0.9520236849784851, |
|
"logits/rejected": 0.9852927923202515, |
|
"logps/chosen": -270.9884033203125, |
|
"logps/rejected": -242.1695098876953, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.029557928442955017, |
|
"rewards/margins": 0.06538344919681549, |
|
"rewards/rejected": -0.0949413850903511, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_logits/chosen": 0.9123407602310181, |
|
"eval_logits/rejected": 1.0147464275360107, |
|
"eval_logps/chosen": -280.66253662109375, |
|
"eval_logps/rejected": -252.71670532226562, |
|
"eval_loss": 0.6908931732177734, |
|
"eval_rewards/accuracies": 0.6294999718666077, |
|
"eval_rewards/chosen": -0.031157268211245537, |
|
"eval_rewards/margins": 0.05785065144300461, |
|
"eval_rewards/rejected": -0.0890079066157341, |
|
"eval_runtime": 540.0515, |
|
"eval_samples_per_second": 3.703, |
|
"eval_steps_per_second": 0.926, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.556495706232413e-07, |
|
"logits/chosen": 0.9562872052192688, |
|
"logits/rejected": 1.0361840724945068, |
|
"logps/chosen": -274.87127685546875, |
|
"logps/rejected": -239.40811157226562, |
|
"loss": 0.6906, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.02163364365696907, |
|
"rewards/margins": 0.06655408442020416, |
|
"rewards/rejected": -0.08818772435188293, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.403047291942057e-07, |
|
"logits/chosen": 0.9342554211616516, |
|
"logits/rejected": 0.9498234987258911, |
|
"logps/chosen": -278.9698486328125, |
|
"logps/rejected": -264.5009765625, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.016571203246712685, |
|
"rewards/margins": 0.055479150265455246, |
|
"rewards/rejected": -0.07205035537481308, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.251151717851023e-07, |
|
"logits/chosen": 0.9194384813308716, |
|
"logits/rejected": 0.9668914079666138, |
|
"logps/chosen": -291.434814453125, |
|
"logps/rejected": -275.8680114746094, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.03377396613359451, |
|
"rewards/margins": 0.07892084121704102, |
|
"rewards/rejected": -0.11269481480121613, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.100821667196041e-07, |
|
"logits/chosen": 0.9412601590156555, |
|
"logits/rejected": 1.0217000246047974, |
|
"logps/chosen": -304.5139465332031, |
|
"logps/rejected": -248.54312133789062, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.01636696420609951, |
|
"rewards/margins": 0.06269043684005737, |
|
"rewards/rejected": -0.07905739545822144, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.952069692493062e-07, |
|
"logits/chosen": 0.88300621509552, |
|
"logits/rejected": 1.0030475854873657, |
|
"logps/chosen": -301.0616149902344, |
|
"logps/rejected": -266.23419189453125, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.011049589142203331, |
|
"rewards/margins": 0.07019064575433731, |
|
"rewards/rejected": -0.0812402293086052, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.80490821448918e-07, |
|
"logits/chosen": 0.9647413492202759, |
|
"logits/rejected": 0.9922765493392944, |
|
"logps/chosen": -222.5147705078125, |
|
"logps/rejected": -206.96414184570312, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.03366454318165779, |
|
"rewards/margins": 0.058607954531908035, |
|
"rewards/rejected": -0.09227249771356583, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.659349521125459e-07, |
|
"logits/chosen": 0.9332239031791687, |
|
"logits/rejected": 1.0312001705169678, |
|
"logps/chosen": -238.71188354492188, |
|
"logps/rejected": -243.7023468017578, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.033622294664382935, |
|
"rewards/margins": 0.06797768920660019, |
|
"rewards/rejected": -0.10159997642040253, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.5154057665109e-07, |
|
"logits/chosen": 0.9673851132392883, |
|
"logits/rejected": 1.029404878616333, |
|
"logps/chosen": -280.47100830078125, |
|
"logps/rejected": -243.7905731201172, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.02676049992442131, |
|
"rewards/margins": 0.06190108135342598, |
|
"rewards/rejected": -0.08866159617900848, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.373088969907586e-07, |
|
"logits/chosen": 0.9254564046859741, |
|
"logits/rejected": 1.013877272605896, |
|
"logps/chosen": -260.7253723144531, |
|
"logps/rejected": -259.11224365234375, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.024502191692590714, |
|
"rewards/margins": 0.059731971472501755, |
|
"rewards/rejected": -0.08423416316509247, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.23241101472709e-07, |
|
"logits/chosen": 0.9964572191238403, |
|
"logits/rejected": 0.9820792078971863, |
|
"logps/chosen": -255.88662719726562, |
|
"logps/rejected": -244.15097045898438, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.02560483291745186, |
|
"rewards/margins": 0.0563865527510643, |
|
"rewards/rejected": -0.08199138939380646, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_logits/chosen": 0.9147237539291382, |
|
"eval_logits/rejected": 1.0175365209579468, |
|
"eval_logps/chosen": -280.5528869628906, |
|
"eval_logps/rejected": -252.58462524414062, |
|
"eval_loss": 0.6908957958221436, |
|
"eval_rewards/accuracies": 0.6330000162124634, |
|
"eval_rewards/chosen": -0.03006073087453842, |
|
"eval_rewards/margins": 0.057626351714134216, |
|
"eval_rewards/rejected": -0.08768707513809204, |
|
"eval_runtime": 540.1337, |
|
"eval_samples_per_second": 3.703, |
|
"eval_steps_per_second": 0.926, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.09338364753818e-07, |
|
"logits/chosen": 0.901587963104248, |
|
"logits/rejected": 1.0238076448440552, |
|
"logps/chosen": -293.3055114746094, |
|
"logps/rejected": -265.13775634765625, |
|
"loss": 0.6899, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.022513773292303085, |
|
"rewards/margins": 0.08927594870328903, |
|
"rewards/rejected": -0.11178971827030182, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.956018477086005e-07, |
|
"logits/chosen": 1.040078043937683, |
|
"logits/rejected": 1.0295056104660034, |
|
"logps/chosen": -282.08648681640625, |
|
"logps/rejected": -230.9876251220703, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.026005476713180542, |
|
"rewards/margins": 0.054637789726257324, |
|
"rewards/rejected": -0.08064327389001846, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.820326973322764e-07, |
|
"logits/chosen": 0.9570829272270203, |
|
"logits/rejected": 1.007840871810913, |
|
"logps/chosen": -296.0901794433594, |
|
"logps/rejected": -222.00198364257812, |
|
"loss": 0.6906, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.015858953818678856, |
|
"rewards/margins": 0.0702957734465599, |
|
"rewards/rejected": -0.08615472912788391, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.686320466449981e-07, |
|
"logits/chosen": 0.9432961344718933, |
|
"logits/rejected": 0.9931074976921082, |
|
"logps/chosen": -237.6160430908203, |
|
"logps/rejected": -222.57870483398438, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.024945750832557678, |
|
"rewards/margins": 0.07617698609828949, |
|
"rewards/rejected": -0.10112272202968597, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.554010145972418e-07, |
|
"logits/chosen": 0.9430766105651855, |
|
"logits/rejected": 1.0184309482574463, |
|
"logps/chosen": -286.0646667480469, |
|
"logps/rejected": -229.7809600830078, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.017250075936317444, |
|
"rewards/margins": 0.05181562900543213, |
|
"rewards/rejected": -0.06906570494174957, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.4234070597637455e-07, |
|
"logits/chosen": 0.8948880434036255, |
|
"logits/rejected": 1.0683404207229614, |
|
"logps/chosen": -271.1024169921875, |
|
"logps/rejected": -254.7742462158203, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.032865189015865326, |
|
"rewards/margins": 0.04334001615643501, |
|
"rewards/rejected": -0.07620520889759064, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.2945221131440783e-07, |
|
"logits/chosen": 0.9620285034179688, |
|
"logits/rejected": 1.0088149309158325, |
|
"logps/chosen": -246.1608123779297, |
|
"logps/rejected": -221.75363159179688, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.027930408716201782, |
|
"rewards/margins": 0.05746666342020035, |
|
"rewards/rejected": -0.08539707213640213, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.167366067969381e-07, |
|
"logits/chosen": 0.895652174949646, |
|
"logits/rejected": 0.9522021412849426, |
|
"logps/chosen": -272.8707580566406, |
|
"logps/rejected": -259.20855712890625, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.0206812284886837, |
|
"rewards/margins": 0.05796490237116814, |
|
"rewards/rejected": -0.07864613831043243, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.041949541732826e-07, |
|
"logits/chosen": 0.9851544499397278, |
|
"logits/rejected": 1.0554238557815552, |
|
"logps/chosen": -296.3244323730469, |
|
"logps/rejected": -245.6664581298828, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.02956976927816868, |
|
"rewards/margins": 0.04805067181587219, |
|
"rewards/rejected": -0.07762044668197632, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.9182830066782614e-07, |
|
"logits/chosen": 0.9625126123428345, |
|
"logits/rejected": 0.9740635752677917, |
|
"logps/chosen": -282.29425048828125, |
|
"logps/rejected": -246.2125244140625, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.03583105653524399, |
|
"rewards/margins": 0.048236239701509476, |
|
"rewards/rejected": -0.08406729251146317, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": 0.9153636693954468, |
|
"eval_logits/rejected": 1.0176055431365967, |
|
"eval_logps/chosen": -280.5576477050781, |
|
"eval_logps/rejected": -252.59996032714844, |
|
"eval_loss": 0.6908729076385498, |
|
"eval_rewards/accuracies": 0.6305000185966492, |
|
"eval_rewards/chosen": -0.03010854683816433, |
|
"eval_rewards/margins": 0.057731661945581436, |
|
"eval_rewards/rejected": -0.08784020692110062, |
|
"eval_runtime": 540.1227, |
|
"eval_samples_per_second": 3.703, |
|
"eval_steps_per_second": 0.926, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.796376788925771e-07, |
|
"logits/chosen": 0.9004298448562622, |
|
"logits/rejected": 1.0678789615631104, |
|
"logps/chosen": -247.7801513671875, |
|
"logps/rejected": -225.3026580810547, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.03286176174879074, |
|
"rewards/margins": 0.062369298189878464, |
|
"rewards/rejected": -0.0952310562133789, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.676241067609465e-07, |
|
"logits/chosen": 0.9522799253463745, |
|
"logits/rejected": 0.9462326169013977, |
|
"logps/chosen": -242.72482299804688, |
|
"logps/rejected": -237.6994171142578, |
|
"loss": 0.6895, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.030536144971847534, |
|
"rewards/margins": 0.06319095939397812, |
|
"rewards/rejected": -0.09372710436582565, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.5578858740274976e-07, |
|
"logits/chosen": 0.9089903831481934, |
|
"logits/rejected": 1.0164532661437988, |
|
"logps/chosen": -300.99224853515625, |
|
"logps/rejected": -260.6708984375, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.021878289058804512, |
|
"rewards/margins": 0.06206582114100456, |
|
"rewards/rejected": -0.08394411206245422, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.44132109080447e-07, |
|
"logits/chosen": 0.9136902093887329, |
|
"logits/rejected": 1.064955234527588, |
|
"logps/chosen": -260.19952392578125, |
|
"logps/rejected": -237.27487182617188, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.03714042156934738, |
|
"rewards/margins": 0.04816558212041855, |
|
"rewards/rejected": -0.08530601114034653, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.3265564510662344e-07, |
|
"logits/chosen": 0.9555643200874329, |
|
"logits/rejected": 1.0349557399749756, |
|
"logps/chosen": -254.3995361328125, |
|
"logps/rejected": -219.08493041992188, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.02656802162528038, |
|
"rewards/margins": 0.052377671003341675, |
|
"rewards/rejected": -0.07894569635391235, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.213601537627195e-07, |
|
"logits/chosen": 0.9827576875686646, |
|
"logits/rejected": 0.9898948669433594, |
|
"logps/chosen": -261.27630615234375, |
|
"logps/rejected": -252.62423706054688, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.031087204813957214, |
|
"rewards/margins": 0.05801438167691231, |
|
"rewards/rejected": -0.08910159021615982, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.1024657821901063e-07, |
|
"logits/chosen": 0.9491473436355591, |
|
"logits/rejected": 0.972917914390564, |
|
"logps/chosen": -255.64334106445312, |
|
"logps/rejected": -237.9709930419922, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.025073718279600143, |
|
"rewards/margins": 0.06804076582193375, |
|
"rewards/rejected": -0.09311448037624359, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.9931584645585654e-07, |
|
"logits/chosen": 1.007514238357544, |
|
"logits/rejected": 1.0354502201080322, |
|
"logps/chosen": -285.310302734375, |
|
"logps/rejected": -281.1996154785156, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.02509785071015358, |
|
"rewards/margins": 0.07314437627792358, |
|
"rewards/rejected": -0.09824222326278687, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.885688711862136e-07, |
|
"logits/chosen": 0.9445541501045227, |
|
"logits/rejected": 0.9918941259384155, |
|
"logps/chosen": -257.47137451171875, |
|
"logps/rejected": -222.67733764648438, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.025721009820699692, |
|
"rewards/margins": 0.0427216961979866, |
|
"rewards/rejected": -0.068442702293396, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.7800654977942486e-07, |
|
"logits/chosen": 0.9048360586166382, |
|
"logits/rejected": 1.0405280590057373, |
|
"logps/chosen": -294.37847900390625, |
|
"logps/rejected": -225.70297241210938, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.02359098568558693, |
|
"rewards/margins": 0.06500183045864105, |
|
"rewards/rejected": -0.08859282732009888, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_logits/chosen": 0.9186109900474548, |
|
"eval_logits/rejected": 1.0211718082427979, |
|
"eval_logps/chosen": -280.2095947265625, |
|
"eval_logps/rejected": -252.20504760742188, |
|
"eval_loss": 0.6908652782440186, |
|
"eval_rewards/accuracies": 0.6284999847412109, |
|
"eval_rewards/chosen": -0.02662779949605465, |
|
"eval_rewards/margins": 0.057263679802417755, |
|
"eval_rewards/rejected": -0.08389147371053696, |
|
"eval_runtime": 540.4002, |
|
"eval_samples_per_second": 3.701, |
|
"eval_steps_per_second": 0.925, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.6762976418628797e-07, |
|
"logits/chosen": 0.9340456128120422, |
|
"logits/rejected": 0.9448652267456055, |
|
"logps/chosen": -252.797119140625, |
|
"logps/rejected": -231.66714477539062, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.022808540612459183, |
|
"rewards/margins": 0.05553862452507019, |
|
"rewards/rejected": -0.07834717631340027, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.5743938086541354e-07, |
|
"logits/chosen": 0.9630203247070312, |
|
"logits/rejected": 1.0022109746932983, |
|
"logps/chosen": -297.3249816894531, |
|
"logps/rejected": -251.38656616210938, |
|
"loss": 0.6895, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.015477204695343971, |
|
"rewards/margins": 0.08188708126544952, |
|
"rewards/rejected": -0.09736428409814835, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.4743625071087574e-07, |
|
"logits/chosen": 0.9089611768722534, |
|
"logits/rejected": 1.0690263509750366, |
|
"logps/chosen": -269.80987548828125, |
|
"logps/rejected": -246.6451873779297, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.031479865312576294, |
|
"rewards/margins": 0.0678834319114685, |
|
"rewards/rejected": -0.0993632897734642, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.3762120898116498e-07, |
|
"logits/chosen": 0.9765059351921082, |
|
"logits/rejected": 1.011185884475708, |
|
"logps/chosen": -267.643798828125, |
|
"logps/rejected": -244.47586059570312, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.02102813683450222, |
|
"rewards/margins": 0.03919995576143265, |
|
"rewards/rejected": -0.060228098183870316, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.2799507522944048e-07, |
|
"logits/chosen": 0.9900724291801453, |
|
"logits/rejected": 1.0086749792099, |
|
"logps/chosen": -296.67889404296875, |
|
"logps/rejected": -239.91189575195312, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.028908709064126015, |
|
"rewards/margins": 0.030585547909140587, |
|
"rewards/rejected": -0.0594942569732666, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1855865323510056e-07, |
|
"logits/chosen": 0.9651070833206177, |
|
"logits/rejected": 1.0190773010253906, |
|
"logps/chosen": -290.01953125, |
|
"logps/rejected": -266.74298095703125, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.017114948481321335, |
|
"rewards/margins": 0.06442873179912567, |
|
"rewards/rejected": -0.08154366910457611, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.0931273093666575e-07, |
|
"logits/chosen": 1.0086729526519775, |
|
"logits/rejected": 1.0279282331466675, |
|
"logps/chosen": -267.48614501953125, |
|
"logps/rejected": -265.8891296386719, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.02099434658885002, |
|
"rewards/margins": 0.061572205275297165, |
|
"rewards/rejected": -0.08256654441356659, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.002580803659873e-07, |
|
"logits/chosen": 1.0000813007354736, |
|
"logits/rejected": 1.0106067657470703, |
|
"logps/chosen": -293.6463623046875, |
|
"logps/rejected": -262.5312805175781, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.036777563393116, |
|
"rewards/margins": 0.04339155554771423, |
|
"rewards/rejected": -0.08016912639141083, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.913954575837826e-07, |
|
"logits/chosen": 1.0087939500808716, |
|
"logits/rejected": 1.079708456993103, |
|
"logps/chosen": -292.1297912597656, |
|
"logps/rejected": -263.822509765625, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.028077151626348495, |
|
"rewards/margins": 0.06453180313110352, |
|
"rewards/rejected": -0.09260895103216171, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.827256026165028e-07, |
|
"logits/chosen": 1.0100739002227783, |
|
"logits/rejected": 1.02089524269104, |
|
"logps/chosen": -272.09161376953125, |
|
"logps/rejected": -239.74560546875, |
|
"loss": 0.689, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.02850997820496559, |
|
"rewards/margins": 0.07207761704921722, |
|
"rewards/rejected": -0.10058760643005371, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_logits/chosen": 0.9202005863189697, |
|
"eval_logits/rejected": 1.0222870111465454, |
|
"eval_logps/chosen": -280.4384460449219, |
|
"eval_logps/rejected": -252.48487854003906, |
|
"eval_loss": 0.6908697485923767, |
|
"eval_rewards/accuracies": 0.628000020980835, |
|
"eval_rewards/chosen": -0.028916185721755028, |
|
"eval_rewards/margins": 0.057773273438215256, |
|
"eval_rewards/rejected": -0.08668945729732513, |
|
"eval_runtime": 540.2108, |
|
"eval_samples_per_second": 3.702, |
|
"eval_steps_per_second": 0.926, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.7424923939454274e-07, |
|
"logits/chosen": 1.0115320682525635, |
|
"logits/rejected": 1.0266879796981812, |
|
"logps/chosen": -282.57763671875, |
|
"logps/rejected": -256.1681213378906, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.031413815915584564, |
|
"rewards/margins": 0.056411970406770706, |
|
"rewards/rejected": -0.08782579004764557, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.6596707569179304e-07, |
|
"logits/chosen": 0.9267918467521667, |
|
"logits/rejected": 0.988551139831543, |
|
"logps/chosen": -267.50616455078125, |
|
"logps/rejected": -228.9858856201172, |
|
"loss": 0.6906, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.022525835782289505, |
|
"rewards/margins": 0.055544476956129074, |
|
"rewards/rejected": -0.07807030528783798, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.578798030665385e-07, |
|
"logits/chosen": 0.9413129687309265, |
|
"logits/rejected": 0.9737070798873901, |
|
"logps/chosen": -287.58221435546875, |
|
"logps/rejected": -231.7978973388672, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.031874872744083405, |
|
"rewards/margins": 0.03842931613326073, |
|
"rewards/rejected": -0.07030418515205383, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.499880968037165e-07, |
|
"logits/chosen": 0.9008700251579285, |
|
"logits/rejected": 1.0831358432769775, |
|
"logps/chosen": -283.8552551269531, |
|
"logps/rejected": -254.0209503173828, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.02647927962243557, |
|
"rewards/margins": 0.07812824845314026, |
|
"rewards/rejected": -0.10460753738880157, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.4229261585852805e-07, |
|
"logits/chosen": 0.9461116790771484, |
|
"logits/rejected": 1.0905543565750122, |
|
"logps/chosen": -259.1544494628906, |
|
"logps/rejected": -243.8948211669922, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.03435998409986496, |
|
"rewards/margins": 0.07146745920181274, |
|
"rewards/rejected": -0.1058274507522583, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.3479400280141886e-07, |
|
"logits/chosen": 0.9727323651313782, |
|
"logits/rejected": 0.9646995663642883, |
|
"logps/chosen": -240.6188507080078, |
|
"logps/rejected": -220.088623046875, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.021371301263570786, |
|
"rewards/margins": 0.04031342267990112, |
|
"rewards/rejected": -0.06168472766876221, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.2749288376442044e-07, |
|
"logits/chosen": 1.0163447856903076, |
|
"logits/rejected": 1.0307289361953735, |
|
"logps/chosen": -249.5280303955078, |
|
"logps/rejected": -242.83419799804688, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.033246301114559174, |
|
"rewards/margins": 0.05100921913981438, |
|
"rewards/rejected": -0.08425550907850266, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.203898683888713e-07, |
|
"logits/chosen": 0.9769922494888306, |
|
"logits/rejected": 1.0272125005722046, |
|
"logps/chosen": -255.9679718017578, |
|
"logps/rejected": -241.6696319580078, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.036718789488077164, |
|
"rewards/margins": 0.06190019100904465, |
|
"rewards/rejected": -0.09861898422241211, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1348554977451132e-07, |
|
"logits/chosen": 1.0243645906448364, |
|
"logits/rejected": 1.0316869020462036, |
|
"logps/chosen": -243.71621704101562, |
|
"logps/rejected": -253.5388946533203, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.03732157126069069, |
|
"rewards/margins": 0.06564446538686752, |
|
"rewards/rejected": -0.10296603292226791, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0678050442995802e-07, |
|
"logits/chosen": 0.9439903497695923, |
|
"logits/rejected": 0.9749709963798523, |
|
"logps/chosen": -262.73333740234375, |
|
"logps/rejected": -220.5067138671875, |
|
"loss": 0.6901, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.03468143194913864, |
|
"rewards/margins": 0.0676378607749939, |
|
"rewards/rejected": -0.10231930017471313, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_logits/chosen": 0.9216287136077881, |
|
"eval_logits/rejected": 1.0239145755767822, |
|
"eval_logps/chosen": -280.4474792480469, |
|
"eval_logps/rejected": -252.50457763671875, |
|
"eval_loss": 0.6908650398254395, |
|
"eval_rewards/accuracies": 0.6259999871253967, |
|
"eval_rewards/chosen": -0.029006626456975937, |
|
"eval_rewards/margins": 0.05787980556488037, |
|
"eval_rewards/rejected": -0.0868864357471466, |
|
"eval_runtime": 539.9895, |
|
"eval_samples_per_second": 3.704, |
|
"eval_steps_per_second": 0.926, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0027529222456755e-07, |
|
"logits/chosen": 0.9812003374099731, |
|
"logits/rejected": 0.9633600115776062, |
|
"logps/chosen": -264.53521728515625, |
|
"logps/rejected": -254.2129364013672, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.03213541954755783, |
|
"rewards/margins": 0.0566454641520977, |
|
"rewards/rejected": -0.08878089487552643, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.397045634168766e-08, |
|
"logits/chosen": 0.9166300892829895, |
|
"logits/rejected": 1.0722475051879883, |
|
"logps/chosen": -275.48828125, |
|
"logps/rejected": -235.26626586914062, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.016414564102888107, |
|
"rewards/margins": 0.06454362720251083, |
|
"rewards/rejected": -0.08095818758010864, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 8.78665232332998e-08, |
|
"logits/chosen": 0.9330304265022278, |
|
"logits/rejected": 1.0522311925888062, |
|
"logps/chosen": -260.5693054199219, |
|
"logps/rejected": -258.9266052246094, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.025154178962111473, |
|
"rewards/margins": 0.05904405564069748, |
|
"rewards/rejected": -0.08419822156429291, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 8.196400257606208e-08, |
|
"logits/chosen": 0.9113238453865051, |
|
"logits/rejected": 0.9963987469673157, |
|
"logps/chosen": -279.2100524902344, |
|
"logps/rejected": -249.4517059326172, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.020202763378620148, |
|
"rewards/margins": 0.04745917767286301, |
|
"rewards/rejected": -0.06766194850206375, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.626338722875076e-08, |
|
"logits/chosen": 1.0007197856903076, |
|
"logits/rejected": 0.9505215883255005, |
|
"logps/chosen": -280.51666259765625, |
|
"logps/rejected": -254.579833984375, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.03505931794643402, |
|
"rewards/margins": 0.04930321127176285, |
|
"rewards/rejected": -0.08436252176761627, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.076515319110688e-08, |
|
"logits/chosen": 1.0191797018051147, |
|
"logits/rejected": 1.0009005069732666, |
|
"logps/chosen": -292.7381591796875, |
|
"logps/rejected": -252.88143920898438, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.029474809765815735, |
|
"rewards/margins": 0.05774076655507088, |
|
"rewards/rejected": -0.08721558004617691, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.54697595640899e-08, |
|
"logits/chosen": 1.0553741455078125, |
|
"logits/rejected": 0.9938759803771973, |
|
"logps/chosen": -280.03265380859375, |
|
"logps/rejected": -261.9696044921875, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.024510715156793594, |
|
"rewards/margins": 0.05735497549176216, |
|
"rewards/rejected": -0.08186569064855576, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.037764851154426e-08, |
|
"logits/chosen": 0.9185417890548706, |
|
"logits/rejected": 1.0824509859085083, |
|
"logps/chosen": -279.9969787597656, |
|
"logps/rejected": -240.0286865234375, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.029702965170145035, |
|
"rewards/margins": 0.05176641792058945, |
|
"rewards/rejected": -0.08146937936544418, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.548924522327748e-08, |
|
"logits/chosen": 0.894769012928009, |
|
"logits/rejected": 1.0935579538345337, |
|
"logps/chosen": -275.4918518066406, |
|
"logps/rejected": -258.45953369140625, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.02207362651824951, |
|
"rewards/margins": 0.06469397246837616, |
|
"rewards/rejected": -0.08676759898662567, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.0804957879556915e-08, |
|
"logits/chosen": 0.9377716183662415, |
|
"logits/rejected": 1.0579009056091309, |
|
"logps/chosen": -318.527587890625, |
|
"logps/rejected": -260.9480285644531, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.03672604635357857, |
|
"rewards/margins": 0.05514238029718399, |
|
"rewards/rejected": -0.09186841547489166, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_logits/chosen": 0.9221275448799133, |
|
"eval_logits/rejected": 1.0243616104125977, |
|
"eval_logps/chosen": -280.42578125, |
|
"eval_logps/rejected": -252.4630584716797, |
|
"eval_loss": 0.6908705830574036, |
|
"eval_rewards/accuracies": 0.6290000081062317, |
|
"eval_rewards/chosen": -0.02878967486321926, |
|
"eval_rewards/margins": 0.057681918144226074, |
|
"eval_rewards/rejected": -0.08647158741950989, |
|
"eval_runtime": 539.9286, |
|
"eval_samples_per_second": 3.704, |
|
"eval_steps_per_second": 0.926, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.632517761702815e-08, |
|
"logits/chosen": 0.9917756915092468, |
|
"logits/rejected": 1.043176293373108, |
|
"logps/chosen": -285.3194274902344, |
|
"logps/rejected": -251.86471557617188, |
|
"loss": 0.6901, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.020223677158355713, |
|
"rewards/margins": 0.0789574682712555, |
|
"rewards/rejected": -0.0991811528801918, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.205027849605359e-08, |
|
"logits/chosen": 0.9845743179321289, |
|
"logits/rejected": 0.9572548866271973, |
|
"logps/chosen": -231.9497833251953, |
|
"logps/rejected": -235.6708984375, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.024526074528694153, |
|
"rewards/margins": 0.0476609468460083, |
|
"rewards/rejected": -0.07218702882528305, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.798061746947995e-08, |
|
"logits/chosen": 0.9714505076408386, |
|
"logits/rejected": 0.9893890619277954, |
|
"logps/chosen": -345.9156188964844, |
|
"logps/rejected": -263.2041931152344, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.022486504167318344, |
|
"rewards/margins": 0.052516452968120575, |
|
"rewards/rejected": -0.07500295341014862, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.411653435283158e-08, |
|
"logits/chosen": 0.9488525390625, |
|
"logits/rejected": 1.0411800146102905, |
|
"logps/chosen": -294.39727783203125, |
|
"logps/rejected": -257.3688659667969, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.03506433218717575, |
|
"rewards/margins": 0.0475606694817543, |
|
"rewards/rejected": -0.08262500911951065, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.04583517959367e-08, |
|
"logits/chosen": 0.9405366778373718, |
|
"logits/rejected": 0.9672233462333679, |
|
"logps/chosen": -299.8280334472656, |
|
"logps/rejected": -250.7571258544922, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.011996401473879814, |
|
"rewards/margins": 0.08034516870975494, |
|
"rewards/rejected": -0.09234156459569931, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.7006375255985984e-08, |
|
"logits/chosen": 0.9549415707588196, |
|
"logits/rejected": 1.0070356130599976, |
|
"logps/chosen": -241.44503784179688, |
|
"logps/rejected": -215.5791778564453, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.029812267050147057, |
|
"rewards/margins": 0.04664590209722519, |
|
"rewards/rejected": -0.0764581710100174, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.3760892972027328e-08, |
|
"logits/chosen": 0.8861829042434692, |
|
"logits/rejected": 1.0260193347930908, |
|
"logps/chosen": -290.62060546875, |
|
"logps/rejected": -238.04080200195312, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.025474613532423973, |
|
"rewards/margins": 0.061755161732435226, |
|
"rewards/rejected": -0.08722977340221405, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.072217594089765e-08, |
|
"logits/chosen": 0.9960481524467468, |
|
"logits/rejected": 0.9741401672363281, |
|
"logps/chosen": -247.71484375, |
|
"logps/rejected": -232.7592315673828, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.025015806779265404, |
|
"rewards/margins": 0.04509962350130081, |
|
"rewards/rejected": -0.07011543214321136, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.789047789459375e-08, |
|
"logits/chosen": 0.9169954061508179, |
|
"logits/rejected": 0.9940811991691589, |
|
"logps/chosen": -279.7234191894531, |
|
"logps/rejected": -257.5582580566406, |
|
"loss": 0.6901, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.029348302632570267, |
|
"rewards/margins": 0.05934641510248184, |
|
"rewards/rejected": -0.08869470655918121, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.5266035279088708e-08, |
|
"logits/chosen": 0.9306381940841675, |
|
"logits/rejected": 0.9920527338981628, |
|
"logps/chosen": -320.2879333496094, |
|
"logps/rejected": -246.09512329101562, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.02664271369576454, |
|
"rewards/margins": 0.05591448396444321, |
|
"rewards/rejected": -0.08255720138549805, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_logits/chosen": 0.9215983152389526, |
|
"eval_logits/rejected": 1.0239914655685425, |
|
"eval_logps/chosen": -280.43499755859375, |
|
"eval_logps/rejected": -252.45912170410156, |
|
"eval_loss": 0.6908671855926514, |
|
"eval_rewards/accuracies": 0.6320000290870667, |
|
"eval_rewards/chosen": -0.028881965205073357, |
|
"eval_rewards/margins": 0.05755016952753067, |
|
"eval_rewards/rejected": -0.08643212914466858, |
|
"eval_runtime": 540.2057, |
|
"eval_samples_per_second": 3.702, |
|
"eval_steps_per_second": 0.926, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.2849067234584623e-08, |
|
"logits/chosen": 0.9069113731384277, |
|
"logits/rejected": 0.9997833371162415, |
|
"logps/chosen": -258.52178955078125, |
|
"logps/rejected": -240.258544921875, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.02280072309076786, |
|
"rewards/margins": 0.054703257977962494, |
|
"rewards/rejected": -0.0775039792060852, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.0639775577218625e-08, |
|
"logits/chosen": 0.9457674026489258, |
|
"logits/rejected": 0.987481415271759, |
|
"logps/chosen": -288.9521484375, |
|
"logps/rejected": -266.4629821777344, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.019102510064840317, |
|
"rewards/margins": 0.06657516956329346, |
|
"rewards/rejected": -0.08567767590284348, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.638344782207486e-09, |
|
"logits/chosen": 0.9037330746650696, |
|
"logits/rejected": 1.0098841190338135, |
|
"logps/chosen": -303.5415954589844, |
|
"logps/rejected": -282.108642578125, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.038687679916620255, |
|
"rewards/margins": 0.04073931276798248, |
|
"rewards/rejected": -0.07942698895931244, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.84494196844715e-09, |
|
"logits/chosen": 0.9645431637763977, |
|
"logits/rejected": 1.0531866550445557, |
|
"logps/chosen": -265.5387268066406, |
|
"logps/rejected": -245.14968872070312, |
|
"loss": 0.6901, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.018353218212723732, |
|
"rewards/margins": 0.05940115451812744, |
|
"rewards/rejected": -0.07775436341762543, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.259716884556121e-09, |
|
"logits/chosen": 0.9109350442886353, |
|
"logits/rejected": 1.009334683418274, |
|
"logps/chosen": -311.3597412109375, |
|
"logps/rejected": -255.84878540039062, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.02166152000427246, |
|
"rewards/margins": 0.06039486080408096, |
|
"rewards/rejected": -0.08205638825893402, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.882801896372967e-09, |
|
"logits/chosen": 0.8763138055801392, |
|
"logits/rejected": 1.017446517944336, |
|
"logps/chosen": -282.8580627441406, |
|
"logps/rejected": -261.3891906738281, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.024912741035223007, |
|
"rewards/margins": 0.045935411006212234, |
|
"rewards/rejected": -0.07084815204143524, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.7143119759026614e-09, |
|
"logits/chosen": 0.9283539056777954, |
|
"logits/rejected": 1.0674773454666138, |
|
"logps/chosen": -270.36358642578125, |
|
"logps/rejected": -274.0054626464844, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.01020726840943098, |
|
"rewards/margins": 0.06225059553980827, |
|
"rewards/rejected": -0.07245786488056183, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.754344691717591e-09, |
|
"logits/chosen": 0.8904141187667847, |
|
"logits/rejected": 0.9620451927185059, |
|
"logps/chosen": -264.84930419921875, |
|
"logps/rejected": -218.98477172851562, |
|
"loss": 0.6906, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.012690825387835503, |
|
"rewards/margins": 0.06626447290182114, |
|
"rewards/rejected": -0.07895530760288239, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.0029802008096335e-09, |
|
"logits/chosen": 1.0099847316741943, |
|
"logits/rejected": 0.9373987913131714, |
|
"logps/chosen": -268.06048583984375, |
|
"logps/rejected": -228.6332550048828, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.028048217296600342, |
|
"rewards/margins": 0.06466346234083176, |
|
"rewards/rejected": -0.0927116721868515, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.602812418974534e-10, |
|
"logits/chosen": 0.8952652812004089, |
|
"logits/rejected": 1.017452597618103, |
|
"logps/chosen": -254.45571899414062, |
|
"logps/rejected": -240.29006958007812, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.032994892448186874, |
|
"rewards/margins": 0.04944116994738579, |
|
"rewards/rejected": -0.08243606984615326, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_logits/chosen": 0.9220536351203918, |
|
"eval_logits/rejected": 1.0245850086212158, |
|
"eval_logps/chosen": -280.4203796386719, |
|
"eval_logps/rejected": -252.4789581298828, |
|
"eval_loss": 0.690862774848938, |
|
"eval_rewards/accuracies": 0.6320000290870667, |
|
"eval_rewards/chosen": -0.028735652565956116, |
|
"eval_rewards/margins": 0.057894736528396606, |
|
"eval_rewards/rejected": -0.08663039654493332, |
|
"eval_runtime": 539.9529, |
|
"eval_samples_per_second": 3.704, |
|
"eval_steps_per_second": 0.926, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.2629313018819312e-10, |
|
"logits/chosen": 0.895196795463562, |
|
"logits/rejected": 1.000282645225525, |
|
"logps/chosen": -302.4236145019531, |
|
"logps/rejected": -285.1495361328125, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.016779940575361252, |
|
"rewards/margins": 0.05169288069009781, |
|
"rewards/rejected": -0.06847281754016876, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.0437535929996855e-12, |
|
"logits/chosen": 0.982556164264679, |
|
"logits/rejected": 0.9328534007072449, |
|
"logps/chosen": -285.391845703125, |
|
"logps/rejected": -266.6493225097656, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.026478338986635208, |
|
"rewards/margins": 0.06039903312921524, |
|
"rewards/rejected": -0.08687736093997955, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 3821, |
|
"total_flos": 0.0, |
|
"train_loss": 0.691200782103989, |
|
"train_runtime": 55857.7878, |
|
"train_samples_per_second": 1.094, |
|
"train_steps_per_second": 0.068 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3821, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|