|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9282399143163156, |
|
"eval_steps": 100, |
|
"global_step": 2600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.003570153516601214, |
|
"grad_norm": 7.480371952056885, |
|
"learning_rate": 4.9821492324169946e-05, |
|
"logits/chosen": 24.812057495117188, |
|
"logits/rejected": 24.878061294555664, |
|
"logps/chosen": -20.12216567993164, |
|
"logps/rejected": -25.59366798400879, |
|
"loss": 0.6569, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.08925116807222366, |
|
"rewards/margins": 0.21577796339988708, |
|
"rewards/rejected": -0.12652680277824402, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.007140307033202428, |
|
"grad_norm": 6.517239093780518, |
|
"learning_rate": 4.964298464833988e-05, |
|
"logits/chosen": 24.764490127563477, |
|
"logits/rejected": 24.641494750976562, |
|
"logps/chosen": -11.448348999023438, |
|
"logps/rejected": -12.761981964111328, |
|
"loss": 0.6509, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.14247111976146698, |
|
"rewards/margins": 0.24720080196857452, |
|
"rewards/rejected": -0.10472966730594635, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.010710460549803642, |
|
"grad_norm": 6.2558159828186035, |
|
"learning_rate": 4.946447697250982e-05, |
|
"logits/chosen": 26.845245361328125, |
|
"logits/rejected": 26.89157485961914, |
|
"logps/chosen": -32.96189880371094, |
|
"logps/rejected": -36.98007583618164, |
|
"loss": 0.7495, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.26471006870269775, |
|
"rewards/margins": 0.1922953575849533, |
|
"rewards/rejected": -0.45700541138648987, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.014280614066404856, |
|
"grad_norm": 7.402151584625244, |
|
"learning_rate": 4.928596929667976e-05, |
|
"logits/chosen": 27.013961791992188, |
|
"logits/rejected": 27.773513793945312, |
|
"logps/chosen": -33.79491424560547, |
|
"logps/rejected": -45.16600799560547, |
|
"loss": 0.607, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.004278683569282293, |
|
"rewards/margins": 0.5446106195449829, |
|
"rewards/rejected": -0.5488892793655396, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01785076758300607, |
|
"grad_norm": 8.540044784545898, |
|
"learning_rate": 4.9107461620849696e-05, |
|
"logits/chosen": 24.013198852539062, |
|
"logits/rejected": 24.5129337310791, |
|
"logps/chosen": -45.71046829223633, |
|
"logps/rejected": -38.64923858642578, |
|
"loss": 0.5787, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3722326159477234, |
|
"rewards/margins": 0.14807865023612976, |
|
"rewards/rejected": -0.5203112363815308, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.021420921099607283, |
|
"grad_norm": 4.945995330810547, |
|
"learning_rate": 4.892895394501964e-05, |
|
"logits/chosen": 27.04281997680664, |
|
"logits/rejected": 26.753490447998047, |
|
"logps/chosen": -58.52327346801758, |
|
"logps/rejected": -42.7177619934082, |
|
"loss": 0.6459, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.732448399066925, |
|
"rewards/margins": 0.44616565108299255, |
|
"rewards/rejected": -1.1786140203475952, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.024991074616208496, |
|
"grad_norm": 7.011103630065918, |
|
"learning_rate": 4.875044626918958e-05, |
|
"logits/chosen": 25.751834869384766, |
|
"logits/rejected": 26.487178802490234, |
|
"logps/chosen": -59.837867736816406, |
|
"logps/rejected": -62.329681396484375, |
|
"loss": 0.6127, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.34982752799987793, |
|
"rewards/margins": 0.5455743074417114, |
|
"rewards/rejected": -0.8954018354415894, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.028561228132809712, |
|
"grad_norm": 4.859188556671143, |
|
"learning_rate": 4.8571938593359514e-05, |
|
"logits/chosen": 23.81821060180664, |
|
"logits/rejected": 23.5118465423584, |
|
"logps/chosen": -42.71343231201172, |
|
"logps/rejected": -37.387977600097656, |
|
"loss": 0.5896, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.011353443376719952, |
|
"rewards/margins": 0.47227078676223755, |
|
"rewards/rejected": -0.4836241602897644, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.03213138164941092, |
|
"grad_norm": 11.05513858795166, |
|
"learning_rate": 4.839343091752946e-05, |
|
"logits/chosen": 27.86802101135254, |
|
"logits/rejected": 27.87044334411621, |
|
"logps/chosen": -56.7071533203125, |
|
"logps/rejected": -48.46919631958008, |
|
"loss": 0.6217, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.01455842237919569, |
|
"rewards/margins": 0.41877785325050354, |
|
"rewards/rejected": -0.4333363175392151, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.03570153516601214, |
|
"grad_norm": 5.356033802032471, |
|
"learning_rate": 4.8214923241699396e-05, |
|
"logits/chosen": 23.89133644104004, |
|
"logits/rejected": 23.665489196777344, |
|
"logps/chosen": -63.13393020629883, |
|
"logps/rejected": -43.369056701660156, |
|
"loss": 0.6764, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.114576056599617, |
|
"rewards/margins": 0.36550942063331604, |
|
"rewards/rejected": -0.48008546233177185, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03570153516601214, |
|
"eval_logits/chosen": 25.279199600219727, |
|
"eval_logits/rejected": 25.355634689331055, |
|
"eval_logps/chosen": -42.37962341308594, |
|
"eval_logps/rejected": -37.090999603271484, |
|
"eval_loss": 0.5741721987724304, |
|
"eval_rewards/accuracies": 0.6777777671813965, |
|
"eval_rewards/chosen": -0.07397282123565674, |
|
"eval_rewards/margins": 0.43140149116516113, |
|
"eval_rewards/rejected": -0.5053743124008179, |
|
"eval_runtime": 109.7281, |
|
"eval_samples_per_second": 13.05, |
|
"eval_steps_per_second": 0.82, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.039271688682613354, |
|
"grad_norm": 5.330838680267334, |
|
"learning_rate": 4.803641556586933e-05, |
|
"logits/chosen": 26.816951751708984, |
|
"logits/rejected": 27.308795928955078, |
|
"logps/chosen": -37.895626068115234, |
|
"logps/rejected": -35.43812942504883, |
|
"loss": 0.5487, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.1233404278755188, |
|
"rewards/margins": 0.47418642044067383, |
|
"rewards/rejected": -0.5975269079208374, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.042841842199214566, |
|
"grad_norm": 10.010088920593262, |
|
"learning_rate": 4.785790789003927e-05, |
|
"logits/chosen": 27.359798431396484, |
|
"logits/rejected": 27.50588035583496, |
|
"logps/chosen": -52.02180862426758, |
|
"logps/rejected": -48.30536651611328, |
|
"loss": 0.5954, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.14739781618118286, |
|
"rewards/margins": 0.22387762367725372, |
|
"rewards/rejected": -0.07647980749607086, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.04641199571581578, |
|
"grad_norm": 7.994888782501221, |
|
"learning_rate": 4.7679400214209214e-05, |
|
"logits/chosen": 27.39333152770996, |
|
"logits/rejected": 26.996658325195312, |
|
"logps/chosen": -18.88664436340332, |
|
"logps/rejected": -39.95366668701172, |
|
"loss": 0.6306, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.09302469342947006, |
|
"rewards/margins": 0.047691889107227325, |
|
"rewards/rejected": -0.14071659743785858, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.04998214923241699, |
|
"grad_norm": 3.463163375854492, |
|
"learning_rate": 4.750089253837915e-05, |
|
"logits/chosen": 22.470857620239258, |
|
"logits/rejected": 22.843807220458984, |
|
"logps/chosen": -22.85379981994629, |
|
"logps/rejected": -19.81589698791504, |
|
"loss": 0.5663, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.08994263410568237, |
|
"rewards/margins": 0.6263381242752075, |
|
"rewards/rejected": -0.5363954305648804, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.053552302749018205, |
|
"grad_norm": 7.127109527587891, |
|
"learning_rate": 4.732238486254909e-05, |
|
"logits/chosen": 26.753835678100586, |
|
"logits/rejected": 26.78439712524414, |
|
"logps/chosen": -46.176551818847656, |
|
"logps/rejected": -50.23821258544922, |
|
"loss": 0.5586, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20682087540626526, |
|
"rewards/margins": 1.0148881673812866, |
|
"rewards/rejected": -0.8080674409866333, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.057122456265619424, |
|
"grad_norm": 6.003675937652588, |
|
"learning_rate": 4.714387718671903e-05, |
|
"logits/chosen": 29.764068603515625, |
|
"logits/rejected": 29.705978393554688, |
|
"logps/chosen": -23.824987411499023, |
|
"logps/rejected": -42.840850830078125, |
|
"loss": 0.5256, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.22749033570289612, |
|
"rewards/margins": 0.35782289505004883, |
|
"rewards/rejected": -0.5853132009506226, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.06069260978222064, |
|
"grad_norm": 6.028194427490234, |
|
"learning_rate": 4.696536951088897e-05, |
|
"logits/chosen": 23.769296646118164, |
|
"logits/rejected": 24.033222198486328, |
|
"logps/chosen": -41.425865173339844, |
|
"logps/rejected": -41.90104293823242, |
|
"loss": 0.6533, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.4930092692375183, |
|
"rewards/margins": 0.38698917627334595, |
|
"rewards/rejected": -0.8799983859062195, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.06426276329882184, |
|
"grad_norm": 4.039268493652344, |
|
"learning_rate": 4.678686183505891e-05, |
|
"logits/chosen": 25.150955200195312, |
|
"logits/rejected": 25.3060302734375, |
|
"logps/chosen": -33.606788635253906, |
|
"logps/rejected": -22.799793243408203, |
|
"loss": 0.5998, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.12118367105722427, |
|
"rewards/margins": 0.29234248399734497, |
|
"rewards/rejected": -0.41352614760398865, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.06783291681542307, |
|
"grad_norm": 8.811986923217773, |
|
"learning_rate": 4.660835415922885e-05, |
|
"logits/chosen": 28.551767349243164, |
|
"logits/rejected": 28.756189346313477, |
|
"logps/chosen": -45.27893829345703, |
|
"logps/rejected": -28.83548355102539, |
|
"loss": 0.5604, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.4300183653831482, |
|
"rewards/margins": 0.17230427265167236, |
|
"rewards/rejected": -0.6023226976394653, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.07140307033202428, |
|
"grad_norm": 7.31522798538208, |
|
"learning_rate": 4.642984648339879e-05, |
|
"logits/chosen": 25.931976318359375, |
|
"logits/rejected": 25.951126098632812, |
|
"logps/chosen": -50.394126892089844, |
|
"logps/rejected": -38.347755432128906, |
|
"loss": 0.6238, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.2471456527709961, |
|
"rewards/margins": 0.6917206048965454, |
|
"rewards/rejected": -0.9388662576675415, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.07140307033202428, |
|
"eval_logits/chosen": 25.649152755737305, |
|
"eval_logits/rejected": 25.710378646850586, |
|
"eval_logps/chosen": -43.097450256347656, |
|
"eval_logps/rejected": -38.84811782836914, |
|
"eval_loss": 0.5490943193435669, |
|
"eval_rewards/accuracies": 0.6833333373069763, |
|
"eval_rewards/chosen": -0.14575596153736115, |
|
"eval_rewards/margins": 0.5353304743766785, |
|
"eval_rewards/rejected": -0.6810863614082336, |
|
"eval_runtime": 97.7698, |
|
"eval_samples_per_second": 14.647, |
|
"eval_steps_per_second": 0.921, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0749732238486255, |
|
"grad_norm": 5.679516792297363, |
|
"learning_rate": 4.6251338807568726e-05, |
|
"logits/chosen": 26.078624725341797, |
|
"logits/rejected": 25.89596939086914, |
|
"logps/chosen": -28.943302154541016, |
|
"logps/rejected": -45.90317916870117, |
|
"loss": 0.6111, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.14115412533283234, |
|
"rewards/margins": 0.2540377974510193, |
|
"rewards/rejected": -0.3951919376850128, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.07854337736522671, |
|
"grad_norm": 13.459559440612793, |
|
"learning_rate": 4.607283113173867e-05, |
|
"logits/chosen": 22.200332641601562, |
|
"logits/rejected": 22.71112060546875, |
|
"logps/chosen": -36.98976135253906, |
|
"logps/rejected": -30.201828002929688, |
|
"loss": 0.5806, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.00881283264607191, |
|
"rewards/margins": 0.8385305404663086, |
|
"rewards/rejected": -0.8473433256149292, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.08211353088182792, |
|
"grad_norm": 9.37973403930664, |
|
"learning_rate": 4.589432345590861e-05, |
|
"logits/chosen": 28.823495864868164, |
|
"logits/rejected": 28.9989013671875, |
|
"logps/chosen": -52.96837615966797, |
|
"logps/rejected": -48.45201110839844, |
|
"loss": 0.5957, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.45236507058143616, |
|
"rewards/margins": 0.9013611674308777, |
|
"rewards/rejected": -1.3537262678146362, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.08568368439842913, |
|
"grad_norm": 6.611434459686279, |
|
"learning_rate": 4.5715815780078545e-05, |
|
"logits/chosen": 19.64382553100586, |
|
"logits/rejected": 19.509660720825195, |
|
"logps/chosen": -48.31082534790039, |
|
"logps/rejected": -50.42847442626953, |
|
"loss": 0.6004, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.15059544146060944, |
|
"rewards/margins": 0.887191653251648, |
|
"rewards/rejected": -1.0377871990203857, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.08925383791503035, |
|
"grad_norm": 6.911257266998291, |
|
"learning_rate": 4.553730810424848e-05, |
|
"logits/chosen": 17.858783721923828, |
|
"logits/rejected": 18.209197998046875, |
|
"logps/chosen": -28.553043365478516, |
|
"logps/rejected": -39.740516662597656, |
|
"loss": 0.5548, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.13874831795692444, |
|
"rewards/margins": 0.48837804794311523, |
|
"rewards/rejected": -0.6271263360977173, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.09282399143163156, |
|
"grad_norm": 8.965351104736328, |
|
"learning_rate": 4.5358800428418426e-05, |
|
"logits/chosen": 25.02800178527832, |
|
"logits/rejected": 24.903820037841797, |
|
"logps/chosen": -41.33483123779297, |
|
"logps/rejected": -38.99028396606445, |
|
"loss": 0.5356, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.14857253432273865, |
|
"rewards/margins": 0.6738235354423523, |
|
"rewards/rejected": -0.525251030921936, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.09639414494823277, |
|
"grad_norm": 6.815878868103027, |
|
"learning_rate": 4.5180292752588364e-05, |
|
"logits/chosen": 26.964553833007812, |
|
"logits/rejected": 26.749637603759766, |
|
"logps/chosen": -36.553131103515625, |
|
"logps/rejected": -19.335412979125977, |
|
"loss": 0.5599, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.019418681040406227, |
|
"rewards/margins": 0.6367809772491455, |
|
"rewards/rejected": -0.6561996936798096, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.09996429846483398, |
|
"grad_norm": 9.245991706848145, |
|
"learning_rate": 4.50017850767583e-05, |
|
"logits/chosen": 25.099380493164062, |
|
"logits/rejected": 25.418010711669922, |
|
"logps/chosen": -30.81514549255371, |
|
"logps/rejected": -27.115314483642578, |
|
"loss": 0.6413, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.4499152600765228, |
|
"rewards/margins": 0.7015028595924377, |
|
"rewards/rejected": -1.1514180898666382, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.1035344519814352, |
|
"grad_norm": 4.70521879196167, |
|
"learning_rate": 4.4823277400928245e-05, |
|
"logits/chosen": 26.4927978515625, |
|
"logits/rejected": 26.206491470336914, |
|
"logps/chosen": -50.66899871826172, |
|
"logps/rejected": -31.35797691345215, |
|
"loss": 0.5381, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.7465568780899048, |
|
"rewards/margins": 0.4888392388820648, |
|
"rewards/rejected": -1.235396146774292, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.10710460549803641, |
|
"grad_norm": 12.274664878845215, |
|
"learning_rate": 4.464476972509818e-05, |
|
"logits/chosen": 24.71265983581543, |
|
"logits/rejected": 24.88272476196289, |
|
"logps/chosen": -37.392120361328125, |
|
"logps/rejected": -31.838241577148438, |
|
"loss": 0.6747, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5502563714981079, |
|
"rewards/margins": 0.21214011311531067, |
|
"rewards/rejected": -0.762396514415741, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.10710460549803641, |
|
"eval_logits/chosen": 24.666624069213867, |
|
"eval_logits/rejected": 24.732454299926758, |
|
"eval_logps/chosen": -43.291751861572266, |
|
"eval_logps/rejected": -40.54462432861328, |
|
"eval_loss": 0.5352813601493835, |
|
"eval_rewards/accuracies": 0.7222222089767456, |
|
"eval_rewards/chosen": -0.16518594324588776, |
|
"eval_rewards/margins": 0.6855509877204895, |
|
"eval_rewards/rejected": -0.8507369160652161, |
|
"eval_runtime": 97.8407, |
|
"eval_samples_per_second": 14.636, |
|
"eval_steps_per_second": 0.92, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.11067475901463764, |
|
"grad_norm": 7.402091979980469, |
|
"learning_rate": 4.446626204926812e-05, |
|
"logits/chosen": 25.197057723999023, |
|
"logits/rejected": 25.214290618896484, |
|
"logps/chosen": -53.834320068359375, |
|
"logps/rejected": -47.46216583251953, |
|
"loss": 0.5246, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.2350492775440216, |
|
"rewards/margins": 0.7798230051994324, |
|
"rewards/rejected": -1.0148723125457764, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.11424491253123885, |
|
"grad_norm": 7.803309917449951, |
|
"learning_rate": 4.4287754373438064e-05, |
|
"logits/chosen": 25.673919677734375, |
|
"logits/rejected": 26.028600692749023, |
|
"logps/chosen": -47.897422790527344, |
|
"logps/rejected": -32.04759216308594, |
|
"loss": 0.5493, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.12382154166698456, |
|
"rewards/margins": 1.0582133531570435, |
|
"rewards/rejected": -0.934391975402832, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.11781506604784006, |
|
"grad_norm": 12.149727821350098, |
|
"learning_rate": 4.4109246697608e-05, |
|
"logits/chosen": 26.565237045288086, |
|
"logits/rejected": 26.2646541595459, |
|
"logps/chosen": -45.204307556152344, |
|
"logps/rejected": -54.38944625854492, |
|
"loss": 0.5822, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.24066975712776184, |
|
"rewards/margins": 0.3767842650413513, |
|
"rewards/rejected": -0.6174539923667908, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.12138521956444127, |
|
"grad_norm": 11.431114196777344, |
|
"learning_rate": 4.393073902177794e-05, |
|
"logits/chosen": 24.764408111572266, |
|
"logits/rejected": 23.79238510131836, |
|
"logps/chosen": -22.893648147583008, |
|
"logps/rejected": -48.49061584472656, |
|
"loss": 0.6424, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.6392322778701782, |
|
"rewards/margins": 0.1874510645866394, |
|
"rewards/rejected": -0.8266833424568176, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.12495537308104249, |
|
"grad_norm": 13.246600151062012, |
|
"learning_rate": 4.375223134594788e-05, |
|
"logits/chosen": 23.024621963500977, |
|
"logits/rejected": 22.838958740234375, |
|
"logps/chosen": -46.08310317993164, |
|
"logps/rejected": -63.932945251464844, |
|
"loss": 0.5871, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.7620424032211304, |
|
"rewards/margins": 0.4331347346305847, |
|
"rewards/rejected": -1.1951771974563599, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.12852552659764369, |
|
"grad_norm": 6.875857830047607, |
|
"learning_rate": 4.357372367011782e-05, |
|
"logits/chosen": 24.33723258972168, |
|
"logits/rejected": 24.633174896240234, |
|
"logps/chosen": -36.276363372802734, |
|
"logps/rejected": -41.762107849121094, |
|
"loss": 0.538, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5144018530845642, |
|
"rewards/margins": 0.7962431907653809, |
|
"rewards/rejected": -1.3106449842453003, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1320956801142449, |
|
"grad_norm": 8.577788352966309, |
|
"learning_rate": 4.339521599428776e-05, |
|
"logits/chosen": 18.497318267822266, |
|
"logits/rejected": 18.542171478271484, |
|
"logps/chosen": -54.802284240722656, |
|
"logps/rejected": -40.361900329589844, |
|
"loss": 0.5472, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.43400055170059204, |
|
"rewards/margins": 0.37733301520347595, |
|
"rewards/rejected": -0.8113336563110352, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.13566583363084614, |
|
"grad_norm": 7.8961052894592285, |
|
"learning_rate": 4.3216708318457694e-05, |
|
"logits/chosen": 20.79564094543457, |
|
"logits/rejected": 21.202953338623047, |
|
"logps/chosen": -46.6911735534668, |
|
"logps/rejected": -25.177892684936523, |
|
"loss": 0.5404, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.20933791995048523, |
|
"rewards/margins": 0.9722681045532227, |
|
"rewards/rejected": -1.1816058158874512, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.13923598714744734, |
|
"grad_norm": 6.78292989730835, |
|
"learning_rate": 4.303820064262764e-05, |
|
"logits/chosen": 24.46206283569336, |
|
"logits/rejected": 24.468271255493164, |
|
"logps/chosen": -17.142438888549805, |
|
"logps/rejected": -8.94920539855957, |
|
"loss": 0.5916, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5404640436172485, |
|
"rewards/margins": -0.012588715180754662, |
|
"rewards/rejected": -0.5278753042221069, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.14280614066404856, |
|
"grad_norm": 6.525446891784668, |
|
"learning_rate": 4.2859692966797575e-05, |
|
"logits/chosen": 27.94113540649414, |
|
"logits/rejected": 27.818674087524414, |
|
"logps/chosen": -28.574228286743164, |
|
"logps/rejected": -41.67361831665039, |
|
"loss": 0.5795, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.17134228348731995, |
|
"rewards/margins": 0.6803145408630371, |
|
"rewards/rejected": -0.8516567945480347, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.14280614066404856, |
|
"eval_logits/chosen": 23.894601821899414, |
|
"eval_logits/rejected": 23.941455841064453, |
|
"eval_logps/chosen": -42.414432525634766, |
|
"eval_logps/rejected": -40.24538040161133, |
|
"eval_loss": 0.5327069759368896, |
|
"eval_rewards/accuracies": 0.7944444417953491, |
|
"eval_rewards/chosen": -0.07745402306318283, |
|
"eval_rewards/margins": 0.7433586716651917, |
|
"eval_rewards/rejected": -0.8208127617835999, |
|
"eval_runtime": 97.7382, |
|
"eval_samples_per_second": 14.651, |
|
"eval_steps_per_second": 0.921, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.14637629418064976, |
|
"grad_norm": 7.846954345703125, |
|
"learning_rate": 4.268118529096751e-05, |
|
"logits/chosen": 25.257925033569336, |
|
"logits/rejected": 25.105932235717773, |
|
"logps/chosen": -45.8572883605957, |
|
"logps/rejected": -26.36654281616211, |
|
"loss": 0.4999, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5164770483970642, |
|
"rewards/margins": 0.7112953662872314, |
|
"rewards/rejected": -1.2277723550796509, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.149946447697251, |
|
"grad_norm": 7.722918510437012, |
|
"learning_rate": 4.250267761513746e-05, |
|
"logits/chosen": 21.332164764404297, |
|
"logits/rejected": 21.323444366455078, |
|
"logps/chosen": -37.462974548339844, |
|
"logps/rejected": -28.17437171936035, |
|
"loss": 0.6035, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.4826648235321045, |
|
"rewards/margins": 0.2844996452331543, |
|
"rewards/rejected": -0.7671645283699036, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.1535166012138522, |
|
"grad_norm": 8.375965118408203, |
|
"learning_rate": 4.232416993930739e-05, |
|
"logits/chosen": 18.349498748779297, |
|
"logits/rejected": 18.25157356262207, |
|
"logps/chosen": -53.23881912231445, |
|
"logps/rejected": -62.28485107421875, |
|
"loss": 0.5301, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.14654412865638733, |
|
"rewards/margins": 0.404157817363739, |
|
"rewards/rejected": -0.550701916217804, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.15708675473045342, |
|
"grad_norm": 6.275489807128906, |
|
"learning_rate": 4.214566226347733e-05, |
|
"logits/chosen": 22.851360321044922, |
|
"logits/rejected": 23.03590202331543, |
|
"logps/chosen": -32.48253631591797, |
|
"logps/rejected": -36.65414047241211, |
|
"loss": 0.4959, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.13281583786010742, |
|
"rewards/margins": 1.3949477672576904, |
|
"rewards/rejected": -1.262131929397583, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.16065690824705461, |
|
"grad_norm": 9.541354179382324, |
|
"learning_rate": 4.1967154587647275e-05, |
|
"logits/chosen": 25.5571231842041, |
|
"logits/rejected": 25.35956573486328, |
|
"logps/chosen": -33.83552169799805, |
|
"logps/rejected": -34.80134963989258, |
|
"loss": 0.5615, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1620870977640152, |
|
"rewards/margins": 1.014452576637268, |
|
"rewards/rejected": -1.176539659500122, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.16422706176365584, |
|
"grad_norm": 4.77647590637207, |
|
"learning_rate": 4.1788646911817206e-05, |
|
"logits/chosen": 25.47999382019043, |
|
"logits/rejected": 24.808551788330078, |
|
"logps/chosen": -39.025047302246094, |
|
"logps/rejected": -71.5890884399414, |
|
"loss": 0.4752, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.2537313401699066, |
|
"rewards/margins": 1.16329026222229, |
|
"rewards/rejected": -1.4170215129852295, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.16779721528025704, |
|
"grad_norm": 4.253603458404541, |
|
"learning_rate": 4.161013923598715e-05, |
|
"logits/chosen": 20.190471649169922, |
|
"logits/rejected": 20.048721313476562, |
|
"logps/chosen": -68.62611389160156, |
|
"logps/rejected": -72.08489227294922, |
|
"loss": 0.5801, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9637469053268433, |
|
"rewards/margins": 0.6993335485458374, |
|
"rewards/rejected": -1.6630805730819702, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.17136736879685827, |
|
"grad_norm": 9.119401931762695, |
|
"learning_rate": 4.1431631560157094e-05, |
|
"logits/chosen": 19.260848999023438, |
|
"logits/rejected": 18.897022247314453, |
|
"logps/chosen": -39.502891540527344, |
|
"logps/rejected": -37.60371398925781, |
|
"loss": 0.6457, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7391226291656494, |
|
"rewards/margins": 0.16527147591114044, |
|
"rewards/rejected": -0.9043940305709839, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.1749375223134595, |
|
"grad_norm": 6.5403876304626465, |
|
"learning_rate": 4.1253123884327025e-05, |
|
"logits/chosen": 24.00229263305664, |
|
"logits/rejected": 24.137210845947266, |
|
"logps/chosen": -45.11564636230469, |
|
"logps/rejected": -63.3198356628418, |
|
"loss": 0.492, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.5857070684432983, |
|
"rewards/margins": 1.3458783626556396, |
|
"rewards/rejected": -1.9315853118896484, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.1785076758300607, |
|
"grad_norm": 6.147622108459473, |
|
"learning_rate": 4.107461620849697e-05, |
|
"logits/chosen": 20.132909774780273, |
|
"logits/rejected": 20.044574737548828, |
|
"logps/chosen": -40.866424560546875, |
|
"logps/rejected": -50.83662414550781, |
|
"loss": 0.4606, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.40833067893981934, |
|
"rewards/margins": 0.7871369123458862, |
|
"rewards/rejected": -1.195467472076416, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.1785076758300607, |
|
"eval_logits/chosen": 22.78708839416504, |
|
"eval_logits/rejected": 22.809024810791016, |
|
"eval_logps/chosen": -47.07508850097656, |
|
"eval_logps/rejected": -45.6721076965332, |
|
"eval_loss": 0.5191536545753479, |
|
"eval_rewards/accuracies": 0.75, |
|
"eval_rewards/chosen": -0.5435196161270142, |
|
"eval_rewards/margins": 0.8199654221534729, |
|
"eval_rewards/rejected": -1.3634849786758423, |
|
"eval_runtime": 97.7087, |
|
"eval_samples_per_second": 14.656, |
|
"eval_steps_per_second": 0.921, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.18207782934666192, |
|
"grad_norm": 12.392146110534668, |
|
"learning_rate": 4.0896108532666906e-05, |
|
"logits/chosen": 19.699203491210938, |
|
"logits/rejected": 19.52513885498047, |
|
"logps/chosen": -57.56768798828125, |
|
"logps/rejected": -71.2248764038086, |
|
"loss": 0.632, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.3169898986816406, |
|
"rewards/margins": 1.7359142303466797, |
|
"rewards/rejected": -2.0529043674468994, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.18564798286326312, |
|
"grad_norm": 5.329209804534912, |
|
"learning_rate": 4.071760085683684e-05, |
|
"logits/chosen": 20.416704177856445, |
|
"logits/rejected": 20.525035858154297, |
|
"logps/chosen": -38.03459930419922, |
|
"logps/rejected": -38.064720153808594, |
|
"loss": 0.5251, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5582042932510376, |
|
"rewards/margins": 0.7457180023193359, |
|
"rewards/rejected": -1.3039222955703735, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.18921813637986434, |
|
"grad_norm": 5.097146987915039, |
|
"learning_rate": 4.053909318100679e-05, |
|
"logits/chosen": 22.233572006225586, |
|
"logits/rejected": 22.370027542114258, |
|
"logps/chosen": -15.693742752075195, |
|
"logps/rejected": -16.01146125793457, |
|
"loss": 0.4981, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.28356021642684937, |
|
"rewards/margins": 0.759593665599823, |
|
"rewards/rejected": -1.0431538820266724, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.19278828989646554, |
|
"grad_norm": 10.173523902893066, |
|
"learning_rate": 4.0360585505176725e-05, |
|
"logits/chosen": 22.36469268798828, |
|
"logits/rejected": 22.462005615234375, |
|
"logps/chosen": -22.046735763549805, |
|
"logps/rejected": -29.99558448791504, |
|
"loss": 0.5417, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.5353186726570129, |
|
"rewards/margins": 0.14299364387989044, |
|
"rewards/rejected": -0.6783123016357422, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.19635844341306677, |
|
"grad_norm": 4.725411891937256, |
|
"learning_rate": 4.018207782934666e-05, |
|
"logits/chosen": 22.934490203857422, |
|
"logits/rejected": 22.83697509765625, |
|
"logps/chosen": -50.47562789916992, |
|
"logps/rejected": -39.98368453979492, |
|
"loss": 0.656, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.5939350724220276, |
|
"rewards/margins": 0.7087148427963257, |
|
"rewards/rejected": -1.3026498556137085, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.19992859692966797, |
|
"grad_norm": 16.693065643310547, |
|
"learning_rate": 4.00035701535166e-05, |
|
"logits/chosen": 26.147781372070312, |
|
"logits/rejected": 25.938640594482422, |
|
"logps/chosen": -66.49282836914062, |
|
"logps/rejected": -62.023353576660156, |
|
"loss": 0.5596, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.1642974466085434, |
|
"rewards/margins": 1.2225383520126343, |
|
"rewards/rejected": -1.3868358135223389, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.2034987504462692, |
|
"grad_norm": 4.374217987060547, |
|
"learning_rate": 3.982506247768654e-05, |
|
"logits/chosen": 23.22292709350586, |
|
"logits/rejected": 22.756343841552734, |
|
"logps/chosen": -47.733238220214844, |
|
"logps/rejected": -65.7103042602539, |
|
"loss": 0.5128, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5647065043449402, |
|
"rewards/margins": 0.45924702286720276, |
|
"rewards/rejected": -1.0239535570144653, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.2070689039628704, |
|
"grad_norm": 5.655895233154297, |
|
"learning_rate": 3.964655480185648e-05, |
|
"logits/chosen": 22.537179946899414, |
|
"logits/rejected": 22.3128662109375, |
|
"logps/chosen": -33.57523727416992, |
|
"logps/rejected": -46.982025146484375, |
|
"loss": 0.485, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5356764793395996, |
|
"rewards/margins": 1.0464246273040771, |
|
"rewards/rejected": -1.5821009874343872, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.21063905747947162, |
|
"grad_norm": 8.67690372467041, |
|
"learning_rate": 3.946804712602642e-05, |
|
"logits/chosen": 24.053617477416992, |
|
"logits/rejected": 24.086387634277344, |
|
"logps/chosen": -30.85910415649414, |
|
"logps/rejected": -17.82439422607422, |
|
"loss": 0.4658, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6462145447731018, |
|
"rewards/margins": 0.6275963187217712, |
|
"rewards/rejected": -1.273810863494873, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.21420921099607282, |
|
"grad_norm": 12.361294746398926, |
|
"learning_rate": 3.928953945019636e-05, |
|
"logits/chosen": 19.749366760253906, |
|
"logits/rejected": 20.843948364257812, |
|
"logps/chosen": -47.06502151489258, |
|
"logps/rejected": -38.376441955566406, |
|
"loss": 0.4708, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.37672120332717896, |
|
"rewards/margins": 1.4848692417144775, |
|
"rewards/rejected": -1.8615906238555908, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.21420921099607282, |
|
"eval_logits/chosen": 22.7338924407959, |
|
"eval_logits/rejected": 22.768844604492188, |
|
"eval_logps/chosen": -43.45345687866211, |
|
"eval_logps/rejected": -43.90707778930664, |
|
"eval_loss": 0.5074149966239929, |
|
"eval_rewards/accuracies": 0.7944444417953491, |
|
"eval_rewards/chosen": -0.18135611712932587, |
|
"eval_rewards/margins": 1.0056260824203491, |
|
"eval_rewards/rejected": -1.1869820356369019, |
|
"eval_runtime": 97.81, |
|
"eval_samples_per_second": 14.641, |
|
"eval_steps_per_second": 0.92, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.21777936451267405, |
|
"grad_norm": 11.45566177368164, |
|
"learning_rate": 3.91110317743663e-05, |
|
"logits/chosen": 17.696277618408203, |
|
"logits/rejected": 18.161190032958984, |
|
"logps/chosen": -61.38924026489258, |
|
"logps/rejected": -28.698049545288086, |
|
"loss": 0.6026, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.9514519572257996, |
|
"rewards/margins": -0.2889273166656494, |
|
"rewards/rejected": -0.6625245809555054, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.22134951802927527, |
|
"grad_norm": 11.354263305664062, |
|
"learning_rate": 3.8932524098536237e-05, |
|
"logits/chosen": 23.74215316772461, |
|
"logits/rejected": 23.932151794433594, |
|
"logps/chosen": -39.733665466308594, |
|
"logps/rejected": -52.69596481323242, |
|
"loss": 0.5451, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.11807258427143097, |
|
"rewards/margins": 0.5899603962898254, |
|
"rewards/rejected": -0.7080329656600952, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.22491967154587647, |
|
"grad_norm": 8.663636207580566, |
|
"learning_rate": 3.875401642270618e-05, |
|
"logits/chosen": 20.996337890625, |
|
"logits/rejected": 21.191204071044922, |
|
"logps/chosen": -42.43983840942383, |
|
"logps/rejected": -30.458621978759766, |
|
"loss": 0.5347, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.20720069110393524, |
|
"rewards/margins": 0.8300245404243469, |
|
"rewards/rejected": -1.0372252464294434, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.2284898250624777, |
|
"grad_norm": 6.389614105224609, |
|
"learning_rate": 3.857550874687612e-05, |
|
"logits/chosen": 23.66379737854004, |
|
"logits/rejected": 24.068124771118164, |
|
"logps/chosen": -56.00236129760742, |
|
"logps/rejected": -52.503746032714844, |
|
"loss": 0.5791, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.09035615622997284, |
|
"rewards/margins": 1.4734671115875244, |
|
"rewards/rejected": -1.5638234615325928, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.2320599785790789, |
|
"grad_norm": 5.053744792938232, |
|
"learning_rate": 3.8397001071046055e-05, |
|
"logits/chosen": 24.763042449951172, |
|
"logits/rejected": 24.731813430786133, |
|
"logps/chosen": -34.47309494018555, |
|
"logps/rejected": -24.708209991455078, |
|
"loss": 0.5188, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.23519682884216309, |
|
"rewards/margins": 0.7381345629692078, |
|
"rewards/rejected": -0.9733314514160156, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.23563013209568012, |
|
"grad_norm": 14.828216552734375, |
|
"learning_rate": 3.8218493395216e-05, |
|
"logits/chosen": 23.33843421936035, |
|
"logits/rejected": 23.605775833129883, |
|
"logps/chosen": -38.09233856201172, |
|
"logps/rejected": -36.02016830444336, |
|
"loss": 0.6089, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3239721357822418, |
|
"rewards/margins": 0.35063061118125916, |
|
"rewards/rejected": -0.6746028661727905, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.23920028561228132, |
|
"grad_norm": 6.591442108154297, |
|
"learning_rate": 3.8039985719385937e-05, |
|
"logits/chosen": 22.064315795898438, |
|
"logits/rejected": 22.158384323120117, |
|
"logps/chosen": -28.91001319885254, |
|
"logps/rejected": -23.76300048828125, |
|
"loss": 0.5987, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.16806618869304657, |
|
"rewards/margins": 0.20326104760169983, |
|
"rewards/rejected": -0.3713272213935852, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.24277043912888255, |
|
"grad_norm": 9.390283584594727, |
|
"learning_rate": 3.7861478043555874e-05, |
|
"logits/chosen": 23.296916961669922, |
|
"logits/rejected": 23.495471954345703, |
|
"logps/chosen": -51.231292724609375, |
|
"logps/rejected": -32.36138916015625, |
|
"loss": 0.4838, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4357140064239502, |
|
"rewards/margins": 0.5686477422714233, |
|
"rewards/rejected": -1.0043617486953735, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.24634059264548375, |
|
"grad_norm": 7.05921745300293, |
|
"learning_rate": 3.768297036772581e-05, |
|
"logits/chosen": 24.67749786376953, |
|
"logits/rejected": 25.084362030029297, |
|
"logps/chosen": -61.90864944458008, |
|
"logps/rejected": -47.69279861450195, |
|
"loss": 0.6434, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.0670510530471802, |
|
"rewards/margins": 0.16362956166267395, |
|
"rewards/rejected": -1.2306805849075317, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.24991074616208497, |
|
"grad_norm": 6.927039623260498, |
|
"learning_rate": 3.7504462691895755e-05, |
|
"logits/chosen": 22.120304107666016, |
|
"logits/rejected": 22.317134857177734, |
|
"logps/chosen": -47.007747650146484, |
|
"logps/rejected": -29.523571014404297, |
|
"loss": 0.5162, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.47515907883644104, |
|
"rewards/margins": 0.9105289578437805, |
|
"rewards/rejected": -1.385688066482544, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.24991074616208497, |
|
"eval_logits/chosen": 23.21824073791504, |
|
"eval_logits/rejected": 23.2376651763916, |
|
"eval_logps/chosen": -43.74300765991211, |
|
"eval_logps/rejected": -43.685081481933594, |
|
"eval_loss": 0.5032530426979065, |
|
"eval_rewards/accuracies": 0.7888888716697693, |
|
"eval_rewards/chosen": -0.21031144261360168, |
|
"eval_rewards/margins": 0.9544711709022522, |
|
"eval_rewards/rejected": -1.1647826433181763, |
|
"eval_runtime": 97.9268, |
|
"eval_samples_per_second": 14.623, |
|
"eval_steps_per_second": 0.919, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.2534808996786862, |
|
"grad_norm": 6.4891767501831055, |
|
"learning_rate": 3.732595501606569e-05, |
|
"logits/chosen": 23.012346267700195, |
|
"logits/rejected": 23.133983612060547, |
|
"logps/chosen": -32.99153137207031, |
|
"logps/rejected": -30.8564453125, |
|
"loss": 0.5311, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.13405704498291016, |
|
"rewards/margins": 0.8106959462165833, |
|
"rewards/rejected": -0.9447528719902039, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.25705105319528737, |
|
"grad_norm": 7.658117771148682, |
|
"learning_rate": 3.714744734023563e-05, |
|
"logits/chosen": 22.539342880249023, |
|
"logits/rejected": 22.32984161376953, |
|
"logps/chosen": -41.30803298950195, |
|
"logps/rejected": -37.77648162841797, |
|
"loss": 0.583, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5999592542648315, |
|
"rewards/margins": 0.26343995332717896, |
|
"rewards/rejected": -0.8633992075920105, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.2606212067118886, |
|
"grad_norm": 7.918398857116699, |
|
"learning_rate": 3.6968939664405574e-05, |
|
"logits/chosen": 22.939983367919922, |
|
"logits/rejected": 23.18207359313965, |
|
"logps/chosen": -46.46357345581055, |
|
"logps/rejected": -35.31732940673828, |
|
"loss": 0.5581, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.043735720217227936, |
|
"rewards/margins": 0.8847159147262573, |
|
"rewards/rejected": -0.928451657295227, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.2641913602284898, |
|
"grad_norm": 6.20953893661499, |
|
"learning_rate": 3.6790431988575504e-05, |
|
"logits/chosen": 28.148590087890625, |
|
"logits/rejected": 27.938528060913086, |
|
"logps/chosen": -51.41779327392578, |
|
"logps/rejected": -52.49924850463867, |
|
"loss": 0.523, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0679657459259033, |
|
"rewards/margins": 0.6626083254814148, |
|
"rewards/rejected": -1.7305742502212524, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.26776151374509105, |
|
"grad_norm": 10.36694049835205, |
|
"learning_rate": 3.661192431274545e-05, |
|
"logits/chosen": 24.841873168945312, |
|
"logits/rejected": 25.12506675720215, |
|
"logps/chosen": -21.518558502197266, |
|
"logps/rejected": -28.731252670288086, |
|
"loss": 0.5559, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.3871726393699646, |
|
"rewards/margins": 0.5390633344650269, |
|
"rewards/rejected": -0.9262359738349915, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.2713316672616923, |
|
"grad_norm": 6.616639137268066, |
|
"learning_rate": 3.643341663691539e-05, |
|
"logits/chosen": 23.483844757080078, |
|
"logits/rejected": 23.462881088256836, |
|
"logps/chosen": -20.33786964416504, |
|
"logps/rejected": -41.595367431640625, |
|
"loss": 0.5861, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.26747018098831177, |
|
"rewards/margins": 0.5755705833435059, |
|
"rewards/rejected": -0.8430407643318176, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.27490182077829345, |
|
"grad_norm": 7.002477169036865, |
|
"learning_rate": 3.625490896108533e-05, |
|
"logits/chosen": 23.841524124145508, |
|
"logits/rejected": 23.905414581298828, |
|
"logps/chosen": -54.80754852294922, |
|
"logps/rejected": -60.40642166137695, |
|
"loss": 0.5478, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.7754853367805481, |
|
"rewards/margins": 0.7477506995201111, |
|
"rewards/rejected": -1.5232360363006592, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.2784719742948947, |
|
"grad_norm": 4.716040134429932, |
|
"learning_rate": 3.607640128525527e-05, |
|
"logits/chosen": 21.66192054748535, |
|
"logits/rejected": 21.678348541259766, |
|
"logps/chosen": -59.828773498535156, |
|
"logps/rejected": -51.79111862182617, |
|
"loss": 0.3989, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.5894995927810669, |
|
"rewards/margins": 1.3722957372665405, |
|
"rewards/rejected": -1.9617952108383179, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.2820421278114959, |
|
"grad_norm": 8.946788787841797, |
|
"learning_rate": 3.589789360942521e-05, |
|
"logits/chosen": 24.498876571655273, |
|
"logits/rejected": 24.739574432373047, |
|
"logps/chosen": -56.08790969848633, |
|
"logps/rejected": -50.47938919067383, |
|
"loss": 0.5822, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.2977168560028076, |
|
"rewards/margins": 0.1555618941783905, |
|
"rewards/rejected": -1.45327889919281, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.28561228132809713, |
|
"grad_norm": 5.08765983581543, |
|
"learning_rate": 3.571938593359515e-05, |
|
"logits/chosen": 26.44881820678711, |
|
"logits/rejected": 26.592966079711914, |
|
"logps/chosen": -26.545644760131836, |
|
"logps/rejected": -46.947174072265625, |
|
"loss": 0.5121, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5634913444519043, |
|
"rewards/margins": 0.8930961489677429, |
|
"rewards/rejected": -1.456587553024292, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.28561228132809713, |
|
"eval_logits/chosen": 23.41709327697754, |
|
"eval_logits/rejected": 23.457468032836914, |
|
"eval_logps/chosen": -47.103492736816406, |
|
"eval_logps/rejected": -47.543006896972656, |
|
"eval_loss": 0.4901362657546997, |
|
"eval_rewards/accuracies": 0.7833333611488342, |
|
"eval_rewards/chosen": -0.5463601350784302, |
|
"eval_rewards/margins": 1.0042147636413574, |
|
"eval_rewards/rejected": -1.5505748987197876, |
|
"eval_runtime": 97.7883, |
|
"eval_samples_per_second": 14.644, |
|
"eval_steps_per_second": 0.92, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.2891824348446983, |
|
"grad_norm": 5.668002605438232, |
|
"learning_rate": 3.5540878257765086e-05, |
|
"logits/chosen": 15.257682800292969, |
|
"logits/rejected": 15.15269947052002, |
|
"logps/chosen": -28.859485626220703, |
|
"logps/rejected": -39.70137023925781, |
|
"loss": 0.6225, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.8887822031974792, |
|
"rewards/margins": 0.40193691849708557, |
|
"rewards/rejected": -1.2907191514968872, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.2927525883612995, |
|
"grad_norm": 6.119519233703613, |
|
"learning_rate": 3.536237058193502e-05, |
|
"logits/chosen": 20.487186431884766, |
|
"logits/rejected": 20.569347381591797, |
|
"logps/chosen": -26.315814971923828, |
|
"logps/rejected": -24.049549102783203, |
|
"loss": 0.6334, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.2173776626586914, |
|
"rewards/margins": 0.8700194358825684, |
|
"rewards/rejected": -1.0873970985412598, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.29632274187790075, |
|
"grad_norm": 9.75696849822998, |
|
"learning_rate": 3.518386290610497e-05, |
|
"logits/chosen": 28.570262908935547, |
|
"logits/rejected": 29.040294647216797, |
|
"logps/chosen": -46.7459831237793, |
|
"logps/rejected": -25.238162994384766, |
|
"loss": 0.4854, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.20457518100738525, |
|
"rewards/margins": 1.082558274269104, |
|
"rewards/rejected": -1.2871334552764893, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.299892895394502, |
|
"grad_norm": 4.077060699462891, |
|
"learning_rate": 3.5005355230274904e-05, |
|
"logits/chosen": 22.926097869873047, |
|
"logits/rejected": 23.03900718688965, |
|
"logps/chosen": -43.289085388183594, |
|
"logps/rejected": -39.03928756713867, |
|
"loss": 0.4416, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.38202500343322754, |
|
"rewards/margins": 0.7754106521606445, |
|
"rewards/rejected": -1.1574357748031616, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.30346304891110315, |
|
"grad_norm": 6.117227554321289, |
|
"learning_rate": 3.482684755444484e-05, |
|
"logits/chosen": 26.401382446289062, |
|
"logits/rejected": 25.781219482421875, |
|
"logps/chosen": -30.348541259765625, |
|
"logps/rejected": -47.32704162597656, |
|
"loss": 0.4493, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.6118322610855103, |
|
"rewards/margins": 1.2445160150527954, |
|
"rewards/rejected": -1.8563482761383057, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.3070332024277044, |
|
"grad_norm": 11.844883918762207, |
|
"learning_rate": 3.4648339878614786e-05, |
|
"logits/chosen": 23.075183868408203, |
|
"logits/rejected": 22.080493927001953, |
|
"logps/chosen": -49.947914123535156, |
|
"logps/rejected": -58.764617919921875, |
|
"loss": 0.5217, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.0053069591522217, |
|
"rewards/margins": 0.48744481801986694, |
|
"rewards/rejected": -1.4927518367767334, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.3106033559443056, |
|
"grad_norm": 12.23678207397461, |
|
"learning_rate": 3.4469832202784716e-05, |
|
"logits/chosen": 26.47686767578125, |
|
"logits/rejected": 26.53334617614746, |
|
"logps/chosen": -27.09230613708496, |
|
"logps/rejected": -39.659324645996094, |
|
"loss": 0.5951, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6957861185073853, |
|
"rewards/margins": 0.6590690016746521, |
|
"rewards/rejected": -1.3548550605773926, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.31417350946090683, |
|
"grad_norm": 8.197884559631348, |
|
"learning_rate": 3.429132452695466e-05, |
|
"logits/chosen": 22.180370330810547, |
|
"logits/rejected": 22.334489822387695, |
|
"logps/chosen": -45.77663040161133, |
|
"logps/rejected": -41.45575714111328, |
|
"loss": 0.4294, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.40475016832351685, |
|
"rewards/margins": 1.7321338653564453, |
|
"rewards/rejected": -2.1368842124938965, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.31774366297750806, |
|
"grad_norm": 6.394644260406494, |
|
"learning_rate": 3.4112816851124604e-05, |
|
"logits/chosen": 21.572330474853516, |
|
"logits/rejected": 21.563013076782227, |
|
"logps/chosen": -42.58979034423828, |
|
"logps/rejected": -32.326332092285156, |
|
"loss": 0.5278, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": -1.3842201232910156, |
|
"rewards/margins": -0.4129442572593689, |
|
"rewards/rejected": -0.9712759256362915, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.32131381649410923, |
|
"grad_norm": 11.288488388061523, |
|
"learning_rate": 3.3934309175294535e-05, |
|
"logits/chosen": 23.486909866333008, |
|
"logits/rejected": 23.229293823242188, |
|
"logps/chosen": -26.536273956298828, |
|
"logps/rejected": -35.171104431152344, |
|
"loss": 0.4865, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.39322757720947266, |
|
"rewards/margins": 0.7671092748641968, |
|
"rewards/rejected": -1.160336971282959, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.32131381649410923, |
|
"eval_logits/chosen": 22.803043365478516, |
|
"eval_logits/rejected": 22.84783172607422, |
|
"eval_logps/chosen": -47.309364318847656, |
|
"eval_logps/rejected": -47.91334533691406, |
|
"eval_loss": 0.4862760305404663, |
|
"eval_rewards/accuracies": 0.7777777910232544, |
|
"eval_rewards/chosen": -0.5669471025466919, |
|
"eval_rewards/margins": 1.0206618309020996, |
|
"eval_rewards/rejected": -1.587609052658081, |
|
"eval_runtime": 97.8776, |
|
"eval_samples_per_second": 14.631, |
|
"eval_steps_per_second": 0.92, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.32488397001071045, |
|
"grad_norm": 9.65831470489502, |
|
"learning_rate": 3.375580149946448e-05, |
|
"logits/chosen": 26.497304916381836, |
|
"logits/rejected": 26.356307983398438, |
|
"logps/chosen": -28.236690521240234, |
|
"logps/rejected": -34.156455993652344, |
|
"loss": 0.4985, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.3999934196472168, |
|
"rewards/margins": 0.484555184841156, |
|
"rewards/rejected": -0.884548544883728, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.3284541235273117, |
|
"grad_norm": 6.470170021057129, |
|
"learning_rate": 3.357729382363442e-05, |
|
"logits/chosen": 23.454336166381836, |
|
"logits/rejected": 23.263614654541016, |
|
"logps/chosen": -65.40380859375, |
|
"logps/rejected": -39.344261169433594, |
|
"loss": 0.5108, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.46298104524612427, |
|
"rewards/margins": 1.2011882066726685, |
|
"rewards/rejected": -1.6641693115234375, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.3320242770439129, |
|
"grad_norm": 10.672679901123047, |
|
"learning_rate": 3.3398786147804354e-05, |
|
"logits/chosen": 24.854284286499023, |
|
"logits/rejected": 24.732633590698242, |
|
"logps/chosen": -60.40422821044922, |
|
"logps/rejected": -35.107269287109375, |
|
"loss": 0.5479, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.48133277893066406, |
|
"rewards/margins": 0.2392297238111496, |
|
"rewards/rejected": -0.7205625176429749, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.3355944305605141, |
|
"grad_norm": 11.122824668884277, |
|
"learning_rate": 3.32202784719743e-05, |
|
"logits/chosen": 25.70233726501465, |
|
"logits/rejected": 25.91379737854004, |
|
"logps/chosen": -45.25204086303711, |
|
"logps/rejected": -32.91322326660156, |
|
"loss": 0.4858, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.7527903318405151, |
|
"rewards/margins": 0.9621620178222656, |
|
"rewards/rejected": -1.7149524688720703, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.3391645840771153, |
|
"grad_norm": 8.845693588256836, |
|
"learning_rate": 3.3041770796144235e-05, |
|
"logits/chosen": 23.679813385009766, |
|
"logits/rejected": 24.00821304321289, |
|
"logps/chosen": -53.314300537109375, |
|
"logps/rejected": -57.53212356567383, |
|
"loss": 0.4488, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.8183348774909973, |
|
"rewards/margins": 1.1887353658676147, |
|
"rewards/rejected": -2.007070302963257, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.34273473759371653, |
|
"grad_norm": 11.808037757873535, |
|
"learning_rate": 3.286326312031417e-05, |
|
"logits/chosen": 21.081445693969727, |
|
"logits/rejected": 20.683502197265625, |
|
"logps/chosen": -59.89850997924805, |
|
"logps/rejected": -69.1084213256836, |
|
"loss": 0.5822, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.4903489649295807, |
|
"rewards/margins": 1.016610860824585, |
|
"rewards/rejected": -1.5069600343704224, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.34630489111031776, |
|
"grad_norm": 4.3298845291137695, |
|
"learning_rate": 3.2684755444484116e-05, |
|
"logits/chosen": 23.46924591064453, |
|
"logits/rejected": 23.299514770507812, |
|
"logps/chosen": -45.10730743408203, |
|
"logps/rejected": -34.44489288330078, |
|
"loss": 0.5883, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9016550779342651, |
|
"rewards/margins": 0.5788153409957886, |
|
"rewards/rejected": -1.4804704189300537, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.349875044626919, |
|
"grad_norm": 5.846831798553467, |
|
"learning_rate": 3.2506247768654054e-05, |
|
"logits/chosen": 22.685070037841797, |
|
"logits/rejected": 23.413772583007812, |
|
"logps/chosen": -42.309078216552734, |
|
"logps/rejected": -63.73997116088867, |
|
"loss": 0.4721, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.5896610021591187, |
|
"rewards/margins": 1.2323236465454102, |
|
"rewards/rejected": -1.8219846487045288, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.35344519814352016, |
|
"grad_norm": 8.441866874694824, |
|
"learning_rate": 3.232774009282399e-05, |
|
"logits/chosen": 22.13656997680664, |
|
"logits/rejected": 22.546173095703125, |
|
"logps/chosen": -39.778053283691406, |
|
"logps/rejected": -48.562049865722656, |
|
"loss": 0.5298, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4805465340614319, |
|
"rewards/margins": 1.1920750141143799, |
|
"rewards/rejected": -1.6726213693618774, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.3570153516601214, |
|
"grad_norm": 8.373982429504395, |
|
"learning_rate": 3.214923241699393e-05, |
|
"logits/chosen": 23.80282974243164, |
|
"logits/rejected": 23.995637893676758, |
|
"logps/chosen": -41.55686950683594, |
|
"logps/rejected": -39.182655334472656, |
|
"loss": 0.6191, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.36075398325920105, |
|
"rewards/margins": 1.393147349357605, |
|
"rewards/rejected": -1.7539011240005493, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.3570153516601214, |
|
"eval_logits/chosen": 21.82489013671875, |
|
"eval_logits/rejected": 21.88067054748535, |
|
"eval_logps/chosen": -47.92158126831055, |
|
"eval_logps/rejected": -49.27989196777344, |
|
"eval_loss": 0.474328875541687, |
|
"eval_rewards/accuracies": 0.7888888716697693, |
|
"eval_rewards/chosen": -0.6281694173812866, |
|
"eval_rewards/margins": 1.0960942506790161, |
|
"eval_rewards/rejected": -1.7242639064788818, |
|
"eval_runtime": 97.8094, |
|
"eval_samples_per_second": 14.641, |
|
"eval_steps_per_second": 0.92, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.3605855051767226, |
|
"grad_norm": 10.327641487121582, |
|
"learning_rate": 3.197072474116387e-05, |
|
"logits/chosen": 23.04856300354004, |
|
"logits/rejected": 23.33269691467285, |
|
"logps/chosen": -47.23090362548828, |
|
"logps/rejected": -35.351966857910156, |
|
"loss": 0.5248, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.2200241088867188, |
|
"rewards/margins": 0.10034622997045517, |
|
"rewards/rejected": -1.3203704357147217, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.36415565869332384, |
|
"grad_norm": 13.408589363098145, |
|
"learning_rate": 3.179221706533381e-05, |
|
"logits/chosen": 20.803436279296875, |
|
"logits/rejected": 20.647022247314453, |
|
"logps/chosen": -42.571128845214844, |
|
"logps/rejected": -34.00419235229492, |
|
"loss": 0.5064, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.104473352432251, |
|
"rewards/margins": 0.35474246740341187, |
|
"rewards/rejected": -1.4592156410217285, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.367725812209925, |
|
"grad_norm": 5.063046455383301, |
|
"learning_rate": 3.161370938950375e-05, |
|
"logits/chosen": 23.382953643798828, |
|
"logits/rejected": 23.356374740600586, |
|
"logps/chosen": -35.55125045776367, |
|
"logps/rejected": -40.64656066894531, |
|
"loss": 0.5387, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9775670766830444, |
|
"rewards/margins": 0.6872422099113464, |
|
"rewards/rejected": -1.664809226989746, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.37129596572652623, |
|
"grad_norm": 7.303701877593994, |
|
"learning_rate": 3.143520171367369e-05, |
|
"logits/chosen": 24.133228302001953, |
|
"logits/rejected": 24.389375686645508, |
|
"logps/chosen": -50.63660430908203, |
|
"logps/rejected": -54.03656005859375, |
|
"loss": 0.4985, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.38200637698173523, |
|
"rewards/margins": 1.240886926651001, |
|
"rewards/rejected": -1.6228930950164795, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.37486611924312746, |
|
"grad_norm": 6.75851583480835, |
|
"learning_rate": 3.125669403784363e-05, |
|
"logits/chosen": 19.592905044555664, |
|
"logits/rejected": 19.829776763916016, |
|
"logps/chosen": -59.42816162109375, |
|
"logps/rejected": -58.9128303527832, |
|
"loss": 0.4953, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.082971215248108, |
|
"rewards/margins": 0.658551037311554, |
|
"rewards/rejected": -1.7415224313735962, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.3784362727597287, |
|
"grad_norm": 8.15499210357666, |
|
"learning_rate": 3.1078186362013566e-05, |
|
"logits/chosen": 25.108243942260742, |
|
"logits/rejected": 25.314802169799805, |
|
"logps/chosen": -44.924949645996094, |
|
"logps/rejected": -37.734554290771484, |
|
"loss": 0.4964, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3476589620113373, |
|
"rewards/margins": 1.265342354774475, |
|
"rewards/rejected": -1.6130014657974243, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.38200642627632986, |
|
"grad_norm": 14.444954872131348, |
|
"learning_rate": 3.089967868618351e-05, |
|
"logits/chosen": 24.31263542175293, |
|
"logits/rejected": 24.08812141418457, |
|
"logps/chosen": -42.63981246948242, |
|
"logps/rejected": -52.27241897583008, |
|
"loss": 0.5519, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.5805258750915527, |
|
"rewards/margins": 0.8533035516738892, |
|
"rewards/rejected": -1.4338295459747314, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.3855765797929311, |
|
"grad_norm": 7.73518180847168, |
|
"learning_rate": 3.072117101035345e-05, |
|
"logits/chosen": 25.645715713500977, |
|
"logits/rejected": 25.8345947265625, |
|
"logps/chosen": -39.769630432128906, |
|
"logps/rejected": -35.580650329589844, |
|
"loss": 0.4351, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.4297657907009125, |
|
"rewards/margins": 1.0592445135116577, |
|
"rewards/rejected": -1.4890105724334717, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.3891467333095323, |
|
"grad_norm": 6.938913345336914, |
|
"learning_rate": 3.0542663334523384e-05, |
|
"logits/chosen": 21.569717407226562, |
|
"logits/rejected": 21.530803680419922, |
|
"logps/chosen": -42.12656021118164, |
|
"logps/rejected": -51.796173095703125, |
|
"loss": 0.5753, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.23102021217346191, |
|
"rewards/margins": 1.1891123056411743, |
|
"rewards/rejected": -1.4201325178146362, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.39271688682613354, |
|
"grad_norm": 14.953814506530762, |
|
"learning_rate": 3.0364155658693328e-05, |
|
"logits/chosen": 23.64889144897461, |
|
"logits/rejected": 24.02678680419922, |
|
"logps/chosen": -32.63819122314453, |
|
"logps/rejected": -34.16315841674805, |
|
"loss": 0.5176, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6592009663581848, |
|
"rewards/margins": 1.2730789184570312, |
|
"rewards/rejected": -1.9322798252105713, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.39271688682613354, |
|
"eval_logits/chosen": 22.99826431274414, |
|
"eval_logits/rejected": 23.05336570739746, |
|
"eval_logps/chosen": -45.58966064453125, |
|
"eval_logps/rejected": -47.14959716796875, |
|
"eval_loss": 0.468368798494339, |
|
"eval_rewards/accuracies": 0.8055555820465088, |
|
"eval_rewards/chosen": -0.3949766457080841, |
|
"eval_rewards/margins": 1.116257905960083, |
|
"eval_rewards/rejected": -1.5112345218658447, |
|
"eval_runtime": 97.8365, |
|
"eval_samples_per_second": 14.637, |
|
"eval_steps_per_second": 0.92, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.39628704034273476, |
|
"grad_norm": 8.018348693847656, |
|
"learning_rate": 3.0185647982863262e-05, |
|
"logits/chosen": 21.868206024169922, |
|
"logits/rejected": 21.934436798095703, |
|
"logps/chosen": -12.12812614440918, |
|
"logps/rejected": -39.364479064941406, |
|
"loss": 0.6244, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.7536962032318115, |
|
"rewards/margins": 0.8047281503677368, |
|
"rewards/rejected": -1.5584242343902588, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.39985719385933594, |
|
"grad_norm": 14.252638816833496, |
|
"learning_rate": 3.0007140307033206e-05, |
|
"logits/chosen": 21.122318267822266, |
|
"logits/rejected": 21.303096771240234, |
|
"logps/chosen": -25.239931106567383, |
|
"logps/rejected": -44.85738754272461, |
|
"loss": 0.5106, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9396845102310181, |
|
"rewards/margins": 0.4221257269382477, |
|
"rewards/rejected": -1.3618100881576538, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.40342734737593716, |
|
"grad_norm": 10.65608024597168, |
|
"learning_rate": 2.982863263120314e-05, |
|
"logits/chosen": 27.12192153930664, |
|
"logits/rejected": 27.36038589477539, |
|
"logps/chosen": -30.87465476989746, |
|
"logps/rejected": -33.55220031738281, |
|
"loss": 0.5345, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.76970374584198, |
|
"rewards/margins": 0.6138733625411987, |
|
"rewards/rejected": -1.3835771083831787, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.4069975008925384, |
|
"grad_norm": 8.322869300842285, |
|
"learning_rate": 2.965012495537308e-05, |
|
"logits/chosen": 21.752477645874023, |
|
"logits/rejected": 21.980819702148438, |
|
"logps/chosen": -45.920928955078125, |
|
"logps/rejected": -35.359962463378906, |
|
"loss": 0.4077, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -1.1174064874649048, |
|
"rewards/margins": 0.1437036395072937, |
|
"rewards/rejected": -1.2611101865768433, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.4105676544091396, |
|
"grad_norm": 7.159435272216797, |
|
"learning_rate": 2.9471617279543025e-05, |
|
"logits/chosen": 25.57335662841797, |
|
"logits/rejected": 25.599693298339844, |
|
"logps/chosen": -35.72122573852539, |
|
"logps/rejected": -55.0676383972168, |
|
"loss": 0.4664, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.37138763070106506, |
|
"rewards/margins": 1.5116512775421143, |
|
"rewards/rejected": -1.883039116859436, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.4141378079257408, |
|
"grad_norm": 6.634488582611084, |
|
"learning_rate": 2.929310960371296e-05, |
|
"logits/chosen": 20.41391944885254, |
|
"logits/rejected": 20.523845672607422, |
|
"logps/chosen": -42.240272521972656, |
|
"logps/rejected": -59.96808624267578, |
|
"loss": 0.3929, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6876804828643799, |
|
"rewards/margins": 1.695648431777954, |
|
"rewards/rejected": -2.383328914642334, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.417707961442342, |
|
"grad_norm": 6.726844787597656, |
|
"learning_rate": 2.91146019278829e-05, |
|
"logits/chosen": 25.41510009765625, |
|
"logits/rejected": 25.722497940063477, |
|
"logps/chosen": -66.07826232910156, |
|
"logps/rejected": -49.482093811035156, |
|
"loss": 0.493, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.5786004066467285, |
|
"rewards/margins": 1.6916053295135498, |
|
"rewards/rejected": -2.2702059745788574, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.42127811495894324, |
|
"grad_norm": 5.256776332855225, |
|
"learning_rate": 2.8936094252052844e-05, |
|
"logits/chosen": 24.357757568359375, |
|
"logits/rejected": 24.397367477416992, |
|
"logps/chosen": -45.10001754760742, |
|
"logps/rejected": -79.18079376220703, |
|
"loss": 0.4627, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.1261012703180313, |
|
"rewards/margins": 1.3214435577392578, |
|
"rewards/rejected": -1.447544813156128, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.42484826847554447, |
|
"grad_norm": 23.727317810058594, |
|
"learning_rate": 2.8757586576222777e-05, |
|
"logits/chosen": 26.00099754333496, |
|
"logits/rejected": 25.485157012939453, |
|
"logps/chosen": -36.56196594238281, |
|
"logps/rejected": -52.7928581237793, |
|
"loss": 0.5167, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.9087120294570923, |
|
"rewards/margins": 1.3440439701080322, |
|
"rewards/rejected": -2.252755880355835, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.42841842199214564, |
|
"grad_norm": 7.284200191497803, |
|
"learning_rate": 2.8579078900392718e-05, |
|
"logits/chosen": 27.846277236938477, |
|
"logits/rejected": 28.00874900817871, |
|
"logps/chosen": -59.95824432373047, |
|
"logps/rejected": -73.82151794433594, |
|
"loss": 0.4865, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.411950021982193, |
|
"rewards/margins": 1.2028063535690308, |
|
"rewards/rejected": -1.6147563457489014, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.42841842199214564, |
|
"eval_logits/chosen": 22.84278106689453, |
|
"eval_logits/rejected": 22.89705467224121, |
|
"eval_logps/chosen": -46.42566680908203, |
|
"eval_logps/rejected": -48.32484817504883, |
|
"eval_loss": 0.46235060691833496, |
|
"eval_rewards/accuracies": 0.8222222328186035, |
|
"eval_rewards/chosen": -0.4785774350166321, |
|
"eval_rewards/margins": 1.1501818895339966, |
|
"eval_rewards/rejected": -1.6287592649459839, |
|
"eval_runtime": 97.7745, |
|
"eval_samples_per_second": 14.646, |
|
"eval_steps_per_second": 0.92, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.43198857550874686, |
|
"grad_norm": 10.374000549316406, |
|
"learning_rate": 2.8400571224562655e-05, |
|
"logits/chosen": 27.34882164001465, |
|
"logits/rejected": 26.882160186767578, |
|
"logps/chosen": -17.03693199157715, |
|
"logps/rejected": -37.147464752197266, |
|
"loss": 0.5116, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.975134015083313, |
|
"rewards/margins": 0.5972901582717896, |
|
"rewards/rejected": -1.5724241733551025, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.4355587290253481, |
|
"grad_norm": 8.28000259399414, |
|
"learning_rate": 2.8222063548732596e-05, |
|
"logits/chosen": 24.054485321044922, |
|
"logits/rejected": 24.115304946899414, |
|
"logps/chosen": -40.294517517089844, |
|
"logps/rejected": -29.917953491210938, |
|
"loss": 0.4612, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.7177508473396301, |
|
"rewards/margins": 0.1164645105600357, |
|
"rewards/rejected": -0.8342153429985046, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.4391288825419493, |
|
"grad_norm": 8.388548851013184, |
|
"learning_rate": 2.8043555872902537e-05, |
|
"logits/chosen": 25.40005111694336, |
|
"logits/rejected": 25.363679885864258, |
|
"logps/chosen": -33.07610321044922, |
|
"logps/rejected": -46.69367218017578, |
|
"loss": 0.5705, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6370976567268372, |
|
"rewards/margins": 0.2812044620513916, |
|
"rewards/rejected": -0.9183019399642944, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.44269903605855054, |
|
"grad_norm": 10.984318733215332, |
|
"learning_rate": 2.7865048197072474e-05, |
|
"logits/chosen": 21.659027099609375, |
|
"logits/rejected": 21.85382843017578, |
|
"logps/chosen": -73.49362182617188, |
|
"logps/rejected": -69.6221694946289, |
|
"loss": 0.5531, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2802386283874512, |
|
"rewards/margins": 0.23997211456298828, |
|
"rewards/rejected": -1.520210862159729, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.4462691895751517, |
|
"grad_norm": 6.512259006500244, |
|
"learning_rate": 2.7686540521242415e-05, |
|
"logits/chosen": 25.227018356323242, |
|
"logits/rejected": 24.743755340576172, |
|
"logps/chosen": -47.708919525146484, |
|
"logps/rejected": -72.66358947753906, |
|
"loss": 0.5644, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -1.2783235311508179, |
|
"rewards/margins": 0.3692890405654907, |
|
"rewards/rejected": -1.6476128101348877, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.44983934309175294, |
|
"grad_norm": 13.5468168258667, |
|
"learning_rate": 2.7508032845412352e-05, |
|
"logits/chosen": 22.638336181640625, |
|
"logits/rejected": 22.76083755493164, |
|
"logps/chosen": -33.97126007080078, |
|
"logps/rejected": -34.66718673706055, |
|
"loss": 0.486, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6014578342437744, |
|
"rewards/margins": 1.0001842975616455, |
|
"rewards/rejected": -1.6016420125961304, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.45340949660835417, |
|
"grad_norm": 10.532015800476074, |
|
"learning_rate": 2.7329525169582293e-05, |
|
"logits/chosen": 20.29448699951172, |
|
"logits/rejected": 20.10554313659668, |
|
"logps/chosen": -36.69490051269531, |
|
"logps/rejected": -56.0795783996582, |
|
"loss": 0.5096, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.4149419665336609, |
|
"rewards/margins": 0.9424688220024109, |
|
"rewards/rejected": -1.3574106693267822, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.4569796501249554, |
|
"grad_norm": 5.038132190704346, |
|
"learning_rate": 2.7151017493752233e-05, |
|
"logits/chosen": 25.44969367980957, |
|
"logits/rejected": 25.12093734741211, |
|
"logps/chosen": -45.38970184326172, |
|
"logps/rejected": -48.31528091430664, |
|
"loss": 0.4794, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.48757249116897583, |
|
"rewards/margins": 0.5879091024398804, |
|
"rewards/rejected": -1.075481653213501, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.46054980364155657, |
|
"grad_norm": 14.402963638305664, |
|
"learning_rate": 2.697250981792217e-05, |
|
"logits/chosen": 24.284452438354492, |
|
"logits/rejected": 23.935733795166016, |
|
"logps/chosen": -44.071624755859375, |
|
"logps/rejected": -44.24872589111328, |
|
"loss": 0.5999, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -1.4019516706466675, |
|
"rewards/margins": 0.15506015717983246, |
|
"rewards/rejected": -1.5570119619369507, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.4641199571581578, |
|
"grad_norm": 7.078202247619629, |
|
"learning_rate": 2.679400214209211e-05, |
|
"logits/chosen": 20.088533401489258, |
|
"logits/rejected": 20.420536041259766, |
|
"logps/chosen": -50.00440216064453, |
|
"logps/rejected": -56.248382568359375, |
|
"loss": 0.4642, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.6917403936386108, |
|
"rewards/margins": 1.1690679788589478, |
|
"rewards/rejected": -1.8608081340789795, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.4641199571581578, |
|
"eval_logits/chosen": 21.782039642333984, |
|
"eval_logits/rejected": 21.82331085205078, |
|
"eval_logps/chosen": -46.87673568725586, |
|
"eval_logps/rejected": -49.23981475830078, |
|
"eval_loss": 0.4559466540813446, |
|
"eval_rewards/accuracies": 0.8388888835906982, |
|
"eval_rewards/chosen": -0.523684024810791, |
|
"eval_rewards/margins": 1.196572184562683, |
|
"eval_rewards/rejected": -1.7202562093734741, |
|
"eval_runtime": 97.7244, |
|
"eval_samples_per_second": 14.653, |
|
"eval_steps_per_second": 0.921, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.467690110674759, |
|
"grad_norm": 10.393937110900879, |
|
"learning_rate": 2.6615494466262052e-05, |
|
"logits/chosen": 20.803997039794922, |
|
"logits/rejected": 20.7797794342041, |
|
"logps/chosen": -44.06121826171875, |
|
"logps/rejected": -47.28874588012695, |
|
"loss": 0.5374, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.47202056646347046, |
|
"rewards/margins": 0.9966621398925781, |
|
"rewards/rejected": -1.4686826467514038, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.47126026419136025, |
|
"grad_norm": 6.863224983215332, |
|
"learning_rate": 2.643698679043199e-05, |
|
"logits/chosen": 19.93913459777832, |
|
"logits/rejected": 20.202621459960938, |
|
"logps/chosen": -37.7781867980957, |
|
"logps/rejected": -39.12306594848633, |
|
"loss": 0.5426, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.0681989192962646, |
|
"rewards/margins": 0.03398240730166435, |
|
"rewards/rejected": -1.1021811962127686, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.4748304177079614, |
|
"grad_norm": 9.727211952209473, |
|
"learning_rate": 2.625847911460193e-05, |
|
"logits/chosen": 19.134681701660156, |
|
"logits/rejected": 19.11941146850586, |
|
"logps/chosen": -57.157989501953125, |
|
"logps/rejected": -72.72858428955078, |
|
"loss": 0.507, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9592474102973938, |
|
"rewards/margins": 1.3726739883422852, |
|
"rewards/rejected": -2.331921339035034, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.47840057122456264, |
|
"grad_norm": 9.726844787597656, |
|
"learning_rate": 2.6079971438771867e-05, |
|
"logits/chosen": 22.685604095458984, |
|
"logits/rejected": 23.21428108215332, |
|
"logps/chosen": -57.5455436706543, |
|
"logps/rejected": -83.27085876464844, |
|
"loss": 0.379, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.020816683769226, |
|
"rewards/margins": 1.3953444957733154, |
|
"rewards/rejected": -2.416161298751831, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.48197072474116387, |
|
"grad_norm": 9.248942375183105, |
|
"learning_rate": 2.5901463762941808e-05, |
|
"logits/chosen": 19.568103790283203, |
|
"logits/rejected": 19.459789276123047, |
|
"logps/chosen": -22.311328887939453, |
|
"logps/rejected": -22.42624282836914, |
|
"loss": 0.4095, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.12884020805358887, |
|
"rewards/margins": 1.4438714981079102, |
|
"rewards/rejected": -1.572711706161499, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.4855408782577651, |
|
"grad_norm": 9.093886375427246, |
|
"learning_rate": 2.572295608711175e-05, |
|
"logits/chosen": 19.389034271240234, |
|
"logits/rejected": 19.35251235961914, |
|
"logps/chosen": -33.7999382019043, |
|
"logps/rejected": -42.553993225097656, |
|
"loss": 0.4346, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5412906408309937, |
|
"rewards/margins": 0.463665634393692, |
|
"rewards/rejected": -1.0049562454223633, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.4891110317743663, |
|
"grad_norm": 11.408600807189941, |
|
"learning_rate": 2.5544448411281686e-05, |
|
"logits/chosen": 19.231670379638672, |
|
"logits/rejected": 19.629993438720703, |
|
"logps/chosen": -31.9910831451416, |
|
"logps/rejected": -59.96710205078125, |
|
"loss": 0.6003, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.4423964023590088, |
|
"rewards/margins": 1.090103268623352, |
|
"rewards/rejected": -1.5324996709823608, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.4926811852909675, |
|
"grad_norm": 5.170421123504639, |
|
"learning_rate": 2.5365940735451627e-05, |
|
"logits/chosen": 27.88661766052246, |
|
"logits/rejected": 27.40877914428711, |
|
"logps/chosen": -45.87278366088867, |
|
"logps/rejected": -45.47796630859375, |
|
"loss": 0.4592, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.8744171261787415, |
|
"rewards/margins": 0.6523105502128601, |
|
"rewards/rejected": -1.5267279148101807, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.4962513388075687, |
|
"grad_norm": 5.33081579208374, |
|
"learning_rate": 2.5187433059621564e-05, |
|
"logits/chosen": 21.309518814086914, |
|
"logits/rejected": 21.40631866455078, |
|
"logps/chosen": -36.923614501953125, |
|
"logps/rejected": -56.65825271606445, |
|
"loss": 0.5338, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.4122052192687988, |
|
"rewards/margins": 0.27405181527137756, |
|
"rewards/rejected": -1.686257004737854, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.49982149232416995, |
|
"grad_norm": 10.743315696716309, |
|
"learning_rate": 2.5008925383791505e-05, |
|
"logits/chosen": 24.20417594909668, |
|
"logits/rejected": 24.082937240600586, |
|
"logps/chosen": -33.370670318603516, |
|
"logps/rejected": -40.95515441894531, |
|
"loss": 0.4764, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0275386571884155, |
|
"rewards/margins": 0.8042508959770203, |
|
"rewards/rejected": -1.8317893743515015, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.49982149232416995, |
|
"eval_logits/chosen": 20.522747039794922, |
|
"eval_logits/rejected": 20.564208984375, |
|
"eval_logps/chosen": -48.346073150634766, |
|
"eval_logps/rejected": -51.10609817504883, |
|
"eval_loss": 0.4600922167301178, |
|
"eval_rewards/accuracies": 0.800000011920929, |
|
"eval_rewards/chosen": -0.6706183552742004, |
|
"eval_rewards/margins": 1.2362663745880127, |
|
"eval_rewards/rejected": -1.906884789466858, |
|
"eval_runtime": 99.1997, |
|
"eval_samples_per_second": 14.436, |
|
"eval_steps_per_second": 0.907, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.5033916458407711, |
|
"grad_norm": 7.706882476806641, |
|
"learning_rate": 2.4830417707961442e-05, |
|
"logits/chosen": 18.212251663208008, |
|
"logits/rejected": 17.935840606689453, |
|
"logps/chosen": -55.223960876464844, |
|
"logps/rejected": -50.36370086669922, |
|
"loss": 0.4297, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1679266691207886, |
|
"rewards/margins": 1.6370041370391846, |
|
"rewards/rejected": -2.8049309253692627, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.5069617993573724, |
|
"grad_norm": 9.615565299987793, |
|
"learning_rate": 2.4651910032131383e-05, |
|
"logits/chosen": 22.241762161254883, |
|
"logits/rejected": 22.47495460510254, |
|
"logps/chosen": -67.49073791503906, |
|
"logps/rejected": -73.30606079101562, |
|
"loss": 0.4645, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.1100456714630127, |
|
"rewards/margins": 1.8767004013061523, |
|
"rewards/rejected": -2.986745834350586, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.5105319528739736, |
|
"grad_norm": 5.209810733795166, |
|
"learning_rate": 2.4473402356301323e-05, |
|
"logits/chosen": 22.897598266601562, |
|
"logits/rejected": 22.638469696044922, |
|
"logps/chosen": -53.96964645385742, |
|
"logps/rejected": -58.29240036010742, |
|
"loss": 0.4386, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6232610940933228, |
|
"rewards/margins": 0.9309207201004028, |
|
"rewards/rejected": -1.554181694984436, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.5141021063905747, |
|
"grad_norm": 8.026928901672363, |
|
"learning_rate": 2.429489468047126e-05, |
|
"logits/chosen": 22.4190731048584, |
|
"logits/rejected": 22.359643936157227, |
|
"logps/chosen": -48.24509048461914, |
|
"logps/rejected": -47.77333450317383, |
|
"loss": 0.4485, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1848137378692627, |
|
"rewards/margins": 0.29461732506752014, |
|
"rewards/rejected": -1.479430913925171, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.517672259907176, |
|
"grad_norm": 10.013033866882324, |
|
"learning_rate": 2.41163870046412e-05, |
|
"logits/chosen": 22.765010833740234, |
|
"logits/rejected": 22.88141441345215, |
|
"logps/chosen": -54.80376052856445, |
|
"logps/rejected": -59.78020477294922, |
|
"loss": 0.5059, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1334106922149658, |
|
"rewards/margins": 0.6534554362297058, |
|
"rewards/rejected": -1.7868661880493164, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.5212424134237772, |
|
"grad_norm": 9.498845100402832, |
|
"learning_rate": 2.3937879328811142e-05, |
|
"logits/chosen": 22.87884521484375, |
|
"logits/rejected": 22.664630889892578, |
|
"logps/chosen": -38.91038131713867, |
|
"logps/rejected": -40.58343505859375, |
|
"loss": 0.4454, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.4328577518463135, |
|
"rewards/margins": 1.4672273397445679, |
|
"rewards/rejected": -1.9000848531723022, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.5248125669403785, |
|
"grad_norm": 9.389070510864258, |
|
"learning_rate": 2.375937165298108e-05, |
|
"logits/chosen": 23.82521629333496, |
|
"logits/rejected": 24.009113311767578, |
|
"logps/chosen": -60.59919357299805, |
|
"logps/rejected": -68.24311828613281, |
|
"loss": 0.4305, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1264002323150635, |
|
"rewards/margins": 1.3599331378936768, |
|
"rewards/rejected": -2.4863333702087402, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.5283827204569796, |
|
"grad_norm": 11.059606552124023, |
|
"learning_rate": 2.358086397715102e-05, |
|
"logits/chosen": 25.180397033691406, |
|
"logits/rejected": 25.406417846679688, |
|
"logps/chosen": -53.621864318847656, |
|
"logps/rejected": -58.304664611816406, |
|
"loss": 0.471, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3794161081314087, |
|
"rewards/margins": 0.9802428483963013, |
|
"rewards/rejected": -2.359658718109131, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.5319528739735808, |
|
"grad_norm": 19.903459548950195, |
|
"learning_rate": 2.3402356301320957e-05, |
|
"logits/chosen": 23.71100616455078, |
|
"logits/rejected": 23.361339569091797, |
|
"logps/chosen": -51.43365478515625, |
|
"logps/rejected": -51.136474609375, |
|
"loss": 0.4899, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7299798727035522, |
|
"rewards/margins": 1.1167805194854736, |
|
"rewards/rejected": -1.8467603921890259, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.5355230274901821, |
|
"grad_norm": 9.738292694091797, |
|
"learning_rate": 2.3223848625490895e-05, |
|
"logits/chosen": 23.410078048706055, |
|
"logits/rejected": 23.206371307373047, |
|
"logps/chosen": -47.294952392578125, |
|
"logps/rejected": -54.903099060058594, |
|
"loss": 0.4734, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9249691963195801, |
|
"rewards/margins": 0.9751961827278137, |
|
"rewards/rejected": -1.9001652002334595, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.5355230274901821, |
|
"eval_logits/chosen": 20.92539405822754, |
|
"eval_logits/rejected": 20.960508346557617, |
|
"eval_logps/chosen": -48.110416412353516, |
|
"eval_logps/rejected": -51.09627151489258, |
|
"eval_loss": 0.4577901065349579, |
|
"eval_rewards/accuracies": 0.800000011920929, |
|
"eval_rewards/chosen": -0.6470518112182617, |
|
"eval_rewards/margins": 1.2588502168655396, |
|
"eval_rewards/rejected": -1.9059022665023804, |
|
"eval_runtime": 97.9322, |
|
"eval_samples_per_second": 14.622, |
|
"eval_steps_per_second": 0.919, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.5390931810067833, |
|
"grad_norm": 8.651128768920898, |
|
"learning_rate": 2.304534094966084e-05, |
|
"logits/chosen": 22.864696502685547, |
|
"logits/rejected": 22.489166259765625, |
|
"logps/chosen": -24.272085189819336, |
|
"logps/rejected": -25.032752990722656, |
|
"loss": 0.5041, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.06202169135212898, |
|
"rewards/margins": 1.4595599174499512, |
|
"rewards/rejected": -1.5215816497802734, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.5426633345233846, |
|
"grad_norm": 13.196417808532715, |
|
"learning_rate": 2.2866833273830776e-05, |
|
"logits/chosen": 23.409244537353516, |
|
"logits/rejected": 23.42110824584961, |
|
"logps/chosen": -70.71308898925781, |
|
"logps/rejected": -65.17359161376953, |
|
"loss": 0.4541, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.825936496257782, |
|
"rewards/margins": 1.5356091260910034, |
|
"rewards/rejected": -2.3615458011627197, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.5462334880399857, |
|
"grad_norm": 13.838322639465332, |
|
"learning_rate": 2.2688325598000713e-05, |
|
"logits/chosen": 19.617794036865234, |
|
"logits/rejected": 19.378015518188477, |
|
"logps/chosen": -63.78108596801758, |
|
"logps/rejected": -57.444053649902344, |
|
"loss": 0.5129, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7819554805755615, |
|
"rewards/margins": 1.4755408763885498, |
|
"rewards/rejected": -2.2574961185455322, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.5498036415565869, |
|
"grad_norm": 3.351306915283203, |
|
"learning_rate": 2.2509817922170654e-05, |
|
"logits/chosen": 18.362462997436523, |
|
"logits/rejected": 18.116527557373047, |
|
"logps/chosen": -61.28546905517578, |
|
"logps/rejected": -71.31098937988281, |
|
"loss": 0.667, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7423151135444641, |
|
"rewards/margins": 1.1754957437515259, |
|
"rewards/rejected": -1.9178107976913452, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.5533737950731882, |
|
"grad_norm": 10.512808799743652, |
|
"learning_rate": 2.2331310246340595e-05, |
|
"logits/chosen": 23.116546630859375, |
|
"logits/rejected": 23.27178955078125, |
|
"logps/chosen": -71.55606079101562, |
|
"logps/rejected": -59.09543991088867, |
|
"loss": 0.5858, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.0912095308303833, |
|
"rewards/margins": 1.2087962627410889, |
|
"rewards/rejected": -2.3000059127807617, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.5569439485897894, |
|
"grad_norm": 6.164778709411621, |
|
"learning_rate": 2.2152802570510532e-05, |
|
"logits/chosen": 22.27012062072754, |
|
"logits/rejected": 22.54111671447754, |
|
"logps/chosen": -50.849666595458984, |
|
"logps/rejected": -51.00347900390625, |
|
"loss": 0.4023, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.8672236204147339, |
|
"rewards/margins": 0.6719032526016235, |
|
"rewards/rejected": -1.539126992225647, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.5605141021063905, |
|
"grad_norm": 8.524353981018066, |
|
"learning_rate": 2.1974294894680473e-05, |
|
"logits/chosen": 24.240970611572266, |
|
"logits/rejected": 24.447324752807617, |
|
"logps/chosen": -50.92961120605469, |
|
"logps/rejected": -25.607860565185547, |
|
"loss": 0.5566, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.0541558265686035, |
|
"rewards/margins": 0.1918071210384369, |
|
"rewards/rejected": -1.2459629774093628, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.5640842556229918, |
|
"grad_norm": 3.2097225189208984, |
|
"learning_rate": 2.179578721885041e-05, |
|
"logits/chosen": 21.733001708984375, |
|
"logits/rejected": 21.849323272705078, |
|
"logps/chosen": -59.861595153808594, |
|
"logps/rejected": -74.30784606933594, |
|
"loss": 0.4727, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.154857873916626, |
|
"rewards/margins": 1.554736852645874, |
|
"rewards/rejected": -2.709594249725342, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.567654409139593, |
|
"grad_norm": 10.112348556518555, |
|
"learning_rate": 2.1617279543020354e-05, |
|
"logits/chosen": 22.288734436035156, |
|
"logits/rejected": 22.532554626464844, |
|
"logps/chosen": -49.292564392089844, |
|
"logps/rejected": -46.22431182861328, |
|
"loss": 0.3915, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7656620144844055, |
|
"rewards/margins": 0.9457171559333801, |
|
"rewards/rejected": -1.7113794088363647, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.5712245626561943, |
|
"grad_norm": 7.5602288246154785, |
|
"learning_rate": 2.143877186719029e-05, |
|
"logits/chosen": 16.03813934326172, |
|
"logits/rejected": 16.125709533691406, |
|
"logps/chosen": -48.10554504394531, |
|
"logps/rejected": -57.5333137512207, |
|
"loss": 0.4459, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9440638422966003, |
|
"rewards/margins": 0.8066110610961914, |
|
"rewards/rejected": -1.750674843788147, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.5712245626561943, |
|
"eval_logits/chosen": 21.25445556640625, |
|
"eval_logits/rejected": 21.29209327697754, |
|
"eval_logps/chosen": -46.128841400146484, |
|
"eval_logps/rejected": -50.25452423095703, |
|
"eval_loss": 0.4501490294933319, |
|
"eval_rewards/accuracies": 0.8166666626930237, |
|
"eval_rewards/chosen": -0.4488947093486786, |
|
"eval_rewards/margins": 1.372832179069519, |
|
"eval_rewards/rejected": -1.8217267990112305, |
|
"eval_runtime": 97.5045, |
|
"eval_samples_per_second": 14.687, |
|
"eval_steps_per_second": 0.923, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.5747947161727954, |
|
"grad_norm": 21.815628051757812, |
|
"learning_rate": 2.126026419136023e-05, |
|
"logits/chosen": 22.183074951171875, |
|
"logits/rejected": 23.01669692993164, |
|
"logps/chosen": -25.743982315063477, |
|
"logps/rejected": -32.8138427734375, |
|
"loss": 0.4382, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7791844606399536, |
|
"rewards/margins": 1.4727532863616943, |
|
"rewards/rejected": -2.2519376277923584, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.5783648696893966, |
|
"grad_norm": 9.609827041625977, |
|
"learning_rate": 2.108175651553017e-05, |
|
"logits/chosen": 21.618915557861328, |
|
"logits/rejected": 21.331867218017578, |
|
"logps/chosen": -36.85995864868164, |
|
"logps/rejected": -43.229026794433594, |
|
"loss": 0.7286, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.826258659362793, |
|
"rewards/margins": 1.1748714447021484, |
|
"rewards/rejected": -2.0011298656463623, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.5819350232059979, |
|
"grad_norm": 4.5630059242248535, |
|
"learning_rate": 2.0903248839700106e-05, |
|
"logits/chosen": 19.84273910522461, |
|
"logits/rejected": 19.884912490844727, |
|
"logps/chosen": -30.239971160888672, |
|
"logps/rejected": -48.68877029418945, |
|
"loss": 0.4667, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.15234649181365967, |
|
"rewards/margins": 1.829787015914917, |
|
"rewards/rejected": -1.9821338653564453, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.585505176722599, |
|
"grad_norm": 10.104663848876953, |
|
"learning_rate": 2.0724741163870047e-05, |
|
"logits/chosen": 22.992443084716797, |
|
"logits/rejected": 23.00868034362793, |
|
"logps/chosen": -63.122642517089844, |
|
"logps/rejected": -61.594627380371094, |
|
"loss": 0.4673, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.868377685546875, |
|
"rewards/margins": 1.27804696559906, |
|
"rewards/rejected": -2.1464245319366455, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.5890753302392003, |
|
"grad_norm": 15.089373588562012, |
|
"learning_rate": 2.0546233488039988e-05, |
|
"logits/chosen": 25.846508026123047, |
|
"logits/rejected": 25.832605361938477, |
|
"logps/chosen": -36.8404655456543, |
|
"logps/rejected": -45.35338592529297, |
|
"loss": 0.4712, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.2539387941360474, |
|
"rewards/margins": -0.1804993450641632, |
|
"rewards/rejected": -1.0734394788742065, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.5926454837558015, |
|
"grad_norm": 8.563141822814941, |
|
"learning_rate": 2.0367725812209925e-05, |
|
"logits/chosen": 25.35030746459961, |
|
"logits/rejected": 25.004871368408203, |
|
"logps/chosen": -14.423406600952148, |
|
"logps/rejected": -27.992664337158203, |
|
"loss": 0.538, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.5140976309776306, |
|
"rewards/margins": 0.7111214995384216, |
|
"rewards/rejected": -1.2252190113067627, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.5962156372724027, |
|
"grad_norm": 10.54287338256836, |
|
"learning_rate": 2.0189218136379866e-05, |
|
"logits/chosen": 23.3927001953125, |
|
"logits/rejected": 23.853105545043945, |
|
"logps/chosen": -48.61920928955078, |
|
"logps/rejected": -59.92339324951172, |
|
"loss": 0.6793, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8948885202407837, |
|
"rewards/margins": 1.2263555526733398, |
|
"rewards/rejected": -2.121243953704834, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.599785790789004, |
|
"grad_norm": 10.524778366088867, |
|
"learning_rate": 2.0010710460549806e-05, |
|
"logits/chosen": 19.91085433959961, |
|
"logits/rejected": 19.805999755859375, |
|
"logps/chosen": -37.84980010986328, |
|
"logps/rejected": -42.93219757080078, |
|
"loss": 0.4365, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.022962704300880432, |
|
"rewards/margins": 1.8917738199234009, |
|
"rewards/rejected": -1.868811011314392, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.6033559443056051, |
|
"grad_norm": 14.300918579101562, |
|
"learning_rate": 1.9832202784719744e-05, |
|
"logits/chosen": 18.957054138183594, |
|
"logits/rejected": 19.1351318359375, |
|
"logps/chosen": -44.51586151123047, |
|
"logps/rejected": -51.03856658935547, |
|
"loss": 0.6677, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.3876987397670746, |
|
"rewards/margins": 1.393635869026184, |
|
"rewards/rejected": -1.7813348770141602, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.6069260978222063, |
|
"grad_norm": 10.632288932800293, |
|
"learning_rate": 1.9653695108889684e-05, |
|
"logits/chosen": 23.2322998046875, |
|
"logits/rejected": 22.36001205444336, |
|
"logps/chosen": -33.04428482055664, |
|
"logps/rejected": -44.446144104003906, |
|
"loss": 0.5128, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.9377388954162598, |
|
"rewards/margins": 1.140350341796875, |
|
"rewards/rejected": -2.0780892372131348, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.6069260978222063, |
|
"eval_logits/chosen": 21.207813262939453, |
|
"eval_logits/rejected": 21.23162078857422, |
|
"eval_logps/chosen": -46.74406433105469, |
|
"eval_logps/rejected": -49.7892951965332, |
|
"eval_loss": 0.4637661278247833, |
|
"eval_rewards/accuracies": 0.8222222328186035, |
|
"eval_rewards/chosen": -0.510417103767395, |
|
"eval_rewards/margins": 1.2647870779037476, |
|
"eval_rewards/rejected": -1.7752044200897217, |
|
"eval_runtime": 97.7592, |
|
"eval_samples_per_second": 14.648, |
|
"eval_steps_per_second": 0.921, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.6104962513388076, |
|
"grad_norm": 9.3756742477417, |
|
"learning_rate": 1.9475187433059622e-05, |
|
"logits/chosen": 21.191104888916016, |
|
"logits/rejected": 21.159591674804688, |
|
"logps/chosen": -31.99550437927246, |
|
"logps/rejected": -44.552982330322266, |
|
"loss": 0.3974, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.8665735125541687, |
|
"rewards/margins": 1.130059838294983, |
|
"rewards/rejected": -1.9966332912445068, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.6140664048554088, |
|
"grad_norm": 21.877330780029297, |
|
"learning_rate": 1.929667975722956e-05, |
|
"logits/chosen": 23.222850799560547, |
|
"logits/rejected": 23.243497848510742, |
|
"logps/chosen": -36.72397232055664, |
|
"logps/rejected": -67.38380432128906, |
|
"loss": 0.5978, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6531896591186523, |
|
"rewards/margins": 1.578351616859436, |
|
"rewards/rejected": -2.231541156768799, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.61763655837201, |
|
"grad_norm": 6.913092136383057, |
|
"learning_rate": 1.9118172081399503e-05, |
|
"logits/chosen": 22.709789276123047, |
|
"logits/rejected": 22.42465591430664, |
|
"logps/chosen": -80.70012664794922, |
|
"logps/rejected": -39.1956901550293, |
|
"loss": 0.5559, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.349876880645752, |
|
"rewards/margins": 0.47570380568504333, |
|
"rewards/rejected": -1.8255808353424072, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.6212067118886112, |
|
"grad_norm": 8.642501831054688, |
|
"learning_rate": 1.893966440556944e-05, |
|
"logits/chosen": 22.583799362182617, |
|
"logits/rejected": 22.996828079223633, |
|
"logps/chosen": -33.973365783691406, |
|
"logps/rejected": -40.40299987792969, |
|
"loss": 0.5488, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8000438809394836, |
|
"rewards/margins": 1.3411608934402466, |
|
"rewards/rejected": -2.141204595565796, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.6247768654052124, |
|
"grad_norm": 7.104326248168945, |
|
"learning_rate": 1.8761156729739378e-05, |
|
"logits/chosen": 19.01543426513672, |
|
"logits/rejected": 19.197078704833984, |
|
"logps/chosen": -56.73799514770508, |
|
"logps/rejected": -57.29972457885742, |
|
"loss": 0.3864, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8416491746902466, |
|
"rewards/margins": 1.005480170249939, |
|
"rewards/rejected": -1.847129464149475, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.6283470189218137, |
|
"grad_norm": 9.371530532836914, |
|
"learning_rate": 1.858264905390932e-05, |
|
"logits/chosen": 23.223146438598633, |
|
"logits/rejected": 23.122949600219727, |
|
"logps/chosen": -45.33961868286133, |
|
"logps/rejected": -50.840667724609375, |
|
"loss": 0.5181, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8626285791397095, |
|
"rewards/margins": 0.9665843844413757, |
|
"rewards/rejected": -1.8292129039764404, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.6319171724384148, |
|
"grad_norm": 9.019303321838379, |
|
"learning_rate": 1.840414137807926e-05, |
|
"logits/chosen": 25.988510131835938, |
|
"logits/rejected": 25.829853057861328, |
|
"logps/chosen": -39.30087661743164, |
|
"logps/rejected": -33.74537658691406, |
|
"loss": 0.5334, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3033127784729004, |
|
"rewards/margins": 0.49060168862342834, |
|
"rewards/rejected": -1.793914556503296, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.6354873259550161, |
|
"grad_norm": 23.491329193115234, |
|
"learning_rate": 1.8225633702249196e-05, |
|
"logits/chosen": 23.851768493652344, |
|
"logits/rejected": 24.093891143798828, |
|
"logps/chosen": -38.49626541137695, |
|
"logps/rejected": -29.503376007080078, |
|
"loss": 0.5504, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6341218948364258, |
|
"rewards/margins": 0.8659729957580566, |
|
"rewards/rejected": -1.5000948905944824, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.6390574794716173, |
|
"grad_norm": 11.341009140014648, |
|
"learning_rate": 1.8047126026419137e-05, |
|
"logits/chosen": 20.8333683013916, |
|
"logits/rejected": 21.53514289855957, |
|
"logps/chosen": -30.507156372070312, |
|
"logps/rejected": -51.23711013793945, |
|
"loss": 0.5258, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.3501494526863098, |
|
"rewards/margins": 1.454388976097107, |
|
"rewards/rejected": -1.8045384883880615, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.6426276329882185, |
|
"grad_norm": 15.42497730255127, |
|
"learning_rate": 1.7868618350589074e-05, |
|
"logits/chosen": 23.256488800048828, |
|
"logits/rejected": 23.68168067932129, |
|
"logps/chosen": -23.0216007232666, |
|
"logps/rejected": -56.100563049316406, |
|
"loss": 0.4459, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.222777247428894, |
|
"rewards/margins": 1.2910280227661133, |
|
"rewards/rejected": -2.5138051509857178, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.6426276329882185, |
|
"eval_logits/chosen": 21.823591232299805, |
|
"eval_logits/rejected": 21.842632293701172, |
|
"eval_logps/chosen": -45.74694061279297, |
|
"eval_logps/rejected": -48.988624572753906, |
|
"eval_loss": 0.4546748399734497, |
|
"eval_rewards/accuracies": 0.8111110925674438, |
|
"eval_rewards/chosen": -0.4107048809528351, |
|
"eval_rewards/margins": 1.2844319343566895, |
|
"eval_rewards/rejected": -1.6951369047164917, |
|
"eval_runtime": 97.7489, |
|
"eval_samples_per_second": 14.65, |
|
"eval_steps_per_second": 0.921, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.6461977865048197, |
|
"grad_norm": 5.2203288078308105, |
|
"learning_rate": 1.769011067475902e-05, |
|
"logits/chosen": 24.605764389038086, |
|
"logits/rejected": 24.0284366607666, |
|
"logps/chosen": -44.47723388671875, |
|
"logps/rejected": -43.85586929321289, |
|
"loss": 0.3868, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.3751835823059082, |
|
"rewards/margins": 2.202699899673462, |
|
"rewards/rejected": -2.57788348197937, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.6497679400214209, |
|
"grad_norm": 15.71258544921875, |
|
"learning_rate": 1.7511602998928956e-05, |
|
"logits/chosen": 17.750822067260742, |
|
"logits/rejected": 18.020795822143555, |
|
"logps/chosen": -12.859057426452637, |
|
"logps/rejected": -30.338369369506836, |
|
"loss": 0.4199, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.33368760347366333, |
|
"rewards/margins": 1.292131781578064, |
|
"rewards/rejected": -1.625819444656372, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.6533380935380221, |
|
"grad_norm": 4.521580219268799, |
|
"learning_rate": 1.7333095323098893e-05, |
|
"logits/chosen": 23.34914207458496, |
|
"logits/rejected": 23.413654327392578, |
|
"logps/chosen": -37.8521614074707, |
|
"logps/rejected": -67.8910903930664, |
|
"loss": 0.4586, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.48519086837768555, |
|
"rewards/margins": 1.7367101907730103, |
|
"rewards/rejected": -2.2219011783599854, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.6569082470546234, |
|
"grad_norm": 3.1471476554870605, |
|
"learning_rate": 1.7154587647268834e-05, |
|
"logits/chosen": 18.366016387939453, |
|
"logits/rejected": 18.327919006347656, |
|
"logps/chosen": -55.4625358581543, |
|
"logps/rejected": -69.14282989501953, |
|
"loss": 0.5265, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9972825050354004, |
|
"rewards/margins": 0.521903395652771, |
|
"rewards/rejected": -1.5191859006881714, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.6604784005712245, |
|
"grad_norm": 12.677329063415527, |
|
"learning_rate": 1.697607997143877e-05, |
|
"logits/chosen": 22.81624984741211, |
|
"logits/rejected": 22.75391960144043, |
|
"logps/chosen": -57.1756706237793, |
|
"logps/rejected": -61.690345764160156, |
|
"loss": 0.4095, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.12716366350650787, |
|
"rewards/margins": 1.8074487447738647, |
|
"rewards/rejected": -1.934612512588501, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.6640485540878258, |
|
"grad_norm": 12.960306167602539, |
|
"learning_rate": 1.679757229560871e-05, |
|
"logits/chosen": 23.8206844329834, |
|
"logits/rejected": 23.760665893554688, |
|
"logps/chosen": -41.61978530883789, |
|
"logps/rejected": -53.76505661010742, |
|
"loss": 0.4355, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2521887421607971, |
|
"rewards/margins": 1.3217939138412476, |
|
"rewards/rejected": -1.5739825963974, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.667618707604427, |
|
"grad_norm": 16.337543487548828, |
|
"learning_rate": 1.6619064619778652e-05, |
|
"logits/chosen": 16.793262481689453, |
|
"logits/rejected": 16.742605209350586, |
|
"logps/chosen": -48.788875579833984, |
|
"logps/rejected": -43.506622314453125, |
|
"loss": 0.4937, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.416334331035614, |
|
"rewards/margins": 1.9437252283096313, |
|
"rewards/rejected": -2.3600597381591797, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.6711888611210282, |
|
"grad_norm": 10.352446556091309, |
|
"learning_rate": 1.644055694394859e-05, |
|
"logits/chosen": 20.40374183654785, |
|
"logits/rejected": 20.37020492553711, |
|
"logps/chosen": -58.631561279296875, |
|
"logps/rejected": -57.344520568847656, |
|
"loss": 0.5564, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.8105586767196655, |
|
"rewards/margins": 0.7123308777809143, |
|
"rewards/rejected": -1.5228896141052246, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.6747590146376294, |
|
"grad_norm": 5.631677627563477, |
|
"learning_rate": 1.626204926811853e-05, |
|
"logits/chosen": 23.19020652770996, |
|
"logits/rejected": 23.061664581298828, |
|
"logps/chosen": -61.78633499145508, |
|
"logps/rejected": -35.33949279785156, |
|
"loss": 0.4221, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.6975164413452148, |
|
"rewards/margins": 1.0272176265716553, |
|
"rewards/rejected": -1.7247339487075806, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.6783291681542306, |
|
"grad_norm": 4.499826431274414, |
|
"learning_rate": 1.608354159228847e-05, |
|
"logits/chosen": 24.53340721130371, |
|
"logits/rejected": 24.41737937927246, |
|
"logps/chosen": -38.38304901123047, |
|
"logps/rejected": -41.079261779785156, |
|
"loss": 0.5325, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.7768689393997192, |
|
"rewards/margins": 1.1220049858093262, |
|
"rewards/rejected": -1.898874044418335, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.6783291681542306, |
|
"eval_logits/chosen": 21.38434600830078, |
|
"eval_logits/rejected": 21.400619506835938, |
|
"eval_logps/chosen": -46.102115631103516, |
|
"eval_logps/rejected": -50.66127395629883, |
|
"eval_loss": 0.4424135386943817, |
|
"eval_rewards/accuracies": 0.8166666626930237, |
|
"eval_rewards/chosen": -0.4462220072746277, |
|
"eval_rewards/margins": 1.41618013381958, |
|
"eval_rewards/rejected": -1.862402319908142, |
|
"eval_runtime": 97.7047, |
|
"eval_samples_per_second": 14.656, |
|
"eval_steps_per_second": 0.921, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.6818993216708319, |
|
"grad_norm": 7.356407165527344, |
|
"learning_rate": 1.5905033916458408e-05, |
|
"logits/chosen": 22.510066986083984, |
|
"logits/rejected": 22.577346801757812, |
|
"logps/chosen": -35.227081298828125, |
|
"logps/rejected": -55.93198776245117, |
|
"loss": 0.5062, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9009400606155396, |
|
"rewards/margins": 1.257596731185913, |
|
"rewards/rejected": -2.158536911010742, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.6854694751874331, |
|
"grad_norm": 3.671786069869995, |
|
"learning_rate": 1.572652624062835e-05, |
|
"logits/chosen": 20.359420776367188, |
|
"logits/rejected": 20.034189224243164, |
|
"logps/chosen": -63.456451416015625, |
|
"logps/rejected": -51.79132080078125, |
|
"loss": 0.4089, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6570903658866882, |
|
"rewards/margins": 1.6083368062973022, |
|
"rewards/rejected": -2.265427350997925, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.6890396287040342, |
|
"grad_norm": 10.456278800964355, |
|
"learning_rate": 1.5548018564798286e-05, |
|
"logits/chosen": 23.675350189208984, |
|
"logits/rejected": 23.863216400146484, |
|
"logps/chosen": -26.890039443969727, |
|
"logps/rejected": -62.48893356323242, |
|
"loss": 0.5818, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0192630290985107, |
|
"rewards/margins": 1.5647847652435303, |
|
"rewards/rejected": -2.584047794342041, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.6926097822206355, |
|
"grad_norm": 9.162726402282715, |
|
"learning_rate": 1.5369510888968227e-05, |
|
"logits/chosen": 23.965158462524414, |
|
"logits/rejected": 23.98688316345215, |
|
"logps/chosen": -21.68771743774414, |
|
"logps/rejected": -23.776859283447266, |
|
"loss": 0.3677, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.539600133895874, |
|
"rewards/margins": 0.6697312593460083, |
|
"rewards/rejected": -1.2093312740325928, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.6961799357372367, |
|
"grad_norm": 3.7567951679229736, |
|
"learning_rate": 1.5191003213138166e-05, |
|
"logits/chosen": 24.331890106201172, |
|
"logits/rejected": 24.351388931274414, |
|
"logps/chosen": -18.292221069335938, |
|
"logps/rejected": -28.088825225830078, |
|
"loss": 0.4699, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.5271309614181519, |
|
"rewards/margins": 1.1870901584625244, |
|
"rewards/rejected": -1.7142210006713867, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.699750089253838, |
|
"grad_norm": 10.841853141784668, |
|
"learning_rate": 1.5012495537308105e-05, |
|
"logits/chosen": 24.288837432861328, |
|
"logits/rejected": 24.071971893310547, |
|
"logps/chosen": -47.823211669921875, |
|
"logps/rejected": -52.2962532043457, |
|
"loss": 0.4207, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.8029332160949707, |
|
"rewards/margins": 1.4008492231369019, |
|
"rewards/rejected": -2.203782558441162, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.7033202427704391, |
|
"grad_norm": 3.0264902114868164, |
|
"learning_rate": 1.4833987861478044e-05, |
|
"logits/chosen": 19.48323631286621, |
|
"logits/rejected": 19.590219497680664, |
|
"logps/chosen": -42.73374557495117, |
|
"logps/rejected": -58.14360427856445, |
|
"loss": 0.4225, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.6881227493286133, |
|
"rewards/margins": 1.56888747215271, |
|
"rewards/rejected": -2.2570104598999023, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.7068903962870403, |
|
"grad_norm": 4.581413745880127, |
|
"learning_rate": 1.4655480185647983e-05, |
|
"logits/chosen": 23.510986328125, |
|
"logits/rejected": 23.64889144897461, |
|
"logps/chosen": -50.88288116455078, |
|
"logps/rejected": -30.534610748291016, |
|
"loss": 0.3636, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.262825071811676, |
|
"rewards/margins": 1.9995416402816772, |
|
"rewards/rejected": -2.262366771697998, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.7104605498036416, |
|
"grad_norm": 15.385260581970215, |
|
"learning_rate": 1.4476972509817924e-05, |
|
"logits/chosen": 22.773696899414062, |
|
"logits/rejected": 22.666271209716797, |
|
"logps/chosen": -40.43771743774414, |
|
"logps/rejected": -64.87566375732422, |
|
"loss": 0.4299, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.787145972251892, |
|
"rewards/margins": 1.5212252140045166, |
|
"rewards/rejected": -3.3083713054656982, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.7140307033202428, |
|
"grad_norm": 20.59077262878418, |
|
"learning_rate": 1.4298464833987863e-05, |
|
"logits/chosen": 14.725168228149414, |
|
"logits/rejected": 15.075048446655273, |
|
"logps/chosen": -58.4405517578125, |
|
"logps/rejected": -52.311119079589844, |
|
"loss": 0.5773, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.5943628549575806, |
|
"rewards/margins": -0.12944069504737854, |
|
"rewards/rejected": -1.4649221897125244, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.7140307033202428, |
|
"eval_logits/chosen": 20.44503402709961, |
|
"eval_logits/rejected": 20.467418670654297, |
|
"eval_logps/chosen": -47.48448944091797, |
|
"eval_logps/rejected": -52.786041259765625, |
|
"eval_loss": 0.440356969833374, |
|
"eval_rewards/accuracies": 0.8277778029441833, |
|
"eval_rewards/chosen": -0.5844593644142151, |
|
"eval_rewards/margins": 1.4904190301895142, |
|
"eval_rewards/rejected": -2.074878454208374, |
|
"eval_runtime": 97.8926, |
|
"eval_samples_per_second": 14.628, |
|
"eval_steps_per_second": 0.919, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.7176008568368439, |
|
"grad_norm": 10.820709228515625, |
|
"learning_rate": 1.4119957158157802e-05, |
|
"logits/chosen": 23.97198486328125, |
|
"logits/rejected": 23.974132537841797, |
|
"logps/chosen": -42.35189437866211, |
|
"logps/rejected": -57.66106414794922, |
|
"loss": 0.5486, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.9043490290641785, |
|
"rewards/margins": 1.2323650121688843, |
|
"rewards/rejected": -2.136713981628418, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.7211710103534452, |
|
"grad_norm": 4.93384313583374, |
|
"learning_rate": 1.394144948232774e-05, |
|
"logits/chosen": 21.256620407104492, |
|
"logits/rejected": 20.852317810058594, |
|
"logps/chosen": -27.885059356689453, |
|
"logps/rejected": -38.469505310058594, |
|
"loss": 0.4361, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.8113853335380554, |
|
"rewards/margins": 1.1693836450576782, |
|
"rewards/rejected": -1.9807687997817993, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.7247411638700464, |
|
"grad_norm": 8.402702331542969, |
|
"learning_rate": 1.3762941806497681e-05, |
|
"logits/chosen": 22.48406982421875, |
|
"logits/rejected": 22.659347534179688, |
|
"logps/chosen": -30.04986572265625, |
|
"logps/rejected": -39.668617248535156, |
|
"loss": 0.3847, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.43164730072021484, |
|
"rewards/margins": 1.5857521295547485, |
|
"rewards/rejected": -2.017399549484253, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.7283113173866477, |
|
"grad_norm": 5.305787563323975, |
|
"learning_rate": 1.358443413066762e-05, |
|
"logits/chosen": 24.485246658325195, |
|
"logits/rejected": 24.59568214416504, |
|
"logps/chosen": -61.41462326049805, |
|
"logps/rejected": -70.96296691894531, |
|
"loss": 0.5182, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.9171468019485474, |
|
"rewards/margins": 0.8545171618461609, |
|
"rewards/rejected": -1.7716642618179321, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.7318814709032488, |
|
"grad_norm": 7.894079685211182, |
|
"learning_rate": 1.340592645483756e-05, |
|
"logits/chosen": 18.658212661743164, |
|
"logits/rejected": 18.898082733154297, |
|
"logps/chosen": -29.949609756469727, |
|
"logps/rejected": -52.39385986328125, |
|
"loss": 0.349, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.49497169256210327, |
|
"rewards/margins": 1.6922725439071655, |
|
"rewards/rejected": -2.187243938446045, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.73545162441985, |
|
"grad_norm": 7.683061599731445, |
|
"learning_rate": 1.3227418779007496e-05, |
|
"logits/chosen": 21.266719818115234, |
|
"logits/rejected": 21.34499740600586, |
|
"logps/chosen": -37.20063018798828, |
|
"logps/rejected": -41.06432342529297, |
|
"loss": 0.4469, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.48325157165527344, |
|
"rewards/margins": 0.4681263566017151, |
|
"rewards/rejected": -0.9513779878616333, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.7390217779364513, |
|
"grad_norm": 9.4763765335083, |
|
"learning_rate": 1.3048911103177439e-05, |
|
"logits/chosen": 21.261632919311523, |
|
"logits/rejected": 21.200904846191406, |
|
"logps/chosen": -37.98931121826172, |
|
"logps/rejected": -50.971412658691406, |
|
"loss": 0.4712, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.8451720476150513, |
|
"rewards/margins": 1.3964030742645264, |
|
"rewards/rejected": -2.241574764251709, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.7425919314530525, |
|
"grad_norm": 12.83144474029541, |
|
"learning_rate": 1.2870403427347378e-05, |
|
"logits/chosen": 26.042606353759766, |
|
"logits/rejected": 25.88700294494629, |
|
"logps/chosen": -62.49781036376953, |
|
"logps/rejected": -77.66305541992188, |
|
"loss": 0.4901, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.1033135652542114, |
|
"rewards/margins": 1.2711939811706543, |
|
"rewards/rejected": -2.374507427215576, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.7461620849696537, |
|
"grad_norm": 4.098615646362305, |
|
"learning_rate": 1.2691895751517315e-05, |
|
"logits/chosen": 22.199886322021484, |
|
"logits/rejected": 22.246328353881836, |
|
"logps/chosen": -53.25043869018555, |
|
"logps/rejected": -53.634849548339844, |
|
"loss": 0.447, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8297556042671204, |
|
"rewards/margins": 1.6126811504364014, |
|
"rewards/rejected": -2.442436695098877, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.7497322384862549, |
|
"grad_norm": 7.720545291900635, |
|
"learning_rate": 1.2513388075687254e-05, |
|
"logits/chosen": 23.562522888183594, |
|
"logits/rejected": 23.22288703918457, |
|
"logps/chosen": -79.41565704345703, |
|
"logps/rejected": -45.413997650146484, |
|
"loss": 0.4843, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.48383426666259766, |
|
"rewards/margins": 1.4371916055679321, |
|
"rewards/rejected": -1.9210258722305298, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.7497322384862549, |
|
"eval_logits/chosen": 20.6312198638916, |
|
"eval_logits/rejected": 20.657325744628906, |
|
"eval_logps/chosen": -47.1960334777832, |
|
"eval_logps/rejected": -52.259559631347656, |
|
"eval_loss": 0.4409657418727875, |
|
"eval_rewards/accuracies": 0.8333333134651184, |
|
"eval_rewards/chosen": -0.5556142330169678, |
|
"eval_rewards/margins": 1.4666165113449097, |
|
"eval_rewards/rejected": -2.022230625152588, |
|
"eval_runtime": 97.8612, |
|
"eval_samples_per_second": 14.633, |
|
"eval_steps_per_second": 0.92, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.7533023920028561, |
|
"grad_norm": 11.717628479003906, |
|
"learning_rate": 1.2334880399857195e-05, |
|
"logits/chosen": 19.04401969909668, |
|
"logits/rejected": 19.13692855834961, |
|
"logps/chosen": -56.0210075378418, |
|
"logps/rejected": -46.79364013671875, |
|
"loss": 0.3983, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6606385707855225, |
|
"rewards/margins": 1.5798553228378296, |
|
"rewards/rejected": -2.2404940128326416, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.7568725455194574, |
|
"grad_norm": 6.157966136932373, |
|
"learning_rate": 1.2156372724027134e-05, |
|
"logits/chosen": 17.96826171875, |
|
"logits/rejected": 18.081592559814453, |
|
"logps/chosen": -42.12479782104492, |
|
"logps/rejected": -44.53673553466797, |
|
"loss": 0.3718, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.36954426765441895, |
|
"rewards/margins": 1.775122880935669, |
|
"rewards/rejected": -2.144667148590088, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.7604426990360585, |
|
"grad_norm": 3.8417954444885254, |
|
"learning_rate": 1.1977865048197073e-05, |
|
"logits/chosen": 20.064462661743164, |
|
"logits/rejected": 19.88588523864746, |
|
"logps/chosen": -44.317867279052734, |
|
"logps/rejected": -63.211326599121094, |
|
"loss": 0.5168, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.0525072813034058, |
|
"rewards/margins": 1.6052157878875732, |
|
"rewards/rejected": -2.6577229499816895, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.7640128525526597, |
|
"grad_norm": 3.4509365558624268, |
|
"learning_rate": 1.1799357372367012e-05, |
|
"logits/chosen": 22.211759567260742, |
|
"logits/rejected": 22.149486541748047, |
|
"logps/chosen": -37.72747039794922, |
|
"logps/rejected": -46.89754867553711, |
|
"loss": 0.3956, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0622680187225342, |
|
"rewards/margins": 1.1859983205795288, |
|
"rewards/rejected": -2.2482664585113525, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.767583006069261, |
|
"grad_norm": 13.607682228088379, |
|
"learning_rate": 1.1620849696536952e-05, |
|
"logits/chosen": 23.63653564453125, |
|
"logits/rejected": 23.572277069091797, |
|
"logps/chosen": -41.84113311767578, |
|
"logps/rejected": -45.45246887207031, |
|
"loss": 0.5191, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7334656715393066, |
|
"rewards/margins": 1.2255051136016846, |
|
"rewards/rejected": -1.9589707851409912, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.7711531595858622, |
|
"grad_norm": 12.166585922241211, |
|
"learning_rate": 1.1442342020706891e-05, |
|
"logits/chosen": 23.18703842163086, |
|
"logits/rejected": 23.008901596069336, |
|
"logps/chosen": -48.22418975830078, |
|
"logps/rejected": -39.83467102050781, |
|
"loss": 0.4619, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.0254920721054077, |
|
"rewards/margins": 1.014207124710083, |
|
"rewards/rejected": -2.039699077606201, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.7747233131024635, |
|
"grad_norm": 4.827226161956787, |
|
"learning_rate": 1.126383434487683e-05, |
|
"logits/chosen": 19.71803092956543, |
|
"logits/rejected": 19.300630569458008, |
|
"logps/chosen": -55.87500762939453, |
|
"logps/rejected": -42.52462387084961, |
|
"loss": 0.4894, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.7345470190048218, |
|
"rewards/margins": 0.6044321656227112, |
|
"rewards/rejected": -1.3389792442321777, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.7782934666190646, |
|
"grad_norm": 9.447345733642578, |
|
"learning_rate": 1.108532666904677e-05, |
|
"logits/chosen": 19.782794952392578, |
|
"logits/rejected": 19.624738693237305, |
|
"logps/chosen": -45.2284049987793, |
|
"logps/rejected": -68.25267791748047, |
|
"loss": 0.443, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.21583375334739685, |
|
"rewards/margins": 0.9129629135131836, |
|
"rewards/rejected": -1.1287966966629028, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.7818636201356658, |
|
"grad_norm": 6.821338176727295, |
|
"learning_rate": 1.090681899321671e-05, |
|
"logits/chosen": 22.291385650634766, |
|
"logits/rejected": 22.491024017333984, |
|
"logps/chosen": -35.10066223144531, |
|
"logps/rejected": -53.37052536010742, |
|
"loss": 0.4743, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.867012619972229, |
|
"rewards/margins": 0.7674096822738647, |
|
"rewards/rejected": -1.6344223022460938, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.7854337736522671, |
|
"grad_norm": 13.862120628356934, |
|
"learning_rate": 1.0728311317386647e-05, |
|
"logits/chosen": 24.0218448638916, |
|
"logits/rejected": 23.70220947265625, |
|
"logps/chosen": -70.60053253173828, |
|
"logps/rejected": -49.052589416503906, |
|
"loss": 0.5865, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9927393794059753, |
|
"rewards/margins": 0.9688960313796997, |
|
"rewards/rejected": -1.9616353511810303, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.7854337736522671, |
|
"eval_logits/chosen": 20.780187606811523, |
|
"eval_logits/rejected": 20.791622161865234, |
|
"eval_logps/chosen": -47.48575210571289, |
|
"eval_logps/rejected": -52.499168395996094, |
|
"eval_loss": 0.43738028407096863, |
|
"eval_rewards/accuracies": 0.8166666626930237, |
|
"eval_rewards/chosen": -0.5845859050750732, |
|
"eval_rewards/margins": 1.4616053104400635, |
|
"eval_rewards/rejected": -2.0461909770965576, |
|
"eval_runtime": 97.9018, |
|
"eval_samples_per_second": 14.627, |
|
"eval_steps_per_second": 0.919, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.7890039271688682, |
|
"grad_norm": 9.391676902770996, |
|
"learning_rate": 1.0549803641556586e-05, |
|
"logits/chosen": 19.061254501342773, |
|
"logits/rejected": 18.95170021057129, |
|
"logps/chosen": -24.127471923828125, |
|
"logps/rejected": -29.48319435119629, |
|
"loss": 0.3811, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1544402837753296, |
|
"rewards/margins": 0.755560040473938, |
|
"rewards/rejected": -1.9100004434585571, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.7925740806854695, |
|
"grad_norm": 15.025814056396484, |
|
"learning_rate": 1.0371295965726527e-05, |
|
"logits/chosen": 22.466449737548828, |
|
"logits/rejected": 22.482467651367188, |
|
"logps/chosen": -66.04009246826172, |
|
"logps/rejected": -77.5560073852539, |
|
"loss": 0.5311, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.26618120074272156, |
|
"rewards/margins": 1.5414941310882568, |
|
"rewards/rejected": -1.8076753616333008, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.7961442342020707, |
|
"grad_norm": 4.339944839477539, |
|
"learning_rate": 1.0192788289896466e-05, |
|
"logits/chosen": 22.064773559570312, |
|
"logits/rejected": 22.116168975830078, |
|
"logps/chosen": -46.800048828125, |
|
"logps/rejected": -47.54998016357422, |
|
"loss": 0.4871, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3751857280731201, |
|
"rewards/margins": 0.8837114572525024, |
|
"rewards/rejected": -2.258897304534912, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.7997143877186719, |
|
"grad_norm": 6.989345550537109, |
|
"learning_rate": 1.0014280614066405e-05, |
|
"logits/chosen": 21.530237197875977, |
|
"logits/rejected": 21.780250549316406, |
|
"logps/chosen": -20.928579330444336, |
|
"logps/rejected": -34.38627243041992, |
|
"loss": 0.3897, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6259101629257202, |
|
"rewards/margins": 1.1122276782989502, |
|
"rewards/rejected": -1.7381378412246704, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.8032845412352732, |
|
"grad_norm": 10.125934600830078, |
|
"learning_rate": 9.835772938236344e-06, |
|
"logits/chosen": 19.070201873779297, |
|
"logits/rejected": 19.056577682495117, |
|
"logps/chosen": -57.353233337402344, |
|
"logps/rejected": -45.93163299560547, |
|
"loss": 0.6086, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.8172680735588074, |
|
"rewards/margins": 1.0970861911773682, |
|
"rewards/rejected": -1.9143543243408203, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.8068546947518743, |
|
"grad_norm": 3.600658655166626, |
|
"learning_rate": 9.657265262406285e-06, |
|
"logits/chosen": 25.529754638671875, |
|
"logits/rejected": 25.521421432495117, |
|
"logps/chosen": -34.79078674316406, |
|
"logps/rejected": -43.617095947265625, |
|
"loss": 0.513, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.6766828894615173, |
|
"rewards/margins": -0.25649863481521606, |
|
"rewards/rejected": -0.42018431425094604, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.8104248482684755, |
|
"grad_norm": 9.139204025268555, |
|
"learning_rate": 9.478757586576224e-06, |
|
"logits/chosen": 19.52242660522461, |
|
"logits/rejected": 19.63443946838379, |
|
"logps/chosen": -33.019832611083984, |
|
"logps/rejected": -49.0518684387207, |
|
"loss": 0.4795, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.5696147084236145, |
|
"rewards/margins": 1.7224088907241821, |
|
"rewards/rejected": -2.2920238971710205, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.8139950017850768, |
|
"grad_norm": 8.38289737701416, |
|
"learning_rate": 9.300249910746163e-06, |
|
"logits/chosen": 21.76547622680664, |
|
"logits/rejected": 22.202754974365234, |
|
"logps/chosen": -57.740699768066406, |
|
"logps/rejected": -56.75148391723633, |
|
"loss": 0.3864, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.994134247303009, |
|
"rewards/margins": 1.7650930881500244, |
|
"rewards/rejected": -2.7592272758483887, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.817565155301678, |
|
"grad_norm": 7.5334272384643555, |
|
"learning_rate": 9.121742234916102e-06, |
|
"logits/chosen": 24.34808349609375, |
|
"logits/rejected": 24.858325958251953, |
|
"logps/chosen": -25.712127685546875, |
|
"logps/rejected": -42.50498962402344, |
|
"loss": 0.6181, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.5895062685012817, |
|
"rewards/margins": 1.4605395793914795, |
|
"rewards/rejected": -2.0500457286834717, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.8211353088182792, |
|
"grad_norm": 6.730588912963867, |
|
"learning_rate": 8.943234559086042e-06, |
|
"logits/chosen": 23.88949203491211, |
|
"logits/rejected": 23.867450714111328, |
|
"logps/chosen": -29.585073471069336, |
|
"logps/rejected": -44.84501647949219, |
|
"loss": 0.4544, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.01124107837677, |
|
"rewards/margins": 1.032428503036499, |
|
"rewards/rejected": -2.0436697006225586, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.8211353088182792, |
|
"eval_logits/chosen": 20.524213790893555, |
|
"eval_logits/rejected": 20.531003952026367, |
|
"eval_logps/chosen": -47.56471252441406, |
|
"eval_logps/rejected": -52.853515625, |
|
"eval_loss": 0.4356454610824585, |
|
"eval_rewards/accuracies": 0.8333333134651184, |
|
"eval_rewards/chosen": -0.5924822688102722, |
|
"eval_rewards/margins": 1.4891438484191895, |
|
"eval_rewards/rejected": -2.0816261768341064, |
|
"eval_runtime": 97.6895, |
|
"eval_samples_per_second": 14.659, |
|
"eval_steps_per_second": 0.921, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.8247054623348804, |
|
"grad_norm": 7.662333011627197, |
|
"learning_rate": 8.76472688325598e-06, |
|
"logits/chosen": 19.167724609375, |
|
"logits/rejected": 19.311237335205078, |
|
"logps/chosen": -60.642913818359375, |
|
"logps/rejected": -61.336265563964844, |
|
"loss": 0.4462, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.7695547342300415, |
|
"rewards/margins": 1.0697910785675049, |
|
"rewards/rejected": -1.839345932006836, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.8282756158514816, |
|
"grad_norm": 13.9297513961792, |
|
"learning_rate": 8.58621920742592e-06, |
|
"logits/chosen": 24.825382232666016, |
|
"logits/rejected": 24.4403076171875, |
|
"logps/chosen": -50.69221115112305, |
|
"logps/rejected": -43.589820861816406, |
|
"loss": 0.472, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.14803439378738403, |
|
"rewards/margins": 2.013376474380493, |
|
"rewards/rejected": -2.1614108085632324, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.8318457693680829, |
|
"grad_norm": 7.068613529205322, |
|
"learning_rate": 8.40771153159586e-06, |
|
"logits/chosen": 20.52377700805664, |
|
"logits/rejected": 20.742507934570312, |
|
"logps/chosen": -48.990299224853516, |
|
"logps/rejected": -56.431175231933594, |
|
"loss": 0.5638, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6809730529785156, |
|
"rewards/margins": 1.5615565776824951, |
|
"rewards/rejected": -2.2425293922424316, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.835415922884684, |
|
"grad_norm": 6.40026330947876, |
|
"learning_rate": 8.229203855765798e-06, |
|
"logits/chosen": 20.41210174560547, |
|
"logits/rejected": 20.379867553710938, |
|
"logps/chosen": -59.4276123046875, |
|
"logps/rejected": -71.07743072509766, |
|
"loss": 0.5384, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.5872411727905273, |
|
"rewards/margins": 0.9024609327316284, |
|
"rewards/rejected": -2.4897022247314453, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.8389860764012853, |
|
"grad_norm": 9.892749786376953, |
|
"learning_rate": 8.050696179935737e-06, |
|
"logits/chosen": 22.273807525634766, |
|
"logits/rejected": 22.05521011352539, |
|
"logps/chosen": -37.72688293457031, |
|
"logps/rejected": -37.593387603759766, |
|
"loss": 0.4539, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0601468086242676, |
|
"rewards/margins": 0.7513321042060852, |
|
"rewards/rejected": -1.811478853225708, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.8425562299178865, |
|
"grad_norm": 19.193857192993164, |
|
"learning_rate": 7.872188504105676e-06, |
|
"logits/chosen": 21.529739379882812, |
|
"logits/rejected": 21.197284698486328, |
|
"logps/chosen": -25.73797607421875, |
|
"logps/rejected": -45.44214630126953, |
|
"loss": 0.5542, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.01898249424993992, |
|
"rewards/margins": 1.9577863216400146, |
|
"rewards/rejected": -1.9388039112091064, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.8461263834344876, |
|
"grad_norm": 9.629522323608398, |
|
"learning_rate": 7.693680828275617e-06, |
|
"logits/chosen": 13.054910659790039, |
|
"logits/rejected": 12.92231559753418, |
|
"logps/chosen": -28.6776180267334, |
|
"logps/rejected": -39.813411712646484, |
|
"loss": 0.5216, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1042935848236084, |
|
"rewards/margins": 1.2019240856170654, |
|
"rewards/rejected": -2.306217670440674, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.8496965369510889, |
|
"grad_norm": 8.702773094177246, |
|
"learning_rate": 7.515173152445555e-06, |
|
"logits/chosen": 24.35626792907715, |
|
"logits/rejected": 24.593463897705078, |
|
"logps/chosen": -44.167537689208984, |
|
"logps/rejected": -70.86328125, |
|
"loss": 0.4955, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1845743656158447, |
|
"rewards/margins": 1.8204050064086914, |
|
"rewards/rejected": -3.004979372024536, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.8532666904676901, |
|
"grad_norm": 11.280304908752441, |
|
"learning_rate": 7.336665476615495e-06, |
|
"logits/chosen": 21.997047424316406, |
|
"logits/rejected": 22.113534927368164, |
|
"logps/chosen": -57.37868118286133, |
|
"logps/rejected": -64.85008239746094, |
|
"loss": 0.5263, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2687997817993164, |
|
"rewards/margins": 0.5181077718734741, |
|
"rewards/rejected": -1.7869075536727905, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.8568368439842913, |
|
"grad_norm": 7.5682854652404785, |
|
"learning_rate": 7.158157800785434e-06, |
|
"logits/chosen": 23.916316986083984, |
|
"logits/rejected": 23.814477920532227, |
|
"logps/chosen": -60.103416442871094, |
|
"logps/rejected": -71.51342010498047, |
|
"loss": 0.4501, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.49635568261146545, |
|
"rewards/margins": 1.9991073608398438, |
|
"rewards/rejected": -2.4954631328582764, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.8568368439842913, |
|
"eval_logits/chosen": 20.50776481628418, |
|
"eval_logits/rejected": 20.51170539855957, |
|
"eval_logps/chosen": -47.33561706542969, |
|
"eval_logps/rejected": -52.59519958496094, |
|
"eval_loss": 0.43392252922058105, |
|
"eval_rewards/accuracies": 0.8444444537162781, |
|
"eval_rewards/chosen": -0.5695720911026001, |
|
"eval_rewards/margins": 1.4862221479415894, |
|
"eval_rewards/rejected": -2.0557942390441895, |
|
"eval_runtime": 97.6403, |
|
"eval_samples_per_second": 14.666, |
|
"eval_steps_per_second": 0.922, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.8604069975008926, |
|
"grad_norm": 5.012470722198486, |
|
"learning_rate": 6.979650124955374e-06, |
|
"logits/chosen": 21.85761260986328, |
|
"logits/rejected": 21.804880142211914, |
|
"logps/chosen": -31.270885467529297, |
|
"logps/rejected": -52.126930236816406, |
|
"loss": 0.478, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.35933440923690796, |
|
"rewards/margins": 1.06755530834198, |
|
"rewards/rejected": -1.4268897771835327, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.8639771510174937, |
|
"grad_norm": 5.9783854484558105, |
|
"learning_rate": 6.801142449125313e-06, |
|
"logits/chosen": 19.979433059692383, |
|
"logits/rejected": 20.035179138183594, |
|
"logps/chosen": -51.7042121887207, |
|
"logps/rejected": -57.767913818359375, |
|
"loss": 0.4853, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.6708565354347229, |
|
"rewards/margins": 1.6906753778457642, |
|
"rewards/rejected": -2.3615317344665527, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.867547304534095, |
|
"grad_norm": 7.05979061126709, |
|
"learning_rate": 6.6226347732952526e-06, |
|
"logits/chosen": 15.46339225769043, |
|
"logits/rejected": 15.658937454223633, |
|
"logps/chosen": -40.01195526123047, |
|
"logps/rejected": -46.074501037597656, |
|
"loss": 0.5628, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.18836592137813568, |
|
"rewards/margins": 1.5064311027526855, |
|
"rewards/rejected": -1.6947968006134033, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.8711174580506962, |
|
"grad_norm": 12.882414817810059, |
|
"learning_rate": 6.4441270974651915e-06, |
|
"logits/chosen": 21.048954010009766, |
|
"logits/rejected": 21.059772491455078, |
|
"logps/chosen": -53.65748977661133, |
|
"logps/rejected": -48.547874450683594, |
|
"loss": 0.4885, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3018702268600464, |
|
"rewards/margins": 1.0138559341430664, |
|
"rewards/rejected": -2.3157262802124023, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.8746876115672974, |
|
"grad_norm": 11.930545806884766, |
|
"learning_rate": 6.26561942163513e-06, |
|
"logits/chosen": 20.654542922973633, |
|
"logits/rejected": 20.686420440673828, |
|
"logps/chosen": -39.971920013427734, |
|
"logps/rejected": -35.28145217895508, |
|
"loss": 0.5033, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.6068005561828613, |
|
"rewards/margins": 1.3182332515716553, |
|
"rewards/rejected": -1.9250338077545166, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.8782577650838986, |
|
"grad_norm": 9.403008460998535, |
|
"learning_rate": 6.08711174580507e-06, |
|
"logits/chosen": 22.882566452026367, |
|
"logits/rejected": 22.231182098388672, |
|
"logps/chosen": -52.44697952270508, |
|
"logps/rejected": -47.005287170410156, |
|
"loss": 0.5709, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.8271516561508179, |
|
"rewards/margins": 0.7867218852043152, |
|
"rewards/rejected": -1.6138734817504883, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.8818279186004998, |
|
"grad_norm": 23.268875122070312, |
|
"learning_rate": 5.908604069975009e-06, |
|
"logits/chosen": 21.904462814331055, |
|
"logits/rejected": 22.555965423583984, |
|
"logps/chosen": -63.27705001831055, |
|
"logps/rejected": -87.00935363769531, |
|
"loss": 0.383, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2159323692321777, |
|
"rewards/margins": 1.8115613460540771, |
|
"rewards/rejected": -3.027493715286255, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.8853980721171011, |
|
"grad_norm": 16.253660202026367, |
|
"learning_rate": 5.730096394144948e-06, |
|
"logits/chosen": 22.083019256591797, |
|
"logits/rejected": 22.175024032592773, |
|
"logps/chosen": -43.36615753173828, |
|
"logps/rejected": -42.4653205871582, |
|
"loss": 0.531, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2614014148712158, |
|
"rewards/margins": 0.07179747521877289, |
|
"rewards/rejected": -1.33319890499115, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.8889682256337023, |
|
"grad_norm": 8.766143798828125, |
|
"learning_rate": 5.551588718314888e-06, |
|
"logits/chosen": 22.502155303955078, |
|
"logits/rejected": 22.834453582763672, |
|
"logps/chosen": -57.746620178222656, |
|
"logps/rejected": -73.5748519897461, |
|
"loss": 0.386, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6636447906494141, |
|
"rewards/margins": 1.0091074705123901, |
|
"rewards/rejected": -1.6727523803710938, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.8925383791503034, |
|
"grad_norm": 9.421289443969727, |
|
"learning_rate": 5.373081042484827e-06, |
|
"logits/chosen": 22.923538208007812, |
|
"logits/rejected": 23.32149887084961, |
|
"logps/chosen": -63.822181701660156, |
|
"logps/rejected": -57.5202522277832, |
|
"loss": 0.5171, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.152776837348938, |
|
"rewards/margins": 0.4619804322719574, |
|
"rewards/rejected": -1.6147572994232178, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.8925383791503034, |
|
"eval_logits/chosen": 20.416898727416992, |
|
"eval_logits/rejected": 20.427398681640625, |
|
"eval_logps/chosen": -47.32997131347656, |
|
"eval_logps/rejected": -52.617210388183594, |
|
"eval_loss": 0.4359460473060608, |
|
"eval_rewards/accuracies": 0.8333333134651184, |
|
"eval_rewards/chosen": -0.5690072774887085, |
|
"eval_rewards/margins": 1.4889880418777466, |
|
"eval_rewards/rejected": -2.057995319366455, |
|
"eval_runtime": 98.594, |
|
"eval_samples_per_second": 14.524, |
|
"eval_steps_per_second": 0.913, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.8961085326669047, |
|
"grad_norm": 10.914727210998535, |
|
"learning_rate": 5.194573366654766e-06, |
|
"logits/chosen": 27.528060913085938, |
|
"logits/rejected": 27.470294952392578, |
|
"logps/chosen": -26.07723045349121, |
|
"logps/rejected": -25.864681243896484, |
|
"loss": 0.4914, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.3140444755554199, |
|
"rewards/margins": 0.8342065811157227, |
|
"rewards/rejected": -1.1482508182525635, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.8996786861835059, |
|
"grad_norm": 4.41356897354126, |
|
"learning_rate": 5.016065690824705e-06, |
|
"logits/chosen": 21.382648468017578, |
|
"logits/rejected": 21.26772689819336, |
|
"logps/chosen": -65.88087463378906, |
|
"logps/rejected": -38.547142028808594, |
|
"loss": 0.5372, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.7923516631126404, |
|
"rewards/margins": 0.6893059015274048, |
|
"rewards/rejected": -1.4816573858261108, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.903248839700107, |
|
"grad_norm": 14.50999641418457, |
|
"learning_rate": 4.837558014994645e-06, |
|
"logits/chosen": 24.574230194091797, |
|
"logits/rejected": 24.432174682617188, |
|
"logps/chosen": -40.37361145019531, |
|
"logps/rejected": -61.320648193359375, |
|
"loss": 0.59, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.6994588971138, |
|
"rewards/margins": 1.3289554119110107, |
|
"rewards/rejected": -2.028414487838745, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.9068189932167083, |
|
"grad_norm": 3.6535849571228027, |
|
"learning_rate": 4.659050339164584e-06, |
|
"logits/chosen": 22.09380340576172, |
|
"logits/rejected": 22.342832565307617, |
|
"logps/chosen": -43.704811096191406, |
|
"logps/rejected": -57.10416793823242, |
|
"loss": 0.3651, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.5590577721595764, |
|
"rewards/margins": 2.217897891998291, |
|
"rewards/rejected": -2.7769553661346436, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.9103891467333095, |
|
"grad_norm": 12.662737846374512, |
|
"learning_rate": 4.480542663334524e-06, |
|
"logits/chosen": 26.030466079711914, |
|
"logits/rejected": 26.27060317993164, |
|
"logps/chosen": -45.546714782714844, |
|
"logps/rejected": -56.18561935424805, |
|
"loss": 0.631, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6547432541847229, |
|
"rewards/margins": 1.1012141704559326, |
|
"rewards/rejected": -1.7559573650360107, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.9139593002499108, |
|
"grad_norm": 12.380247116088867, |
|
"learning_rate": 4.302034987504463e-06, |
|
"logits/chosen": 26.792774200439453, |
|
"logits/rejected": 26.5482120513916, |
|
"logps/chosen": -56.17375564575195, |
|
"logps/rejected": -41.74138641357422, |
|
"loss": 0.5118, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.4354368448257446, |
|
"rewards/margins": 0.7226849794387817, |
|
"rewards/rejected": -2.1581218242645264, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.917529453766512, |
|
"grad_norm": 14.337486267089844, |
|
"learning_rate": 4.123527311674403e-06, |
|
"logits/chosen": 20.274442672729492, |
|
"logits/rejected": 20.080303192138672, |
|
"logps/chosen": -64.95756530761719, |
|
"logps/rejected": -54.136146545410156, |
|
"loss": 0.4504, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9958245158195496, |
|
"rewards/margins": 1.3853988647460938, |
|
"rewards/rejected": -2.381223440170288, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.9210996072831131, |
|
"grad_norm": 5.5064616203308105, |
|
"learning_rate": 3.945019635844342e-06, |
|
"logits/chosen": 23.955215454101562, |
|
"logits/rejected": 23.814537048339844, |
|
"logps/chosen": -47.43402862548828, |
|
"logps/rejected": -75.19926452636719, |
|
"loss": 0.3622, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.0230860710144043, |
|
"rewards/margins": 1.3491160869598389, |
|
"rewards/rejected": -2.372201919555664, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.9246697607997144, |
|
"grad_norm": 8.065810203552246, |
|
"learning_rate": 3.766511960014281e-06, |
|
"logits/chosen": 24.445186614990234, |
|
"logits/rejected": 24.539066314697266, |
|
"logps/chosen": -54.058349609375, |
|
"logps/rejected": -57.293846130371094, |
|
"loss": 0.5582, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.8545524477958679, |
|
"rewards/margins": 0.8429762721061707, |
|
"rewards/rejected": -1.697528600692749, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.9282399143163156, |
|
"grad_norm": 8.037162780761719, |
|
"learning_rate": 3.5880042841842204e-06, |
|
"logits/chosen": 22.97157859802246, |
|
"logits/rejected": 22.82067108154297, |
|
"logps/chosen": -36.165348052978516, |
|
"logps/rejected": -39.38242721557617, |
|
"loss": 0.4775, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.074142336845398, |
|
"rewards/margins": 0.5587809681892395, |
|
"rewards/rejected": -1.6329231262207031, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.9282399143163156, |
|
"eval_logits/chosen": 20.513593673706055, |
|
"eval_logits/rejected": 20.519195556640625, |
|
"eval_logps/chosen": -47.434913635253906, |
|
"eval_logps/rejected": -52.493404388427734, |
|
"eval_loss": 0.43444961309432983, |
|
"eval_rewards/accuracies": 0.8333333134651184, |
|
"eval_rewards/chosen": -0.5795022249221802, |
|
"eval_rewards/margins": 1.4661126136779785, |
|
"eval_rewards/rejected": -2.045614719390869, |
|
"eval_runtime": 97.6912, |
|
"eval_samples_per_second": 14.658, |
|
"eval_steps_per_second": 0.921, |
|
"step": 2600 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2801, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|