|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 120, |
|
"global_step": 478, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0041841004184100415, |
|
"grad_norm": 7.098792998705111, |
|
"learning_rate": 1.0416666666666666e-08, |
|
"logits/chosen": -2.6615781784057617, |
|
"logits/rejected": -2.691082239151001, |
|
"logps/chosen": -298.690673828125, |
|
"logps/rejected": -323.7967834472656, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.04184100418410042, |
|
"grad_norm": 6.225657254336768, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -2.6289236545562744, |
|
"logits/rejected": -2.5816118717193604, |
|
"logps/chosen": -270.58856201171875, |
|
"logps/rejected": -274.70672607421875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4583333432674408, |
|
"rewards/chosen": 4.151556277065538e-05, |
|
"rewards/margins": -0.00020179300918243825, |
|
"rewards/rejected": 0.00024330861924681813, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.08368200836820083, |
|
"grad_norm": 6.414259013418684, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -2.6335225105285645, |
|
"logits/rejected": -2.581326961517334, |
|
"logps/chosen": -283.6557312011719, |
|
"logps/rejected": -254.2339324951172, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.002201706636697054, |
|
"rewards/margins": 0.0030217303428798914, |
|
"rewards/rejected": -0.0008200235897675157, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.12552301255230125, |
|
"grad_norm": 6.392010117734167, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.5969154834747314, |
|
"logits/rejected": -2.549656629562378, |
|
"logps/chosen": -273.95355224609375, |
|
"logps/rejected": -294.8790283203125, |
|
"loss": 0.6863, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.017481274902820587, |
|
"rewards/margins": 0.011718345806002617, |
|
"rewards/rejected": 0.005762930028140545, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.16736401673640167, |
|
"grad_norm": 7.248442563268854, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -2.595952272415161, |
|
"logits/rejected": -2.5652222633361816, |
|
"logps/chosen": -289.846923828125, |
|
"logps/rejected": -260.589599609375, |
|
"loss": 0.6711, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": 0.04376225173473358, |
|
"rewards/margins": 0.04539982229471207, |
|
"rewards/rejected": -0.0016375742852687836, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.20920502092050208, |
|
"grad_norm": 8.50678895832972, |
|
"learning_rate": 4.999733114418725e-07, |
|
"logits/chosen": -2.543365001678467, |
|
"logits/rejected": -2.503990411758423, |
|
"logps/chosen": -262.071533203125, |
|
"logps/rejected": -270.35162353515625, |
|
"loss": 0.6455, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.02312565967440605, |
|
"rewards/margins": 0.1329289972782135, |
|
"rewards/rejected": -0.15605466067790985, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2510460251046025, |
|
"grad_norm": 11.516370003267136, |
|
"learning_rate": 4.990398100856366e-07, |
|
"logits/chosen": -2.530785083770752, |
|
"logits/rejected": -2.471062183380127, |
|
"logps/chosen": -303.14373779296875, |
|
"logps/rejected": -292.22222900390625, |
|
"loss": 0.6205, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.10289859771728516, |
|
"rewards/margins": 0.2306007593870163, |
|
"rewards/rejected": -0.33349937200546265, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.2928870292887029, |
|
"grad_norm": 13.753588341017098, |
|
"learning_rate": 4.967775735898179e-07, |
|
"logits/chosen": -2.4841325283050537, |
|
"logits/rejected": -2.452695846557617, |
|
"logps/chosen": -315.9739990234375, |
|
"logps/rejected": -333.6136169433594, |
|
"loss": 0.5922, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.16998834908008575, |
|
"rewards/margins": 0.3206475079059601, |
|
"rewards/rejected": -0.49063587188720703, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.33472803347280333, |
|
"grad_norm": 11.35627067081585, |
|
"learning_rate": 4.931986719649298e-07, |
|
"logits/chosen": -2.5453217029571533, |
|
"logits/rejected": -2.4769363403320312, |
|
"logps/chosen": -309.53948974609375, |
|
"logps/rejected": -329.1136169433594, |
|
"loss": 0.6015, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.31399667263031006, |
|
"rewards/margins": 0.30904850363731384, |
|
"rewards/rejected": -0.6230451464653015, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.37656903765690375, |
|
"grad_norm": 11.664778462055333, |
|
"learning_rate": 4.883222001996351e-07, |
|
"logits/chosen": -2.4549801349639893, |
|
"logits/rejected": -2.400050640106201, |
|
"logps/chosen": -286.2328186035156, |
|
"logps/rejected": -295.94677734375, |
|
"loss": 0.5755, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": -0.33411160111427307, |
|
"rewards/margins": 0.33180540800094604, |
|
"rewards/rejected": -0.6659170389175415, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.41841004184100417, |
|
"grad_norm": 22.592232850432577, |
|
"learning_rate": 4.821741763807186e-07, |
|
"logits/chosen": -2.287551164627075, |
|
"logits/rejected": -2.256509780883789, |
|
"logps/chosen": -297.83599853515625, |
|
"logps/rejected": -330.54534912109375, |
|
"loss": 0.5771, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.3224055767059326, |
|
"rewards/margins": 0.4239285886287689, |
|
"rewards/rejected": -0.7463341951370239, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.4602510460251046, |
|
"grad_norm": 22.163061153015562, |
|
"learning_rate": 4.747874028753375e-07, |
|
"logits/chosen": -1.209032654762268, |
|
"logits/rejected": -1.048477053642273, |
|
"logps/chosen": -346.4924621582031, |
|
"logps/rejected": -375.23907470703125, |
|
"loss": 0.5643, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.6897166967391968, |
|
"rewards/margins": 0.5778916478157043, |
|
"rewards/rejected": -1.267608404159546, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.502092050209205, |
|
"grad_norm": 15.378009113689544, |
|
"learning_rate": 4.662012913161997e-07, |
|
"logits/chosen": -0.8860737681388855, |
|
"logits/rejected": -0.8127919435501099, |
|
"logps/chosen": -314.23931884765625, |
|
"logps/rejected": -369.73126220703125, |
|
"loss": 0.5424, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -0.5789464712142944, |
|
"rewards/margins": 0.6342366933822632, |
|
"rewards/rejected": -1.2131831645965576, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.502092050209205, |
|
"eval_logits/chosen": -0.7461479902267456, |
|
"eval_logits/rejected": -0.5260862112045288, |
|
"eval_logps/chosen": -320.57440185546875, |
|
"eval_logps/rejected": -397.0288391113281, |
|
"eval_loss": 0.5378162860870361, |
|
"eval_rewards/accuracies": 0.7578125, |
|
"eval_rewards/chosen": -0.5794447660446167, |
|
"eval_rewards/margins": 0.7642225623130798, |
|
"eval_rewards/rejected": -1.3436672687530518, |
|
"eval_runtime": 92.7816, |
|
"eval_samples_per_second": 21.556, |
|
"eval_steps_per_second": 0.345, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.5439330543933054, |
|
"grad_norm": 16.331914790031046, |
|
"learning_rate": 4.5646165232345103e-07, |
|
"logits/chosen": -0.3421172797679901, |
|
"logits/rejected": 0.013933861628174782, |
|
"logps/chosen": -370.59661865234375, |
|
"logps/rejected": -408.4056701660156, |
|
"loss": 0.5286, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -0.7434619665145874, |
|
"rewards/margins": 0.773231565952301, |
|
"rewards/rejected": -1.5166934728622437, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.5857740585774058, |
|
"grad_norm": 18.80518382048536, |
|
"learning_rate": 4.456204510851956e-07, |
|
"logits/chosen": -0.10041685402393341, |
|
"logits/rejected": 0.32575997710227966, |
|
"logps/chosen": -341.23138427734375, |
|
"logps/rejected": -383.2661437988281, |
|
"loss": 0.5412, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -0.7187865376472473, |
|
"rewards/margins": 0.6145631670951843, |
|
"rewards/rejected": -1.333349585533142, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.6276150627615062, |
|
"grad_norm": 18.425345545348925, |
|
"learning_rate": 4.337355301007335e-07, |
|
"logits/chosen": 0.12889204919338226, |
|
"logits/rejected": 0.6717992424964905, |
|
"logps/chosen": -351.72857666015625, |
|
"logps/rejected": -393.3982238769531, |
|
"loss": 0.523, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.756680965423584, |
|
"rewards/margins": 0.6671403050422668, |
|
"rewards/rejected": -1.423821210861206, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.6694560669456067, |
|
"grad_norm": 20.303023485708806, |
|
"learning_rate": 4.2087030056579986e-07, |
|
"logits/chosen": 0.8695524334907532, |
|
"logits/rejected": 1.3302185535430908, |
|
"logps/chosen": -381.60040283203125, |
|
"logps/rejected": -408.2425231933594, |
|
"loss": 0.501, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -0.8709249496459961, |
|
"rewards/margins": 0.6947117447853088, |
|
"rewards/rejected": -1.5656368732452393, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.7112970711297071, |
|
"grad_norm": 21.411465314912224, |
|
"learning_rate": 4.070934040463998e-07, |
|
"logits/chosen": 0.351454496383667, |
|
"logits/rejected": 0.9576075673103333, |
|
"logps/chosen": -344.99151611328125, |
|
"logps/rejected": -422.1979064941406, |
|
"loss": 0.5135, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.6883319020271301, |
|
"rewards/margins": 0.8354803323745728, |
|
"rewards/rejected": -1.5238120555877686, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.7531380753138075, |
|
"grad_norm": 15.900677315245543, |
|
"learning_rate": 3.9247834624635404e-07, |
|
"logits/chosen": 0.4454058110713959, |
|
"logits/rejected": 1.2450531721115112, |
|
"logps/chosen": -380.77337646484375, |
|
"logps/rejected": -434.308837890625, |
|
"loss": 0.5054, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.003240942955017, |
|
"rewards/margins": 0.8300566673278809, |
|
"rewards/rejected": -1.8332977294921875, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.7949790794979079, |
|
"grad_norm": 19.05972863927213, |
|
"learning_rate": 3.7710310482256523e-07, |
|
"logits/chosen": 0.9014388918876648, |
|
"logits/rejected": 1.4588403701782227, |
|
"logps/chosen": -386.5629577636719, |
|
"logps/rejected": -458.37982177734375, |
|
"loss": 0.511, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.291537880897522, |
|
"rewards/margins": 0.8075591921806335, |
|
"rewards/rejected": -2.099097490310669, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.8368200836820083, |
|
"grad_norm": 15.243960568414757, |
|
"learning_rate": 3.610497133404795e-07, |
|
"logits/chosen": 0.5543237328529358, |
|
"logits/rejected": 1.2367184162139893, |
|
"logps/chosen": -395.40985107421875, |
|
"logps/rejected": -464.16790771484375, |
|
"loss": 0.5045, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -1.0732558965682983, |
|
"rewards/margins": 0.8137453198432922, |
|
"rewards/rejected": -1.8870010375976562, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.8786610878661087, |
|
"grad_norm": 21.79705764989603, |
|
"learning_rate": 3.4440382358952115e-07, |
|
"logits/chosen": 0.8483422994613647, |
|
"logits/rejected": 1.708146333694458, |
|
"logps/chosen": -397.6256408691406, |
|
"logps/rejected": -457.26251220703125, |
|
"loss": 0.5067, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.9962018728256226, |
|
"rewards/margins": 0.8600943684577942, |
|
"rewards/rejected": -1.856296181678772, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.9205020920502092, |
|
"grad_norm": 22.280979761106266, |
|
"learning_rate": 3.272542485937368e-07, |
|
"logits/chosen": 1.346966028213501, |
|
"logits/rejected": 2.0918831825256348, |
|
"logps/chosen": -388.975830078125, |
|
"logps/rejected": -459.4931640625, |
|
"loss": 0.4878, |
|
"rewards/accuracies": 0.778124988079071, |
|
"rewards/chosen": -1.072899341583252, |
|
"rewards/margins": 0.8876863718032837, |
|
"rewards/rejected": -1.9605858325958252, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.9623430962343096, |
|
"grad_norm": 16.23995219778178, |
|
"learning_rate": 3.096924887558854e-07, |
|
"logits/chosen": 0.7860053777694702, |
|
"logits/rejected": 1.4969043731689453, |
|
"logps/chosen": -385.02020263671875, |
|
"logps/rejected": -456.2998046875, |
|
"loss": 0.4982, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -1.0216295719146729, |
|
"rewards/margins": 0.7925800085067749, |
|
"rewards/rejected": -1.8142093420028687, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.00418410041841, |
|
"grad_norm": 15.78073429480071, |
|
"learning_rate": 2.9181224366319943e-07, |
|
"logits/chosen": 0.3803390562534332, |
|
"logits/rejected": 1.1587107181549072, |
|
"logps/chosen": -385.11871337890625, |
|
"logps/rejected": -451.45074462890625, |
|
"loss": 0.4857, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -1.0135738849639893, |
|
"rewards/margins": 0.7812036871910095, |
|
"rewards/rejected": -1.794777274131775, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.00418410041841, |
|
"eval_logits/chosen": 0.16183078289031982, |
|
"eval_logits/rejected": 0.8933889865875244, |
|
"eval_logps/chosen": -356.4661865234375, |
|
"eval_logps/rejected": -448.0263977050781, |
|
"eval_loss": 0.5070639848709106, |
|
"eval_rewards/accuracies": 0.76953125, |
|
"eval_rewards/chosen": -0.9383625984191895, |
|
"eval_rewards/margins": 0.9152804613113403, |
|
"eval_rewards/rejected": -1.8536430597305298, |
|
"eval_runtime": 92.714, |
|
"eval_samples_per_second": 21.572, |
|
"eval_steps_per_second": 0.345, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.0460251046025104, |
|
"grad_norm": 23.18218490845035, |
|
"learning_rate": 2.7370891215954565e-07, |
|
"logits/chosen": 0.6104318499565125, |
|
"logits/rejected": 1.8869889974594116, |
|
"logps/chosen": -382.5747985839844, |
|
"logps/rejected": -469.8555603027344, |
|
"loss": 0.3958, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.9375108480453491, |
|
"rewards/margins": 1.2374130487442017, |
|
"rewards/rejected": -2.17492413520813, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.0878661087866108, |
|
"grad_norm": 21.374034964142144, |
|
"learning_rate": 2.55479083351317e-07, |
|
"logits/chosen": 1.7387173175811768, |
|
"logits/rejected": 2.858025074005127, |
|
"logps/chosen": -386.90936279296875, |
|
"logps/rejected": -497.8505859375, |
|
"loss": 0.3918, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -1.0845394134521484, |
|
"rewards/margins": 1.3076287508010864, |
|
"rewards/rejected": -2.3921682834625244, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.1297071129707112, |
|
"grad_norm": 19.96170112877807, |
|
"learning_rate": 2.3722002126275822e-07, |
|
"logits/chosen": 1.5150352716445923, |
|
"logits/rejected": 2.6775641441345215, |
|
"logps/chosen": -381.6761169433594, |
|
"logps/rejected": -502.1348571777344, |
|
"loss": 0.3925, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.2450298070907593, |
|
"rewards/margins": 1.307213306427002, |
|
"rewards/rejected": -2.55224347114563, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.1715481171548117, |
|
"grad_norm": 22.51354307439107, |
|
"learning_rate": 2.19029145890313e-07, |
|
"logits/chosen": 1.4362571239471436, |
|
"logits/rejected": 2.767298460006714, |
|
"logps/chosen": -396.7832336425781, |
|
"logps/rejected": -508.6070861816406, |
|
"loss": 0.3929, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -1.2052323818206787, |
|
"rewards/margins": 1.3936196565628052, |
|
"rewards/rejected": -2.5988519191741943, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.213389121338912, |
|
"grad_norm": 20.36948553954091, |
|
"learning_rate": 2.0100351342479216e-07, |
|
"logits/chosen": 1.4359409809112549, |
|
"logits/rejected": 2.4658379554748535, |
|
"logps/chosen": -401.22698974609375, |
|
"logps/rejected": -520.5963134765625, |
|
"loss": 0.3799, |
|
"rewards/accuracies": 0.8343750238418579, |
|
"rewards/chosen": -1.2225128412246704, |
|
"rewards/margins": 1.338401436805725, |
|
"rewards/rejected": -2.5609142780303955, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.2552301255230125, |
|
"grad_norm": 22.413182089565385, |
|
"learning_rate": 1.8323929841460178e-07, |
|
"logits/chosen": 0.9247149229049683, |
|
"logits/rejected": 2.0291850566864014, |
|
"logps/chosen": -396.5758361816406, |
|
"logps/rejected": -495.02789306640625, |
|
"loss": 0.3728, |
|
"rewards/accuracies": 0.8343750238418579, |
|
"rewards/chosen": -1.2224568128585815, |
|
"rewards/margins": 1.2523252964019775, |
|
"rewards/rejected": -2.4747822284698486, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.297071129707113, |
|
"grad_norm": 24.42601699935305, |
|
"learning_rate": 1.6583128063291573e-07, |
|
"logits/chosen": 1.1332646608352661, |
|
"logits/rejected": 2.3863754272460938, |
|
"logps/chosen": -445.12030029296875, |
|
"logps/rejected": -543.061767578125, |
|
"loss": 0.3818, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.449904203414917, |
|
"rewards/margins": 1.3690345287322998, |
|
"rewards/rejected": -2.818938732147217, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.3389121338912133, |
|
"grad_norm": 20.99065467608554, |
|
"learning_rate": 1.488723393865766e-07, |
|
"logits/chosen": 1.08762788772583, |
|
"logits/rejected": 2.3433494567871094, |
|
"logps/chosen": -394.6717529296875, |
|
"logps/rejected": -507.3253479003906, |
|
"loss": 0.3768, |
|
"rewards/accuracies": 0.846875011920929, |
|
"rewards/chosen": -1.1884700059890747, |
|
"rewards/margins": 1.3756790161132812, |
|
"rewards/rejected": -2.5641491413116455, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.3807531380753137, |
|
"grad_norm": 22.94560752577754, |
|
"learning_rate": 1.3245295796480788e-07, |
|
"logits/chosen": 1.2340595722198486, |
|
"logits/rejected": 2.4653096199035645, |
|
"logps/chosen": -392.4693298339844, |
|
"logps/rejected": -525.5010986328125, |
|
"loss": 0.3765, |
|
"rewards/accuracies": 0.8656250238418579, |
|
"rewards/chosen": -1.2584080696105957, |
|
"rewards/margins": 1.4808101654052734, |
|
"rewards/rejected": -2.739218235015869, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.4225941422594142, |
|
"grad_norm": 21.208221289898617, |
|
"learning_rate": 1.1666074087171627e-07, |
|
"logits/chosen": 1.8728317022323608, |
|
"logits/rejected": 2.8767504692077637, |
|
"logps/chosen": -418.21063232421875, |
|
"logps/rejected": -570.7986450195312, |
|
"loss": 0.3668, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.5739128589630127, |
|
"rewards/margins": 1.4613518714904785, |
|
"rewards/rejected": -3.035264730453491, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.4644351464435146, |
|
"grad_norm": 18.63125933730427, |
|
"learning_rate": 1.0157994641835734e-07, |
|
"logits/chosen": 1.387270212173462, |
|
"logits/rejected": 2.4656243324279785, |
|
"logps/chosen": -419.15118408203125, |
|
"logps/rejected": -546.2973022460938, |
|
"loss": 0.3746, |
|
"rewards/accuracies": 0.815625011920929, |
|
"rewards/chosen": -1.4073892831802368, |
|
"rewards/margins": 1.4395434856414795, |
|
"rewards/rejected": -2.8469326496124268, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.506276150627615, |
|
"grad_norm": 21.561834418503867, |
|
"learning_rate": 8.729103716819111e-08, |
|
"logits/chosen": 1.3860183954238892, |
|
"logits/rejected": 2.663687229156494, |
|
"logps/chosen": -441.0545959472656, |
|
"logps/rejected": -565.4046630859375, |
|
"loss": 0.3605, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.4647375345230103, |
|
"rewards/margins": 1.5477092266082764, |
|
"rewards/rejected": -3.012446880340576, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.506276150627615, |
|
"eval_logits/chosen": 1.2559356689453125, |
|
"eval_logits/rejected": 2.126875400543213, |
|
"eval_logps/chosen": -418.867431640625, |
|
"eval_logps/rejected": -538.7271728515625, |
|
"eval_loss": 0.49959123134613037, |
|
"eval_rewards/accuracies": 0.7734375, |
|
"eval_rewards/chosen": -1.5623750686645508, |
|
"eval_rewards/margins": 1.198276162147522, |
|
"eval_rewards/rejected": -2.760651111602783, |
|
"eval_runtime": 92.6897, |
|
"eval_samples_per_second": 21.577, |
|
"eval_steps_per_second": 0.345, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.5481171548117154, |
|
"grad_norm": 21.36945331074877, |
|
"learning_rate": 7.387025063449081e-08, |
|
"logits/chosen": 1.0453870296478271, |
|
"logits/rejected": 2.2109768390655518, |
|
"logps/chosen": -424.0228576660156, |
|
"logps/rejected": -534.3453369140625, |
|
"loss": 0.3795, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.3744726181030273, |
|
"rewards/margins": 1.4031000137329102, |
|
"rewards/rejected": -2.7775726318359375, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.5899581589958158, |
|
"grad_norm": 22.911030035129745, |
|
"learning_rate": 6.138919252022435e-08, |
|
"logits/chosen": 1.1873348951339722, |
|
"logits/rejected": 2.3200831413269043, |
|
"logps/chosen": -430.74041748046875, |
|
"logps/rejected": -583.8822021484375, |
|
"loss": 0.3643, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -1.362330675125122, |
|
"rewards/margins": 1.612577199935913, |
|
"rewards/rejected": -2.974907398223877, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.6317991631799162, |
|
"grad_norm": 24.540688168711906, |
|
"learning_rate": 4.991445467064689e-08, |
|
"logits/chosen": 1.3015904426574707, |
|
"logits/rejected": 2.6886634826660156, |
|
"logps/chosen": -425.5443420410156, |
|
"logps/rejected": -545.5137939453125, |
|
"loss": 0.3676, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.4759005308151245, |
|
"rewards/margins": 1.4676305055618286, |
|
"rewards/rejected": -2.943531036376953, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.6736401673640167, |
|
"grad_norm": 21.94633419757557, |
|
"learning_rate": 3.9507259776993954e-08, |
|
"logits/chosen": 1.3217722177505493, |
|
"logits/rejected": 2.891322612762451, |
|
"logps/chosen": -432.73016357421875, |
|
"logps/rejected": -545.9283447265625, |
|
"loss": 0.3465, |
|
"rewards/accuracies": 0.8531249761581421, |
|
"rewards/chosen": -1.3692272901535034, |
|
"rewards/margins": 1.5952247381210327, |
|
"rewards/rejected": -2.964452028274536, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.715481171548117, |
|
"grad_norm": 24.76781191446862, |
|
"learning_rate": 3.022313472693447e-08, |
|
"logits/chosen": 1.4330356121063232, |
|
"logits/rejected": 2.685926675796509, |
|
"logps/chosen": -429.12396240234375, |
|
"logps/rejected": -579.2725830078125, |
|
"loss": 0.373, |
|
"rewards/accuracies": 0.840624988079071, |
|
"rewards/chosen": -1.558347225189209, |
|
"rewards/margins": 1.5663774013519287, |
|
"rewards/rejected": -3.1247243881225586, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.7573221757322175, |
|
"grad_norm": 25.20072626876098, |
|
"learning_rate": 2.2111614344599684e-08, |
|
"logits/chosen": 1.412347435951233, |
|
"logits/rejected": 2.654418468475342, |
|
"logps/chosen": -440.5337829589844, |
|
"logps/rejected": -564.5003662109375, |
|
"loss": 0.3579, |
|
"rewards/accuracies": 0.846875011920929, |
|
"rewards/chosen": -1.606597900390625, |
|
"rewards/margins": 1.5640956163406372, |
|
"rewards/rejected": -3.1706936359405518, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.799163179916318, |
|
"grad_norm": 26.825223612817034, |
|
"learning_rate": 1.521597710086439e-08, |
|
"logits/chosen": 1.6009809970855713, |
|
"logits/rejected": 2.670750141143799, |
|
"logps/chosen": -428.7069396972656, |
|
"logps/rejected": -545.6900634765625, |
|
"loss": 0.367, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -1.4805653095245361, |
|
"rewards/margins": 1.4459596872329712, |
|
"rewards/rejected": -2.9265246391296387, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.8410041841004183, |
|
"grad_norm": 24.005425425971413, |
|
"learning_rate": 9.57301420397924e-09, |
|
"logits/chosen": 1.6811132431030273, |
|
"logits/rejected": 2.9491238594055176, |
|
"logps/chosen": -423.1639099121094, |
|
"logps/rejected": -534.7427368164062, |
|
"loss": 0.3586, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.541284441947937, |
|
"rewards/margins": 1.473746657371521, |
|
"rewards/rejected": -3.015031099319458, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.8828451882845187, |
|
"grad_norm": 22.489524898683964, |
|
"learning_rate": 5.212833302556258e-09, |
|
"logits/chosen": 1.561452031135559, |
|
"logits/rejected": 2.7442641258239746, |
|
"logps/chosen": -434.5982971191406, |
|
"logps/rejected": -559.0643310546875, |
|
"loss": 0.3441, |
|
"rewards/accuracies": 0.8843749761581421, |
|
"rewards/chosen": -1.5354801416397095, |
|
"rewards/margins": 1.545243501663208, |
|
"rewards/rejected": -3.080723524093628, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.9246861924686192, |
|
"grad_norm": 26.262256603057228, |
|
"learning_rate": 2.158697848236607e-09, |
|
"logits/chosen": 1.7851985692977905, |
|
"logits/rejected": 2.9062929153442383, |
|
"logps/chosen": -402.18853759765625, |
|
"logps/rejected": -562.9603271484375, |
|
"loss": 0.366, |
|
"rewards/accuracies": 0.846875011920929, |
|
"rewards/chosen": -1.484203577041626, |
|
"rewards/margins": 1.569826602935791, |
|
"rewards/rejected": -3.054030179977417, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.9665271966527196, |
|
"grad_norm": 26.928130235610237, |
|
"learning_rate": 4.269029751107489e-10, |
|
"logits/chosen": 1.2375901937484741, |
|
"logits/rejected": 2.649852991104126, |
|
"logps/chosen": -434.2308654785156, |
|
"logps/rejected": -528.9287719726562, |
|
"loss": 0.3636, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.4850108623504639, |
|
"rewards/margins": 1.387107253074646, |
|
"rewards/rejected": -2.8721179962158203, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 478, |
|
"total_flos": 0.0, |
|
"train_loss": 0.4685918840903119, |
|
"train_runtime": 13863.6386, |
|
"train_samples_per_second": 8.819, |
|
"train_steps_per_second": 0.034 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 478, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 120, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|