|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 15.886524822695035, |
|
"eval_steps": 80, |
|
"global_step": 840, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.6052009456264775, |
|
"grad_norm": 897.57861328125, |
|
"learning_rate": 1.9047619047619045e-07, |
|
"log_odds_chosen": 0.061996445059776306, |
|
"log_odds_ratio": -0.7339106202125549, |
|
"logits/chosen": -2.475102663040161, |
|
"logits/rejected": -2.5303642749786377, |
|
"logps/chosen": -1.3030776977539062, |
|
"logps/rejected": -1.351835012435913, |
|
"loss": 1.9448, |
|
"nll_loss": 1.881751298904419, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.13030776381492615, |
|
"rewards/margins": 0.004875739570707083, |
|
"rewards/rejected": -0.13518351316452026, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 1.210401891252955, |
|
"grad_norm": 555.6674194335938, |
|
"learning_rate": 3.809523809523809e-07, |
|
"log_odds_chosen": 0.13086628913879395, |
|
"log_odds_ratio": -0.6972255110740662, |
|
"logits/chosen": -2.408938407897949, |
|
"logits/rejected": -2.4563820362091064, |
|
"logps/chosen": -1.2012869119644165, |
|
"logps/rejected": -1.3022348880767822, |
|
"loss": 1.5253, |
|
"nll_loss": 1.4454330205917358, |
|
"rewards/accuracies": 0.54296875, |
|
"rewards/chosen": -0.12012868374586105, |
|
"rewards/margins": 0.010094808414578438, |
|
"rewards/rejected": -0.13022349774837494, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 1.5130023640661938, |
|
"eval_log_odds_chosen": 1.2037408351898193, |
|
"eval_log_odds_ratio": -0.2748129367828369, |
|
"eval_logits/chosen": -2.1409010887145996, |
|
"eval_logits/rejected": -2.1931569576263428, |
|
"eval_logps/chosen": -1.156149983406067, |
|
"eval_logps/rejected": -2.1107430458068848, |
|
"eval_loss": 1.3948438167572021, |
|
"eval_nll_loss": 1.5358692407608032, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.11561501026153564, |
|
"eval_rewards/margins": 0.09545929729938507, |
|
"eval_rewards/rejected": -0.21107430756092072, |
|
"eval_runtime": 0.8754, |
|
"eval_samples_per_second": 156.493, |
|
"eval_steps_per_second": 5.711, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.8156028368794326, |
|
"grad_norm": 140.96612548828125, |
|
"learning_rate": 4.996892303047305e-07, |
|
"log_odds_chosen": 0.16402098536491394, |
|
"log_odds_ratio": -0.6756913065910339, |
|
"logits/chosen": -2.3939661979675293, |
|
"logits/rejected": -2.389753580093384, |
|
"logps/chosen": -1.0995960235595703, |
|
"logps/rejected": -1.2302087545394897, |
|
"loss": 1.3931, |
|
"nll_loss": 1.3130543231964111, |
|
"rewards/accuracies": 0.49609375, |
|
"rewards/chosen": -0.10995960980653763, |
|
"rewards/margins": 0.013061259873211384, |
|
"rewards/rejected": -0.12302087247371674, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 2.42080378250591, |
|
"grad_norm": 3005.20654296875, |
|
"learning_rate": 4.958326378681848e-07, |
|
"log_odds_chosen": 0.05211365222930908, |
|
"log_odds_ratio": -0.7710955142974854, |
|
"logits/chosen": -2.4226865768432617, |
|
"logits/rejected": -2.4471077919006348, |
|
"logps/chosen": -1.8894121646881104, |
|
"logps/rejected": -1.878553867340088, |
|
"loss": 1.9751, |
|
"nll_loss": 1.9949692487716675, |
|
"rewards/accuracies": 0.54296875, |
|
"rewards/chosen": -0.18894124031066895, |
|
"rewards/margins": -0.001085837371647358, |
|
"rewards/rejected": -0.18785539269447327, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 3.0260047281323876, |
|
"grad_norm": 3593.66064453125, |
|
"learning_rate": 4.876353872369572e-07, |
|
"log_odds_chosen": 0.010831637308001518, |
|
"log_odds_ratio": -0.8205243349075317, |
|
"logits/chosen": -2.4603629112243652, |
|
"logits/rejected": -2.4731788635253906, |
|
"logps/chosen": -1.9289910793304443, |
|
"logps/rejected": -1.854127049446106, |
|
"loss": 2.0756, |
|
"nll_loss": 2.116929769515991, |
|
"rewards/accuracies": 0.58203125, |
|
"rewards/chosen": -0.19289910793304443, |
|
"rewards/margins": -0.0074864043854177, |
|
"rewards/rejected": -0.1854127049446106, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 3.0260047281323876, |
|
"eval_log_odds_chosen": 1.280719518661499, |
|
"eval_log_odds_ratio": -0.25084003806114197, |
|
"eval_logits/chosen": -2.156606912612915, |
|
"eval_logits/rejected": -2.2219834327697754, |
|
"eval_logps/chosen": -1.4854581356048584, |
|
"eval_logps/rejected": -2.5444798469543457, |
|
"eval_loss": 1.3283345699310303, |
|
"eval_nll_loss": 1.4989588260650635, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.14854581654071808, |
|
"eval_rewards/margins": 0.10590219497680664, |
|
"eval_rewards/rejected": -0.2544480264186859, |
|
"eval_runtime": 0.8785, |
|
"eval_samples_per_second": 155.943, |
|
"eval_steps_per_second": 5.691, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 3.631205673758865, |
|
"grad_norm": 817.2037353515625, |
|
"learning_rate": 4.752422169756047e-07, |
|
"log_odds_chosen": 0.09074901789426804, |
|
"log_odds_ratio": -0.7456185817718506, |
|
"logits/chosen": -2.377356767654419, |
|
"logits/rejected": -2.396003007888794, |
|
"logps/chosen": -1.5171489715576172, |
|
"logps/rejected": -1.5251379013061523, |
|
"loss": 1.9051, |
|
"nll_loss": 1.6593117713928223, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.15171489119529724, |
|
"rewards/margins": 0.0007988963043317199, |
|
"rewards/rejected": -0.152513787150383, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 4.236406619385343, |
|
"grad_norm": 954.6674194335938, |
|
"learning_rate": 4.588719528532341e-07, |
|
"log_odds_chosen": 0.1411646008491516, |
|
"log_odds_ratio": -0.6900860667228699, |
|
"logits/chosen": -2.398102283477783, |
|
"logits/rejected": -2.397972345352173, |
|
"logps/chosen": -1.2695732116699219, |
|
"logps/rejected": -1.3286174535751343, |
|
"loss": 1.4204, |
|
"nll_loss": 1.3868590593338013, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": -0.12695731222629547, |
|
"rewards/margins": 0.005904428660869598, |
|
"rewards/rejected": -0.13286174833774567, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 4.539007092198582, |
|
"eval_log_odds_chosen": 1.1990762948989868, |
|
"eval_log_odds_ratio": -0.2697806656360626, |
|
"eval_logits/chosen": -2.137376546859741, |
|
"eval_logits/rejected": -2.1972498893737793, |
|
"eval_logps/chosen": -1.2540639638900757, |
|
"eval_logps/rejected": -2.2160115242004395, |
|
"eval_loss": 1.2844356298446655, |
|
"eval_nll_loss": 1.4172712564468384, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.125406414270401, |
|
"eval_rewards/margins": 0.09619472920894623, |
|
"eval_rewards/rejected": -0.22160112857818604, |
|
"eval_runtime": 0.8664, |
|
"eval_samples_per_second": 158.119, |
|
"eval_steps_per_second": 5.771, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 4.84160756501182, |
|
"grad_norm": 790.6442260742188, |
|
"learning_rate": 4.3881364404463375e-07, |
|
"log_odds_chosen": 0.20751571655273438, |
|
"log_odds_ratio": -0.6614270210266113, |
|
"logits/chosen": -2.3498642444610596, |
|
"logits/rejected": -2.370640993118286, |
|
"logps/chosen": -1.1192173957824707, |
|
"logps/rejected": -1.2252520322799683, |
|
"loss": 1.3469, |
|
"nll_loss": 1.2267839908599854, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.1119217574596405, |
|
"rewards/margins": 0.010603459551930428, |
|
"rewards/rejected": -0.12252521514892578, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 5.446808510638298, |
|
"grad_norm": 2515.4189453125, |
|
"learning_rate": 4.154214593992149e-07, |
|
"log_odds_chosen": 0.23377765715122223, |
|
"log_odds_ratio": -0.6729075312614441, |
|
"logits/chosen": -2.322608709335327, |
|
"logits/rejected": -2.361389636993408, |
|
"logps/chosen": -1.1726882457733154, |
|
"logps/rejected": -1.2837783098220825, |
|
"loss": 1.3539, |
|
"nll_loss": 1.2735731601715088, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.11726883798837662, |
|
"rewards/margins": 0.011109001003205776, |
|
"rewards/rejected": -0.12837782502174377, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 6.052009456264775, |
|
"grad_norm": 2093.776611328125, |
|
"learning_rate": 3.891084338941603e-07, |
|
"log_odds_chosen": 0.16962425410747528, |
|
"log_odds_ratio": -0.6696641445159912, |
|
"logits/chosen": -2.325108051300049, |
|
"logits/rejected": -2.3817710876464844, |
|
"logps/chosen": -3.6265933513641357, |
|
"logps/rejected": -3.700042724609375, |
|
"loss": 3.6173, |
|
"nll_loss": 3.7216219902038574, |
|
"rewards/accuracies": 0.59765625, |
|
"rewards/chosen": -0.3626593351364136, |
|
"rewards/margins": 0.007344960235059261, |
|
"rewards/rejected": -0.3700042963027954, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 6.052009456264775, |
|
"eval_log_odds_chosen": 1.1228582859039307, |
|
"eval_log_odds_ratio": -0.2914997637271881, |
|
"eval_logits/chosen": -2.153041362762451, |
|
"eval_logits/rejected": -2.239081621170044, |
|
"eval_logps/chosen": -1.1402614116668701, |
|
"eval_logps/rejected": -2.0236728191375732, |
|
"eval_loss": 1.2484513521194458, |
|
"eval_nll_loss": 1.3337957859039307, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.11402615159749985, |
|
"eval_rewards/margins": 0.0883411318063736, |
|
"eval_rewards/rejected": -0.20236727595329285, |
|
"eval_runtime": 0.8835, |
|
"eval_samples_per_second": 155.057, |
|
"eval_steps_per_second": 5.659, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 6.657210401891253, |
|
"grad_norm": 750.7427978515625, |
|
"learning_rate": 3.6033917569043597e-07, |
|
"log_odds_chosen": 0.2158849686384201, |
|
"log_odds_ratio": -0.651162326335907, |
|
"logits/chosen": -2.2999160289764404, |
|
"logits/rejected": -2.3155159950256348, |
|
"logps/chosen": -3.3152918815612793, |
|
"logps/rejected": -3.4116926193237305, |
|
"loss": 3.4506, |
|
"nll_loss": 3.4377260208129883, |
|
"rewards/accuracies": 0.6015625, |
|
"rewards/chosen": -0.3315292000770569, |
|
"rewards/margins": 0.00964003149420023, |
|
"rewards/rejected": -0.34116923809051514, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 7.26241134751773, |
|
"grad_norm": 466.0474548339844, |
|
"learning_rate": 3.296216625629211e-07, |
|
"log_odds_chosen": 0.2518257200717926, |
|
"log_odds_ratio": -0.6292858123779297, |
|
"logits/chosen": -2.287289619445801, |
|
"logits/rejected": -2.274383783340454, |
|
"logps/chosen": -2.936006784439087, |
|
"logps/rejected": -3.0706114768981934, |
|
"loss": 3.1836, |
|
"nll_loss": 3.031456708908081, |
|
"rewards/accuracies": 0.66796875, |
|
"rewards/chosen": -0.2936007082462311, |
|
"rewards/margins": 0.013460462912917137, |
|
"rewards/rejected": -0.30706116557121277, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 7.5650118203309695, |
|
"eval_log_odds_chosen": 1.1787246465682983, |
|
"eval_log_odds_ratio": -0.27878421545028687, |
|
"eval_logits/chosen": -2.131922721862793, |
|
"eval_logits/rejected": -2.198315143585205, |
|
"eval_logps/chosen": -1.1629152297973633, |
|
"eval_logps/rejected": -2.102142810821533, |
|
"eval_loss": 1.2289972305297852, |
|
"eval_nll_loss": 1.3089702129364014, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.11629153788089752, |
|
"eval_rewards/margins": 0.09392273426055908, |
|
"eval_rewards/rejected": -0.2102142572402954, |
|
"eval_runtime": 0.8657, |
|
"eval_samples_per_second": 158.25, |
|
"eval_steps_per_second": 5.776, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 7.867612293144208, |
|
"grad_norm": 493.2022399902344, |
|
"learning_rate": 2.974982725547975e-07, |
|
"log_odds_chosen": 0.29160410165786743, |
|
"log_odds_ratio": -0.6114708781242371, |
|
"logits/chosen": -2.296574115753174, |
|
"logits/rejected": -2.3063693046569824, |
|
"logps/chosen": -2.907156229019165, |
|
"logps/rejected": -3.049989938735962, |
|
"loss": 3.0885, |
|
"nll_loss": 2.9950599670410156, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -0.29071560502052307, |
|
"rewards/margins": 0.014283367432653904, |
|
"rewards/rejected": -0.3049989938735962, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 8.472813238770685, |
|
"grad_norm": 2084.139892578125, |
|
"learning_rate": 2.6453620722761895e-07, |
|
"log_odds_chosen": 0.2739107012748718, |
|
"log_odds_ratio": -0.6295269727706909, |
|
"logits/chosen": -2.3001277446746826, |
|
"logits/rejected": -2.2884907722473145, |
|
"logps/chosen": -2.9699883460998535, |
|
"logps/rejected": -3.114020586013794, |
|
"loss": 2.9983, |
|
"nll_loss": 3.031224012374878, |
|
"rewards/accuracies": 0.65234375, |
|
"rewards/chosen": -0.29699885845184326, |
|
"rewards/margins": 0.014403235167264938, |
|
"rewards/rejected": -0.3114020824432373, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 9.078014184397164, |
|
"grad_norm": 557.6774291992188, |
|
"learning_rate": 2.3131747660339394e-07, |
|
"log_odds_chosen": 0.2703976333141327, |
|
"log_odds_ratio": -0.6236827969551086, |
|
"logits/chosen": -2.2672348022460938, |
|
"logits/rejected": -2.2586584091186523, |
|
"logps/chosen": -2.937666893005371, |
|
"logps/rejected": -3.061203718185425, |
|
"loss": 2.8082, |
|
"nll_loss": 3.023472785949707, |
|
"rewards/accuracies": 0.66015625, |
|
"rewards/chosen": -0.29376670718193054, |
|
"rewards/margins": 0.012353670783340931, |
|
"rewards/rejected": -0.306120365858078, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 9.078014184397164, |
|
"eval_log_odds_chosen": 1.1980304718017578, |
|
"eval_log_odds_ratio": -0.27382025122642517, |
|
"eval_logits/chosen": -2.1204967498779297, |
|
"eval_logits/rejected": -2.1800942420959473, |
|
"eval_logps/chosen": -1.192492961883545, |
|
"eval_logps/rejected": -2.1554245948791504, |
|
"eval_loss": 1.2367494106292725, |
|
"eval_nll_loss": 1.3177238702774048, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.11924929916858673, |
|
"eval_rewards/margins": 0.09629315137863159, |
|
"eval_rewards/rejected": -0.21554246544837952, |
|
"eval_runtime": 0.88, |
|
"eval_samples_per_second": 155.689, |
|
"eval_steps_per_second": 5.682, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 9.68321513002364, |
|
"grad_norm": 8620.671875, |
|
"learning_rate": 1.984286226342056e-07, |
|
"log_odds_chosen": 0.36868974566459656, |
|
"log_odds_ratio": -0.6002693176269531, |
|
"logits/chosen": -2.237966537475586, |
|
"logits/rejected": -2.2450058460235596, |
|
"logps/chosen": -2.536555290222168, |
|
"logps/rejected": -2.738464117050171, |
|
"loss": 2.7562, |
|
"nll_loss": 2.642591714859009, |
|
"rewards/accuracies": 0.69921875, |
|
"rewards/chosen": -0.2536555230617523, |
|
"rewards/margins": 0.020190902054309845, |
|
"rewards/rejected": -0.27384641766548157, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 10.288416075650119, |
|
"grad_norm": 8913.7607421875, |
|
"learning_rate": 1.6645036265170313e-07, |
|
"log_odds_chosen": 0.23036888241767883, |
|
"log_odds_ratio": -0.6965319514274597, |
|
"logits/chosen": -2.346311092376709, |
|
"logits/rejected": -2.3196349143981934, |
|
"logps/chosen": -2.625997543334961, |
|
"logps/rejected": -2.695284605026245, |
|
"loss": 2.9109, |
|
"nll_loss": 2.6460041999816895, |
|
"rewards/accuracies": 0.68359375, |
|
"rewards/chosen": -0.26259979605674744, |
|
"rewards/margins": 0.006928655784577131, |
|
"rewards/rejected": -0.26952844858169556, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 10.591016548463356, |
|
"eval_log_odds_chosen": 1.2137528657913208, |
|
"eval_log_odds_ratio": -0.2704525589942932, |
|
"eval_logits/chosen": -2.1178054809570312, |
|
"eval_logits/rejected": -2.1774165630340576, |
|
"eval_logps/chosen": -1.1941485404968262, |
|
"eval_logps/rejected": -2.171353340148926, |
|
"eval_loss": 1.237461805343628, |
|
"eval_nll_loss": 1.3179538249969482, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.11941485106945038, |
|
"eval_rewards/margins": 0.09772048145532608, |
|
"eval_rewards/rejected": -0.21713533997535706, |
|
"eval_runtime": 0.8664, |
|
"eval_samples_per_second": 158.121, |
|
"eval_steps_per_second": 5.771, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 10.893617021276595, |
|
"grad_norm": 11410.7939453125, |
|
"learning_rate": 1.3594733566170925e-07, |
|
"log_odds_chosen": 0.2925941050052643, |
|
"log_odds_ratio": -0.6866809725761414, |
|
"logits/chosen": -2.3280415534973145, |
|
"logits/rejected": -2.308394432067871, |
|
"logps/chosen": -3.2026498317718506, |
|
"logps/rejected": -3.3342018127441406, |
|
"loss": 2.9544, |
|
"nll_loss": 3.251168966293335, |
|
"rewards/accuracies": 0.66796875, |
|
"rewards/chosen": -0.320264995098114, |
|
"rewards/margins": 0.013155205175280571, |
|
"rewards/rejected": -0.33342018723487854, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 11.498817966903074, |
|
"grad_norm": 2123.895751953125, |
|
"learning_rate": 1.0745813253325956e-07, |
|
"log_odds_chosen": 0.3092188239097595, |
|
"log_odds_ratio": -0.6492509245872498, |
|
"logits/chosen": -2.3580808639526367, |
|
"logits/rejected": -2.349421501159668, |
|
"logps/chosen": -2.5302317142486572, |
|
"logps/rejected": -2.662865161895752, |
|
"loss": 2.8523, |
|
"nll_loss": 2.578503131866455, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -0.2530231475830078, |
|
"rewards/margins": 0.013263333588838577, |
|
"rewards/rejected": -0.2662864923477173, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 12.10401891252955, |
|
"grad_norm": 1731.5615234375, |
|
"learning_rate": 8.148578611867113e-08, |
|
"log_odds_chosen": 0.3393189013004303, |
|
"log_odds_ratio": -0.6164168119430542, |
|
"logits/chosen": -2.1805524826049805, |
|
"logits/rejected": -2.177432060241699, |
|
"logps/chosen": -2.5276594161987305, |
|
"logps/rejected": -2.710268497467041, |
|
"loss": 2.5512, |
|
"nll_loss": 2.681882381439209, |
|
"rewards/accuracies": 0.67578125, |
|
"rewards/chosen": -0.252765953540802, |
|
"rewards/margins": 0.01826086826622486, |
|
"rewards/rejected": -0.2710268199443817, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 12.10401891252955, |
|
"eval_log_odds_chosen": 1.2131071090698242, |
|
"eval_log_odds_ratio": -0.270443856716156, |
|
"eval_logits/chosen": -2.1185622215270996, |
|
"eval_logits/rejected": -2.178537368774414, |
|
"eval_logps/chosen": -1.198697566986084, |
|
"eval_logps/rejected": -2.176114559173584, |
|
"eval_loss": 1.2388056516647339, |
|
"eval_nll_loss": 1.3213987350463867, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.11986975371837616, |
|
"eval_rewards/margins": 0.09774170815944672, |
|
"eval_rewards/rejected": -0.21761147677898407, |
|
"eval_runtime": 0.8825, |
|
"eval_samples_per_second": 155.235, |
|
"eval_steps_per_second": 5.666, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 12.709219858156029, |
|
"grad_norm": 1997.690185546875, |
|
"learning_rate": 5.848888922025552e-08, |
|
"log_odds_chosen": 0.3624497354030609, |
|
"log_odds_ratio": -0.6123137474060059, |
|
"logits/chosen": -2.179229259490967, |
|
"logits/rejected": -2.1755523681640625, |
|
"logps/chosen": -2.486222743988037, |
|
"logps/rejected": -2.6808714866638184, |
|
"loss": 2.5628, |
|
"nll_loss": 2.6199562549591064, |
|
"rewards/accuracies": 0.69140625, |
|
"rewards/chosen": -0.24862225353717804, |
|
"rewards/margins": 0.019464917480945587, |
|
"rewards/rejected": -0.2680871784687042, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 13.314420803782506, |
|
"grad_norm": 2036.525146484375, |
|
"learning_rate": 3.887349723342303e-08, |
|
"log_odds_chosen": 0.3463588356971741, |
|
"log_odds_ratio": -0.6327537298202515, |
|
"logits/chosen": -2.181072235107422, |
|
"logits/rejected": -2.1947262287139893, |
|
"logps/chosen": -2.517810344696045, |
|
"logps/rejected": -2.672647476196289, |
|
"loss": 2.6212, |
|
"nll_loss": 2.6852023601531982, |
|
"rewards/accuracies": 0.68359375, |
|
"rewards/chosen": -0.25178101658821106, |
|
"rewards/margins": 0.01548372209072113, |
|
"rewards/rejected": -0.2672647535800934, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 13.617021276595745, |
|
"eval_log_odds_chosen": 1.2199119329452515, |
|
"eval_log_odds_ratio": -0.26896363496780396, |
|
"eval_logits/chosen": -2.1166138648986816, |
|
"eval_logits/rejected": -2.1762003898620605, |
|
"eval_logps/chosen": -1.1962625980377197, |
|
"eval_logps/rejected": -2.1790993213653564, |
|
"eval_loss": 1.2387369871139526, |
|
"eval_nll_loss": 1.3203083276748657, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.11962626129388809, |
|
"eval_rewards/margins": 0.09828367829322815, |
|
"eval_rewards/rejected": -0.21790993213653564, |
|
"eval_runtime": 0.8708, |
|
"eval_samples_per_second": 157.334, |
|
"eval_steps_per_second": 5.742, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 13.919621749408984, |
|
"grad_norm": 9910.3740234375, |
|
"learning_rate": 2.298595844092377e-08, |
|
"log_odds_chosen": 0.3617098927497864, |
|
"log_odds_ratio": -0.6060731410980225, |
|
"logits/chosen": -2.2685229778289795, |
|
"logits/rejected": -2.2752606868743896, |
|
"logps/chosen": -1.9047422409057617, |
|
"logps/rejected": -2.0876576900482178, |
|
"loss": 2.4957, |
|
"nll_loss": 1.975754737854004, |
|
"rewards/accuracies": 0.71484375, |
|
"rewards/chosen": -0.19047421216964722, |
|
"rewards/margins": 0.01829155907034874, |
|
"rewards/rejected": -0.20876577496528625, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 14.52482269503546, |
|
"grad_norm": 1543.000244140625, |
|
"learning_rate": 1.1106798553464802e-08, |
|
"log_odds_chosen": 0.42522603273391724, |
|
"log_odds_ratio": -0.5653746128082275, |
|
"logits/chosen": -2.353919744491577, |
|
"logits/rejected": -2.358372688293457, |
|
"logps/chosen": -1.2913402318954468, |
|
"logps/rejected": -1.542799711227417, |
|
"loss": 1.4582, |
|
"nll_loss": 1.3932266235351562, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.12913402915000916, |
|
"rewards/margins": 0.025145962834358215, |
|
"rewards/rejected": -0.15427997708320618, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 15.130023640661939, |
|
"grad_norm": 698.0999755859375, |
|
"learning_rate": 3.4457674771554422e-09, |
|
"log_odds_chosen": 0.4467349052429199, |
|
"log_odds_ratio": -0.545281171798706, |
|
"logits/chosen": -2.313391923904419, |
|
"logits/rejected": -2.3118624687194824, |
|
"logps/chosen": -1.2114390134811401, |
|
"logps/rejected": -1.4863505363464355, |
|
"loss": 1.3504, |
|
"nll_loss": 1.3252184391021729, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1211438924074173, |
|
"rewards/margins": 0.027491170912981033, |
|
"rewards/rejected": -0.14863505959510803, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 15.130023640661939, |
|
"eval_log_odds_chosen": 1.211981177330017, |
|
"eval_log_odds_ratio": -0.27068275213241577, |
|
"eval_logits/chosen": -2.118680715560913, |
|
"eval_logits/rejected": -2.1784884929656982, |
|
"eval_logps/chosen": -1.1996212005615234, |
|
"eval_logps/rejected": -2.176278829574585, |
|
"eval_loss": 1.2384228706359863, |
|
"eval_nll_loss": 1.3189568519592285, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.11996213346719742, |
|
"eval_rewards/margins": 0.09766574203968048, |
|
"eval_rewards/rejected": -0.2176278829574585, |
|
"eval_runtime": 0.8764, |
|
"eval_samples_per_second": 156.329, |
|
"eval_steps_per_second": 5.705, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 15.735224586288416, |
|
"grad_norm": 295.0424499511719, |
|
"learning_rate": 1.3813576683111006e-10, |
|
"log_odds_chosen": 0.44846177101135254, |
|
"log_odds_ratio": -0.5449205636978149, |
|
"logits/chosen": -2.3085861206054688, |
|
"logits/rejected": -2.3130688667297363, |
|
"logps/chosen": -1.1487438678741455, |
|
"logps/rejected": -1.4232044219970703, |
|
"loss": 1.3316, |
|
"nll_loss": 1.246992588043213, |
|
"rewards/accuracies": 0.74609375, |
|
"rewards/chosen": -0.11487438529729843, |
|
"rewards/margins": 0.027446046471595764, |
|
"rewards/rejected": -0.142320454120636, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 15.886524822695035, |
|
"grad_norm": 305.3218078613281, |
|
"learning_rate": 0.0, |
|
"log_odds_chosen": 0.500209391117096, |
|
"log_odds_ratio": -0.5302451848983765, |
|
"logits/chosen": -2.2818732261657715, |
|
"logits/rejected": -2.2850182056427, |
|
"logps/chosen": -1.1465669870376587, |
|
"logps/rejected": -1.4646430015563965, |
|
"loss": 1.3265, |
|
"nll_loss": 1.2768977880477905, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.11465670168399811, |
|
"rewards/margins": 0.03180759772658348, |
|
"rewards/rejected": -0.1464642882347107, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 15.886524822695035, |
|
"eval_log_odds_chosen": 1.2168288230895996, |
|
"eval_log_odds_ratio": -0.26950639486312866, |
|
"eval_logits/chosen": -2.1189827919006348, |
|
"eval_logits/rejected": -2.1787045001983643, |
|
"eval_logps/chosen": -1.1971455812454224, |
|
"eval_logps/rejected": -2.1773040294647217, |
|
"eval_loss": 1.2378294467926025, |
|
"eval_nll_loss": 1.3174165487289429, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.11971455812454224, |
|
"eval_rewards/margins": 0.09801585972309113, |
|
"eval_rewards/rejected": -0.21773043274879456, |
|
"eval_runtime": 0.8739, |
|
"eval_samples_per_second": 156.768, |
|
"eval_steps_per_second": 5.721, |
|
"step": 840 |
|
} |
|
], |
|
"logging_steps": 32, |
|
"max_steps": 840, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 17, |
|
"save_steps": 80, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|