|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 625, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 7.936507936507937e-08, |
|
"logits/chosen": 0.0711287260055542, |
|
"logits/rejected": 0.20400863885879517, |
|
"logps/chosen": -313.75396728515625, |
|
"logps/rejected": -420.73980712890625, |
|
"loss": 0.2285, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.936507936507937e-07, |
|
"logits/chosen": 0.2657856047153473, |
|
"logits/rejected": 0.23175562918186188, |
|
"logps/chosen": -354.52484130859375, |
|
"logps/rejected": -365.6746520996094, |
|
"loss": 0.203, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.0005699184257537127, |
|
"rewards/margins": 2.8342270525172353e-05, |
|
"rewards/rejected": -0.0005982607253827155, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5873015873015873e-06, |
|
"logits/chosen": 0.16931554675102234, |
|
"logits/rejected": 0.265936940908432, |
|
"logps/chosen": -376.02166748046875, |
|
"logps/rejected": -396.56524658203125, |
|
"loss": 0.2032, |
|
"rewards/accuracies": 0.3062500059604645, |
|
"rewards/chosen": -0.0009135094587691128, |
|
"rewards/margins": -1.457883081457112e-05, |
|
"rewards/rejected": -0.000898930593393743, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.380952380952381e-06, |
|
"logits/chosen": 0.1378197968006134, |
|
"logits/rejected": 0.2801808714866638, |
|
"logps/chosen": -394.52301025390625, |
|
"logps/rejected": -406.01971435546875, |
|
"loss": 0.2045, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": -0.0023785685189068317, |
|
"rewards/margins": -4.536235792329535e-05, |
|
"rewards/rejected": -0.0023332058917731047, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1746031746031746e-06, |
|
"logits/chosen": 0.22031190991401672, |
|
"logits/rejected": 0.2555253505706787, |
|
"logps/chosen": -413.9378356933594, |
|
"logps/rejected": -384.2980041503906, |
|
"loss": 0.2078, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.0029828939586877823, |
|
"rewards/margins": 0.0004020760825369507, |
|
"rewards/rejected": -0.0033849701285362244, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.968253968253968e-06, |
|
"logits/chosen": 0.17930440604686737, |
|
"logits/rejected": 0.27743226289749146, |
|
"logps/chosen": -437.57220458984375, |
|
"logps/rejected": -428.25726318359375, |
|
"loss": 0.2109, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.0068945749662816525, |
|
"rewards/margins": 0.0003428836935199797, |
|
"rewards/rejected": -0.0072374590672552586, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.761904761904762e-06, |
|
"logits/chosen": 0.1960189789533615, |
|
"logits/rejected": 0.20567241311073303, |
|
"logps/chosen": -412.0870666503906, |
|
"logps/rejected": -429.5210876464844, |
|
"loss": 0.2106, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.01030620839446783, |
|
"rewards/margins": 0.0008126860484480858, |
|
"rewards/rejected": -0.011118894442915916, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.998086282661188e-06, |
|
"logits/chosen": 0.18384099006652832, |
|
"logits/rejected": 0.16322512924671173, |
|
"logps/chosen": -480.0482482910156, |
|
"logps/rejected": -449.12872314453125, |
|
"loss": 0.2125, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.012364747002720833, |
|
"rewards/margins": 0.0012391259660944343, |
|
"rewards/rejected": -0.013603871688246727, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.988720025682995e-06, |
|
"logits/chosen": 0.17162439227104187, |
|
"logits/rejected": 0.3024311661720276, |
|
"logps/chosen": -425.10821533203125, |
|
"logps/rejected": -405.69903564453125, |
|
"loss": 0.203, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.002422564197331667, |
|
"rewards/margins": 0.0019961665384471416, |
|
"rewards/rejected": -0.004418730735778809, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9715789537359126e-06, |
|
"logits/chosen": 0.14976395666599274, |
|
"logits/rejected": 0.293326199054718, |
|
"logps/chosen": -450.09283447265625, |
|
"logps/rejected": -463.3335876464844, |
|
"loss": 0.1928, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.004968429449945688, |
|
"rewards/margins": 0.004259931854903698, |
|
"rewards/rejected": 0.0007084974786266685, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.946716615897932e-06, |
|
"logits/chosen": 0.14262747764587402, |
|
"logits/rejected": 0.14231689274311066, |
|
"logps/chosen": -409.68719482421875, |
|
"logps/rejected": -385.9997253417969, |
|
"loss": 0.2044, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": 0.0011129004415124655, |
|
"rewards/margins": 0.0027904310263693333, |
|
"rewards/rejected": -0.001677530468441546, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.9142106826480114e-06, |
|
"logits/chosen": 0.09172149002552032, |
|
"logits/rejected": 0.18723782896995544, |
|
"logps/chosen": -376.23382568359375, |
|
"logps/rejected": -382.14312744140625, |
|
"loss": 0.2117, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": 0.0011799463536590338, |
|
"rewards/margins": 0.006289638578891754, |
|
"rewards/rejected": -0.005109691992402077, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.874162703221823e-06, |
|
"logits/chosen": 0.21056421101093292, |
|
"logits/rejected": 0.10638086497783661, |
|
"logps/chosen": -455.8184509277344, |
|
"logps/rejected": -461.58575439453125, |
|
"loss": 0.2024, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.0018554453272372484, |
|
"rewards/margins": 0.007339824922382832, |
|
"rewards/rejected": -0.005484379827976227, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.826697788369752e-06, |
|
"logits/chosen": 0.0843835398554802, |
|
"logits/rejected": 0.10346312820911407, |
|
"logps/chosen": -420.7416076660156, |
|
"logps/rejected": -426.8565979003906, |
|
"loss": 0.1938, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.0005354422028176486, |
|
"rewards/margins": 0.012502538040280342, |
|
"rewards/rejected": -0.011967095546424389, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.7719642195082224e-06, |
|
"logits/chosen": 0.03641371801495552, |
|
"logits/rejected": 0.10479255020618439, |
|
"logps/chosen": -401.6028137207031, |
|
"logps/rejected": -464.2025451660156, |
|
"loss": 0.1958, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.024544300511479378, |
|
"rewards/margins": 0.018662814050912857, |
|
"rewards/rejected": -0.043207116425037384, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.710132985485355e-06, |
|
"logits/chosen": 0.11580105125904083, |
|
"logits/rejected": 0.021813513711094856, |
|
"logps/chosen": -430.5687561035156, |
|
"logps/rejected": -515.4051513671875, |
|
"loss": 0.1976, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.04684633016586304, |
|
"rewards/margins": 0.043844569474458694, |
|
"rewards/rejected": -0.09069089591503143, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.641397248408122e-06, |
|
"logits/chosen": -0.0415755994617939, |
|
"logits/rejected": 0.07411660254001617, |
|
"logps/chosen": -467.6756286621094, |
|
"logps/rejected": -472.20001220703125, |
|
"loss": 0.1972, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.0470997616648674, |
|
"rewards/margins": 0.02540501020848751, |
|
"rewards/rejected": -0.07250477373600006, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5659717401997655e-06, |
|
"logits/chosen": -0.03600798547267914, |
|
"logits/rejected": 0.027341466397047043, |
|
"logps/chosen": -504.47894287109375, |
|
"logps/rejected": -516.9270629882812, |
|
"loss": 0.1948, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.05515031889081001, |
|
"rewards/margins": 0.03458093851804733, |
|
"rewards/rejected": -0.08973126113414764, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.4840920917726425e-06, |
|
"logits/chosen": -0.039528343826532364, |
|
"logits/rejected": -0.019377198070287704, |
|
"logps/chosen": -484.7760314941406, |
|
"logps/rejected": -515.7113037109375, |
|
"loss": 0.1955, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.05891401320695877, |
|
"rewards/margins": 0.03348463773727417, |
|
"rewards/rejected": -0.09239865839481354, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.396014096912182e-06, |
|
"logits/chosen": 0.041537586599588394, |
|
"logits/rejected": -0.029888898134231567, |
|
"logps/chosen": -474.3414001464844, |
|
"logps/rejected": -515.9990234375, |
|
"loss": 0.1919, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.07046758383512497, |
|
"rewards/margins": 0.03831402584910393, |
|
"rewards/rejected": -0.1087816134095192, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.302012913171584e-06, |
|
"logits/chosen": -0.017491130158305168, |
|
"logits/rejected": -0.04960538074374199, |
|
"logps/chosen": -484.7154846191406, |
|
"logps/rejected": -524.5064086914062, |
|
"loss": 0.1935, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.07989688217639923, |
|
"rewards/margins": 0.04493844509124756, |
|
"rewards/rejected": -0.12483533471822739, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.202382202273702e-06, |
|
"logits/chosen": -0.13748802244663239, |
|
"logits/rejected": -0.001641835318878293, |
|
"logps/chosen": -507.37506103515625, |
|
"logps/rejected": -535.3721923828125, |
|
"loss": 0.1804, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.07145627588033676, |
|
"rewards/margins": 0.051401056349277496, |
|
"rewards/rejected": -0.12285731732845306, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.097433212705492e-06, |
|
"logits/chosen": -0.021799543872475624, |
|
"logits/rejected": -0.074518583714962, |
|
"logps/chosen": -477.99981689453125, |
|
"logps/rejected": -518.2962646484375, |
|
"loss": 0.2019, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.09213604032993317, |
|
"rewards/margins": 0.04488346725702286, |
|
"rewards/rejected": -0.13701951503753662, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.987493807371033e-06, |
|
"logits/chosen": -0.16067767143249512, |
|
"logits/rejected": 0.01047535240650177, |
|
"logps/chosen": -489.7818298339844, |
|
"logps/rejected": -525.8330688476562, |
|
"loss": 0.1852, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.0610770583152771, |
|
"rewards/margins": 0.04805826395750046, |
|
"rewards/rejected": -0.10913531482219696, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.872907439340758e-06, |
|
"logits/chosen": -0.035437412559986115, |
|
"logits/rejected": -0.07582642138004303, |
|
"logps/chosen": -440.7459411621094, |
|
"logps/rejected": -492.434326171875, |
|
"loss": 0.177, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.04167747497558594, |
|
"rewards/margins": 0.04895929619669914, |
|
"rewards/rejected": -0.09063677489757538, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.75403207889666e-06, |
|
"logits/chosen": -0.09768973290920258, |
|
"logits/rejected": -0.0913015678524971, |
|
"logps/chosen": -445.75750732421875, |
|
"logps/rejected": -473.06671142578125, |
|
"loss": 0.1765, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.04659276083111763, |
|
"rewards/margins": 0.03554708510637283, |
|
"rewards/rejected": -0.08213984221220016, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.631239095225417e-06, |
|
"logits/chosen": -0.002584155648946762, |
|
"logits/rejected": -0.0503731295466423, |
|
"logps/chosen": -443.8072204589844, |
|
"logps/rejected": -522.3037109375, |
|
"loss": 0.1903, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.057579755783081055, |
|
"rewards/margins": 0.042577676475048065, |
|
"rewards/rejected": -0.10015741735696793, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5049120962530608e-06, |
|
"logits/chosen": -0.07919616252183914, |
|
"logits/rejected": -0.051559675484895706, |
|
"logps/chosen": -442.90020751953125, |
|
"logps/rejected": -506.71026611328125, |
|
"loss": 0.1801, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.04894575849175453, |
|
"rewards/margins": 0.060835689306259155, |
|
"rewards/rejected": -0.1097814291715622, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3754457302455464e-06, |
|
"logits/chosen": -0.05536097288131714, |
|
"logits/rejected": -0.05594850331544876, |
|
"logps/chosen": -491.963623046875, |
|
"logps/rejected": -476.1817932128906, |
|
"loss": 0.1987, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.05905281379818916, |
|
"rewards/margins": 0.03382394462823868, |
|
"rewards/rejected": -0.09287675470113754, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2432444529190714e-06, |
|
"logits/chosen": -0.06521332263946533, |
|
"logits/rejected": 0.04030764847993851, |
|
"logps/chosen": -503.37115478515625, |
|
"logps/rejected": -539.033935546875, |
|
"loss": 0.1758, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.06568136066198349, |
|
"rewards/margins": 0.04754118248820305, |
|
"rewards/rejected": -0.11322255432605743, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.1087212639117057e-06, |
|
"logits/chosen": -0.007789143826812506, |
|
"logits/rejected": -0.08723556995391846, |
|
"logps/chosen": -448.15185546875, |
|
"logps/rejected": -472.66046142578125, |
|
"loss": 0.195, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.04991251975297928, |
|
"rewards/margins": 0.03361000493168831, |
|
"rewards/rejected": -0.08352252840995789, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9722964165636263e-06, |
|
"logits/chosen": -0.10513603687286377, |
|
"logits/rejected": -0.02427390217781067, |
|
"logps/chosen": -456.4302673339844, |
|
"logps/rejected": -480.01904296875, |
|
"loss": 0.1777, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.04909784346818924, |
|
"rewards/margins": 0.032266296446323395, |
|
"rewards/rejected": -0.08136413991451263, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8343961050366275e-06, |
|
"logits/chosen": -0.06911730021238327, |
|
"logits/rejected": -0.02331097424030304, |
|
"logps/chosen": -412.3163146972656, |
|
"logps/rejected": -419.2935485839844, |
|
"loss": 0.1807, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.031493835151195526, |
|
"rewards/margins": 0.03578699007630348, |
|
"rewards/rejected": -0.06728082150220871, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.695451132874385e-06, |
|
"logits/chosen": 0.030731942504644394, |
|
"logits/rejected": -0.028069671243429184, |
|
"logps/chosen": -477.44512939453125, |
|
"logps/rejected": -501.154541015625, |
|
"loss": 0.1925, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.045379411429166794, |
|
"rewards/margins": 0.04541900008916855, |
|
"rewards/rejected": -0.09079841524362564, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5558955671628964e-06, |
|
"logits/chosen": -0.0826629176735878, |
|
"logits/rejected": -0.04008691757917404, |
|
"logps/chosen": -429.1664123535156, |
|
"logps/rejected": -467.46173095703125, |
|
"loss": 0.1838, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.05404701828956604, |
|
"rewards/margins": 0.04224228113889694, |
|
"rewards/rejected": -0.09628931432962418, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4161653824955654e-06, |
|
"logits/chosen": -0.056598931550979614, |
|
"logits/rejected": -0.10974061489105225, |
|
"logps/chosen": -513.0817260742188, |
|
"logps/rejected": -546.1875, |
|
"loss": 0.1864, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.06718280166387558, |
|
"rewards/margins": 0.05270420387387276, |
|
"rewards/rejected": -0.11988700926303864, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2766970989791697e-06, |
|
"logits/chosen": -0.04662217199802399, |
|
"logits/rejected": -0.05574822425842285, |
|
"logps/chosen": -480.88848876953125, |
|
"logps/rejected": -530.1246337890625, |
|
"loss": 0.1915, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.06772033870220184, |
|
"rewards/margins": 0.03862085938453674, |
|
"rewards/rejected": -0.10634119808673859, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1379264185356545e-06, |
|
"logits/chosen": -0.020046677440404892, |
|
"logits/rejected": 0.09056108444929123, |
|
"logps/chosen": -486.93115234375, |
|
"logps/rejected": -528.6192626953125, |
|
"loss": 0.1814, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.054186802357435226, |
|
"rewards/margins": 0.05737306550145149, |
|
"rewards/rejected": -0.11155986785888672, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.000286863759934e-06, |
|
"logits/chosen": -0.09826477617025375, |
|
"logits/rejected": -0.07946722954511642, |
|
"logps/chosen": -470.00811767578125, |
|
"logps/rejected": -511.309814453125, |
|
"loss": 0.1828, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.06328146904706955, |
|
"rewards/margins": 0.05404907464981079, |
|
"rewards/rejected": -0.11733055114746094, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8642084235859764e-06, |
|
"logits/chosen": -0.15358105301856995, |
|
"logits/rejected": -0.07723913341760635, |
|
"logps/chosen": -438.9100646972656, |
|
"logps/rejected": -479.63092041015625, |
|
"loss": 0.1972, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.05276118963956833, |
|
"rewards/margins": 0.04282676801085472, |
|
"rewards/rejected": -0.09558796137571335, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7301162099921013e-06, |
|
"logits/chosen": -0.06951303780078888, |
|
"logits/rejected": -0.16843798756599426, |
|
"logps/chosen": -422.1139221191406, |
|
"logps/rejected": -459.0763244628906, |
|
"loss": 0.1995, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.05717768147587776, |
|
"rewards/margins": 0.040271807461977005, |
|
"rewards/rejected": -0.09744948893785477, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5984291299420117e-06, |
|
"logits/chosen": -0.1256350576877594, |
|
"logits/rejected": -0.08233270049095154, |
|
"logps/chosen": -404.26263427734375, |
|
"logps/rejected": -455.4290466308594, |
|
"loss": 0.1892, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.04233751818537712, |
|
"rewards/margins": 0.05102572590112686, |
|
"rewards/rejected": -0.09336324036121368, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4695585767104092e-06, |
|
"logits/chosen": -0.0601225309073925, |
|
"logits/rejected": -0.12005972862243652, |
|
"logps/chosen": -482.4237365722656, |
|
"logps/rejected": -531.4822998046875, |
|
"loss": 0.1715, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.045725829899311066, |
|
"rewards/margins": 0.04561304301023483, |
|
"rewards/rejected": -0.0913388729095459, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3439071446815452e-06, |
|
"logits/chosen": -0.15923841297626495, |
|
"logits/rejected": 0.011970462277531624, |
|
"logps/chosen": -441.70648193359375, |
|
"logps/rejected": -458.97052001953125, |
|
"loss": 0.186, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.03852088749408722, |
|
"rewards/margins": 0.038460638374090195, |
|
"rewards/rejected": -0.07698152959346771, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2218673716356919e-06, |
|
"logits/chosen": -0.05337870121002197, |
|
"logits/rejected": -0.16203683614730835, |
|
"logps/chosen": -411.7693786621094, |
|
"logps/rejected": -472.7538146972656, |
|
"loss": 0.1868, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.04310641810297966, |
|
"rewards/margins": 0.053300343453884125, |
|
"rewards/rejected": -0.09640677273273468, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.103820512452661e-06, |
|
"logits/chosen": -0.16054467856884003, |
|
"logits/rejected": -0.10418369621038437, |
|
"logps/chosen": -471.15740966796875, |
|
"logps/rejected": -499.28997802734375, |
|
"loss": 0.1866, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.044102419167757034, |
|
"rewards/margins": 0.04977239668369293, |
|
"rewards/rejected": -0.09387481212615967, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.901353480633468e-07, |
|
"logits/chosen": -0.01945580169558525, |
|
"logits/rejected": -0.08020860701799393, |
|
"logps/chosen": -478.95135498046875, |
|
"logps/rejected": -510.05645751953125, |
|
"loss": 0.1876, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.05099078267812729, |
|
"rewards/margins": 0.04500190168619156, |
|
"rewards/rejected": -0.09599269926548004, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.811670333701544e-07, |
|
"logits/chosen": -0.09503830224275589, |
|
"logits/rejected": -0.12620458006858826, |
|
"logps/chosen": -442.7535095214844, |
|
"logps/rejected": -478.792236328125, |
|
"loss": 0.1797, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.039722852408885956, |
|
"rewards/margins": 0.04783398285508156, |
|
"rewards/rejected": -0.08755683898925781, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.772559877354341e-07, |
|
"logits/chosen": -0.1489885151386261, |
|
"logits/rejected": -0.052212201058864594, |
|
"logps/chosen": -408.1684265136719, |
|
"logps/rejected": -442.21466064453125, |
|
"loss": 0.1972, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -0.05573519319295883, |
|
"rewards/margins": 0.033387087285518646, |
|
"rewards/rejected": -0.08912228047847748, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.787268315040604e-07, |
|
"logits/chosen": -0.05187498405575752, |
|
"logits/rejected": -0.04689168184995651, |
|
"logps/chosen": -473.5567932128906, |
|
"logps/rejected": -553.9025268554688, |
|
"loss": 0.1722, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.047236062586307526, |
|
"rewards/margins": 0.06369349360466003, |
|
"rewards/rejected": -0.11092956364154816, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.858873718824829e-07, |
|
"logits/chosen": -0.1269344836473465, |
|
"logits/rejected": -0.12827740609645844, |
|
"logps/chosen": -436.95001220703125, |
|
"logps/rejected": -530.323486328125, |
|
"loss": 0.1828, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.03250400722026825, |
|
"rewards/margins": 0.06746237725019455, |
|
"rewards/rejected": -0.0999663770198822, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.990276413423817e-07, |
|
"logits/chosen": -0.05357852578163147, |
|
"logits/rejected": -0.0927656888961792, |
|
"logps/chosen": -499.13262939453125, |
|
"logps/rejected": -548.8838500976562, |
|
"loss": 0.1697, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.04802858829498291, |
|
"rewards/margins": 0.05423368886113167, |
|
"rewards/rejected": -0.10226227343082428, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.184189915529796e-07, |
|
"logits/chosen": -0.11798721551895142, |
|
"logits/rejected": -0.09332814067602158, |
|
"logps/chosen": -465.55816650390625, |
|
"logps/rejected": -533.0569458007812, |
|
"loss": 0.1967, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.042215488851070404, |
|
"rewards/margins": 0.05858848616480827, |
|
"rewards/rejected": -0.10080397129058838, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.4431324567258176e-07, |
|
"logits/chosen": -0.1948605328798294, |
|
"logits/rejected": -0.08446381241083145, |
|
"logps/chosen": -440.0462951660156, |
|
"logps/rejected": -479.0397033691406, |
|
"loss": 0.1885, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.045319609344005585, |
|
"rewards/margins": 0.04320630431175232, |
|
"rewards/rejected": -0.08852590620517731, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.769419116476052e-07, |
|
"logits/chosen": -0.2041836529970169, |
|
"logits/rejected": -0.06282895803451538, |
|
"logps/chosen": -415.84576416015625, |
|
"logps/rejected": -452.8704528808594, |
|
"loss": 0.1829, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.050526998937129974, |
|
"rewards/margins": 0.03672551363706589, |
|
"rewards/rejected": -0.08725249767303467, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1651545897676512e-07, |
|
"logits/chosen": -0.056377578526735306, |
|
"logits/rejected": -0.0602828674018383, |
|
"logps/chosen": -482.2828674316406, |
|
"logps/rejected": -510.7273864746094, |
|
"loss": 0.194, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.04934944957494736, |
|
"rewards/margins": 0.04233521968126297, |
|
"rewards/rejected": -0.09168466180562973, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.6322266119983222e-07, |
|
"logits/chosen": -0.08606644719839096, |
|
"logits/rejected": -0.05609310790896416, |
|
"logps/chosen": -435.7982482910156, |
|
"logps/rejected": -460.5846252441406, |
|
"loss": 0.185, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.037545233964920044, |
|
"rewards/margins": 0.0525357648730278, |
|
"rewards/rejected": -0.09008099883794785, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1723000616502167e-07, |
|
"logits/chosen": -0.21670150756835938, |
|
"logits/rejected": -0.034546859562397, |
|
"logps/chosen": -498.1796875, |
|
"logps/rejected": -531.9613037109375, |
|
"loss": 0.1779, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.05661952495574951, |
|
"rewards/margins": 0.05806880071759224, |
|
"rewards/rejected": -0.11468833684921265, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.868117591737585e-08, |
|
"logits/chosen": -0.15769873559474945, |
|
"logits/rejected": -0.11322434991598129, |
|
"logps/chosen": -501.90802001953125, |
|
"logps/rejected": -510.9298400878906, |
|
"loss": 0.1865, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.0535421147942543, |
|
"rewards/margins": 0.04890746995806694, |
|
"rewards/rejected": -0.10244959592819214, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.769659783295383e-08, |
|
"logits/chosen": -0.1612723022699356, |
|
"logits/rejected": -0.09152115881443024, |
|
"logps/chosen": -478.5709533691406, |
|
"logps/rejected": -501.6288146972656, |
|
"loss": 0.1926, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.04939908906817436, |
|
"rewards/margins": 0.04336509853601456, |
|
"rewards/rejected": -0.09276418387889862, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.4373068401120358e-08, |
|
"logits/chosen": -0.16256621479988098, |
|
"logits/rejected": 0.01677759736776352, |
|
"logps/chosen": -480.29571533203125, |
|
"logps/rejected": -480.0000915527344, |
|
"loss": 0.1777, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.059523385018110275, |
|
"rewards/margins": 0.03656711056828499, |
|
"rewards/rejected": -0.09609050303697586, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.78345083022425e-09, |
|
"logits/chosen": -0.2046932876110077, |
|
"logits/rejected": -0.010766489431262016, |
|
"logps/chosen": -467.3860778808594, |
|
"logps/rejected": -513.6978149414062, |
|
"loss": 0.195, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.0569658987224102, |
|
"rewards/margins": 0.04467035457491875, |
|
"rewards/rejected": -0.10163625329732895, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 9.764474213677654e-10, |
|
"logits/chosen": -0.15736037492752075, |
|
"logits/rejected": -0.16347061097621918, |
|
"logps/chosen": -454.09942626953125, |
|
"logps/rejected": -512.8070678710938, |
|
"loss": 0.1987, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.06590863317251205, |
|
"rewards/margins": 0.044044043868780136, |
|
"rewards/rejected": -0.10995267331600189, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 625, |
|
"total_flos": 0.0, |
|
"train_loss": 0.1911162176847458, |
|
"train_runtime": 4612.7069, |
|
"train_samples_per_second": 4.336, |
|
"train_steps_per_second": 0.135 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 625, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|