|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 805, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 6.172839506172839e-09, |
|
"logits/chosen": -2.8421168327331543, |
|
"logits/rejected": -2.6747336387634277, |
|
"logps/chosen": -92.33953094482422, |
|
"logps/rejected": -44.262760162353516, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.172839506172839e-08, |
|
"logits/chosen": -2.9818191528320312, |
|
"logits/rejected": -2.9740567207336426, |
|
"logps/chosen": -197.3586883544922, |
|
"logps/rejected": -149.28749084472656, |
|
"loss": 0.6901, |
|
"rewards/accuracies": 0.3611111044883728, |
|
"rewards/chosen": -0.005469343159347773, |
|
"rewards/margins": 0.00850688572973013, |
|
"rewards/rejected": -0.01397622935473919, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.2345679012345677e-07, |
|
"logits/chosen": -2.9317967891693115, |
|
"logits/rejected": -2.8763492107391357, |
|
"logps/chosen": -172.7858428955078, |
|
"logps/rejected": -133.58245849609375, |
|
"loss": 0.6353, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.1074294000864029, |
|
"rewards/margins": 0.16630074381828308, |
|
"rewards/rejected": -0.05887135863304138, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.8518518518518516e-07, |
|
"logits/chosen": -3.0448741912841797, |
|
"logits/rejected": -2.999143362045288, |
|
"logps/chosen": -179.38113403320312, |
|
"logps/rejected": -146.7749786376953, |
|
"loss": 0.5047, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.08539465069770813, |
|
"rewards/margins": 0.540686845779419, |
|
"rewards/rejected": -0.4552922248840332, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.4691358024691354e-07, |
|
"logits/chosen": -2.9631247520446777, |
|
"logits/rejected": -2.9552507400512695, |
|
"logps/chosen": -174.05288696289062, |
|
"logps/rejected": -143.798583984375, |
|
"loss": 0.3928, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.11930576711893082, |
|
"rewards/margins": 0.9224799871444702, |
|
"rewards/rejected": -0.8031741976737976, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.086419753086419e-07, |
|
"logits/chosen": -2.955972194671631, |
|
"logits/rejected": -2.880552053451538, |
|
"logps/chosen": -183.1387481689453, |
|
"logps/rejected": -157.20669555664062, |
|
"loss": 0.2956, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.8193867802619934, |
|
"rewards/margins": 1.485733985900879, |
|
"rewards/rejected": -2.3051209449768066, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.703703703703703e-07, |
|
"logits/chosen": -2.960036516189575, |
|
"logits/rejected": -2.8513035774230957, |
|
"logps/chosen": -154.39926147460938, |
|
"logps/rejected": -133.40078735351562, |
|
"loss": 0.2919, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.9133367538452148, |
|
"rewards/margins": 1.8072048425674438, |
|
"rewards/rejected": -2.720541477203369, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.320987654320987e-07, |
|
"logits/chosen": -2.8771309852600098, |
|
"logits/rejected": -2.797616481781006, |
|
"logps/chosen": -165.77328491210938, |
|
"logps/rejected": -142.69815063476562, |
|
"loss": 0.2689, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.7793342471122742, |
|
"rewards/margins": 2.3448145389556885, |
|
"rewards/rejected": -3.1241488456726074, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.938271604938271e-07, |
|
"logits/chosen": -2.7670953273773193, |
|
"logits/rejected": -2.723829984664917, |
|
"logps/chosen": -185.12596130371094, |
|
"logps/rejected": -174.61465454101562, |
|
"loss": 0.2633, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.7798545360565186, |
|
"rewards/margins": 2.6085076332092285, |
|
"rewards/rejected": -4.388362884521484, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.937845303867402e-07, |
|
"logits/chosen": -2.933378219604492, |
|
"logits/rejected": -2.9017395973205566, |
|
"logps/chosen": -183.1439208984375, |
|
"logps/rejected": -171.66964721679688, |
|
"loss": 0.2236, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.4862909317016602, |
|
"rewards/margins": 2.482652187347412, |
|
"rewards/rejected": -3.968942642211914, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.868784530386741e-07, |
|
"logits/chosen": -2.7947914600372314, |
|
"logits/rejected": -2.823068618774414, |
|
"logps/chosen": -189.07904052734375, |
|
"logps/rejected": -181.06781005859375, |
|
"loss": 0.2324, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.5773736238479614, |
|
"rewards/margins": 3.1609904766082764, |
|
"rewards/rejected": -3.738363742828369, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.799723756906077e-07, |
|
"logits/chosen": -2.972888946533203, |
|
"logits/rejected": -2.856055498123169, |
|
"logps/chosen": -194.32847595214844, |
|
"logps/rejected": -180.61575317382812, |
|
"loss": 0.2595, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.5956058502197266, |
|
"rewards/margins": 2.7098982334136963, |
|
"rewards/rejected": -4.305504322052002, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.730662983425414e-07, |
|
"logits/chosen": -2.8165183067321777, |
|
"logits/rejected": -2.7924342155456543, |
|
"logps/chosen": -171.48434448242188, |
|
"logps/rejected": -174.36788940429688, |
|
"loss": 0.2177, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.069062352180481, |
|
"rewards/margins": 3.2766518592834473, |
|
"rewards/rejected": -4.345714092254639, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.661602209944751e-07, |
|
"logits/chosen": -2.941241502761841, |
|
"logits/rejected": -2.8772006034851074, |
|
"logps/chosen": -206.2366180419922, |
|
"logps/rejected": -196.18931579589844, |
|
"loss": 0.1857, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.6614869832992554, |
|
"rewards/margins": 3.376704454421997, |
|
"rewards/rejected": -5.038191318511963, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.592541436464088e-07, |
|
"logits/chosen": -2.7895777225494385, |
|
"logits/rejected": -2.7228431701660156, |
|
"logps/chosen": -185.4031524658203, |
|
"logps/rejected": -193.98040771484375, |
|
"loss": 0.174, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.8083890080451965, |
|
"rewards/margins": 4.341578960418701, |
|
"rewards/rejected": -5.149968147277832, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.5234806629834255e-07, |
|
"logits/chosen": -2.73811674118042, |
|
"logits/rejected": -2.7120604515075684, |
|
"logps/chosen": -214.88565063476562, |
|
"logps/rejected": -221.6265411376953, |
|
"loss": 0.185, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.5129263401031494, |
|
"rewards/margins": 3.6045918464660645, |
|
"rewards/rejected": -6.117517948150635, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.454419889502762e-07, |
|
"logits/chosen": -2.9045376777648926, |
|
"logits/rejected": -2.8451740741729736, |
|
"logps/chosen": -202.41903686523438, |
|
"logps/rejected": -205.6617889404297, |
|
"loss": 0.208, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.077017307281494, |
|
"rewards/margins": 4.358880996704102, |
|
"rewards/rejected": -6.4358978271484375, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.3853591160220993e-07, |
|
"logits/chosen": -2.8927552700042725, |
|
"logits/rejected": -2.7776336669921875, |
|
"logps/chosen": -203.76724243164062, |
|
"logps/rejected": -200.79864501953125, |
|
"loss": 0.1703, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.2595643997192383, |
|
"rewards/margins": 3.9248855113983154, |
|
"rewards/rejected": -6.184449672698975, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.3162983425414365e-07, |
|
"logits/chosen": -2.7550511360168457, |
|
"logits/rejected": -2.7225847244262695, |
|
"logps/chosen": -225.6936798095703, |
|
"logps/rejected": -231.35400390625, |
|
"loss": 0.1733, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -3.379578113555908, |
|
"rewards/margins": 3.530996322631836, |
|
"rewards/rejected": -6.910574436187744, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.247237569060773e-07, |
|
"logits/chosen": -2.8363826274871826, |
|
"logits/rejected": -2.7442939281463623, |
|
"logps/chosen": -212.888427734375, |
|
"logps/rejected": -198.71542358398438, |
|
"loss": 0.218, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.968935251235962, |
|
"rewards/margins": 3.221897602081299, |
|
"rewards/rejected": -6.19083309173584, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.1781767955801103e-07, |
|
"logits/chosen": -2.8280460834503174, |
|
"logits/rejected": -2.7499425411224365, |
|
"logps/chosen": -182.047607421875, |
|
"logps/rejected": -188.87193298339844, |
|
"loss": 0.1708, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.369425058364868, |
|
"rewards/margins": 4.022088050842285, |
|
"rewards/rejected": -6.391513824462891, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.1091160220994475e-07, |
|
"logits/chosen": -2.7316782474517822, |
|
"logits/rejected": -2.6802334785461426, |
|
"logps/chosen": -212.96212768554688, |
|
"logps/rejected": -203.3765106201172, |
|
"loss": 0.1689, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -3.15626859664917, |
|
"rewards/margins": 3.610233783721924, |
|
"rewards/rejected": -6.766502380371094, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.0400552486187846e-07, |
|
"logits/chosen": -2.8573451042175293, |
|
"logits/rejected": -2.751213550567627, |
|
"logps/chosen": -225.369140625, |
|
"logps/rejected": -213.1463623046875, |
|
"loss": 0.2091, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -3.183094024658203, |
|
"rewards/margins": 3.5506489276885986, |
|
"rewards/rejected": -6.733743190765381, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.970994475138121e-07, |
|
"logits/chosen": -2.7918601036071777, |
|
"logits/rejected": -2.731823444366455, |
|
"logps/chosen": -194.12893676757812, |
|
"logps/rejected": -183.81556701660156, |
|
"loss": 0.1979, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.6381051540374756, |
|
"rewards/margins": 2.9515299797058105, |
|
"rewards/rejected": -5.589634895324707, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.9019337016574584e-07, |
|
"logits/chosen": -2.699711561203003, |
|
"logits/rejected": -2.6905601024627686, |
|
"logps/chosen": -212.59793090820312, |
|
"logps/rejected": -232.6545867919922, |
|
"loss": 0.1849, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -3.5139076709747314, |
|
"rewards/margins": 4.449800491333008, |
|
"rewards/rejected": -7.96370792388916, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.832872928176795e-07, |
|
"logits/chosen": -2.721381425857544, |
|
"logits/rejected": -2.7046854496002197, |
|
"logps/chosen": -200.67886352539062, |
|
"logps/rejected": -220.4629669189453, |
|
"loss": 0.1447, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.1384024620056152, |
|
"rewards/margins": 4.899154186248779, |
|
"rewards/rejected": -8.037556648254395, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.763812154696133e-07, |
|
"logits/chosen": -2.8270747661590576, |
|
"logits/rejected": -2.741473913192749, |
|
"logps/chosen": -208.8101043701172, |
|
"logps/rejected": -218.16537475585938, |
|
"loss": 0.1567, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.2333245277404785, |
|
"rewards/margins": 4.820733547210693, |
|
"rewards/rejected": -8.054059028625488, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.6947513812154694e-07, |
|
"logits/chosen": -2.8693325519561768, |
|
"logits/rejected": -2.7638630867004395, |
|
"logps/chosen": -211.0121612548828, |
|
"logps/rejected": -217.35910034179688, |
|
"loss": 0.1731, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -3.540541410446167, |
|
"rewards/margins": 4.472687721252441, |
|
"rewards/rejected": -8.013228416442871, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.6256906077348066e-07, |
|
"logits/chosen": -2.7264175415039062, |
|
"logits/rejected": -2.7106542587280273, |
|
"logps/chosen": -221.5767822265625, |
|
"logps/rejected": -234.4220733642578, |
|
"loss": 0.2261, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -3.8089592456817627, |
|
"rewards/margins": 4.285206317901611, |
|
"rewards/rejected": -8.094165802001953, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.556629834254143e-07, |
|
"logits/chosen": -2.709073305130005, |
|
"logits/rejected": -2.650305986404419, |
|
"logps/chosen": -183.47171020507812, |
|
"logps/rejected": -189.16053771972656, |
|
"loss": 0.1949, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -3.39518666267395, |
|
"rewards/margins": 3.255284070968628, |
|
"rewards/rejected": -6.6504716873168945, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.4875690607734804e-07, |
|
"logits/chosen": -2.742584228515625, |
|
"logits/rejected": -2.634887456893921, |
|
"logps/chosen": -212.4958953857422, |
|
"logps/rejected": -214.77978515625, |
|
"loss": 0.1577, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.987830400466919, |
|
"rewards/margins": 4.540104389190674, |
|
"rewards/rejected": -7.527935028076172, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.418508287292817e-07, |
|
"logits/chosen": -2.803213119506836, |
|
"logits/rejected": -2.705699920654297, |
|
"logps/chosen": -201.23739624023438, |
|
"logps/rejected": -210.5272979736328, |
|
"loss": 0.1819, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.2409796714782715, |
|
"rewards/margins": 4.629876136779785, |
|
"rewards/rejected": -6.870855808258057, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.3494475138121547e-07, |
|
"logits/chosen": -2.821776866912842, |
|
"logits/rejected": -2.6838371753692627, |
|
"logps/chosen": -205.13998413085938, |
|
"logps/rejected": -210.2687225341797, |
|
"loss": 0.1284, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.8670909404754639, |
|
"rewards/margins": 5.2015180587768555, |
|
"rewards/rejected": -7.068609714508057, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.280386740331492e-07, |
|
"logits/chosen": -2.717777729034424, |
|
"logits/rejected": -2.6688733100891113, |
|
"logps/chosen": -181.1573028564453, |
|
"logps/rejected": -195.77450561523438, |
|
"loss": 0.2068, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.757702589035034, |
|
"rewards/margins": 4.233901023864746, |
|
"rewards/rejected": -6.991603851318359, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.2113259668508285e-07, |
|
"logits/chosen": -2.834904193878174, |
|
"logits/rejected": -2.751018524169922, |
|
"logps/chosen": -236.5355224609375, |
|
"logps/rejected": -249.8627166748047, |
|
"loss": 0.1873, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -4.415268898010254, |
|
"rewards/margins": 4.573099136352539, |
|
"rewards/rejected": -8.988368034362793, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.1422651933701657e-07, |
|
"logits/chosen": -2.702388048171997, |
|
"logits/rejected": -2.5783984661102295, |
|
"logps/chosen": -205.3250274658203, |
|
"logps/rejected": -226.51025390625, |
|
"loss": 0.1513, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.3364059925079346, |
|
"rewards/margins": 5.607662200927734, |
|
"rewards/rejected": -8.94406795501709, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.0732044198895023e-07, |
|
"logits/chosen": -2.719581127166748, |
|
"logits/rejected": -2.6670548915863037, |
|
"logps/chosen": -215.56069946289062, |
|
"logps/rejected": -238.80691528320312, |
|
"loss": 0.1319, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -3.7043700218200684, |
|
"rewards/margins": 5.375087261199951, |
|
"rewards/rejected": -9.07945728302002, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.00414364640884e-07, |
|
"logits/chosen": -2.687743902206421, |
|
"logits/rejected": -2.534635305404663, |
|
"logps/chosen": -197.8555145263672, |
|
"logps/rejected": -212.0418243408203, |
|
"loss": 0.1153, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.731755495071411, |
|
"rewards/margins": 6.006522178649902, |
|
"rewards/rejected": -8.738277435302734, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.9350828729281767e-07, |
|
"logits/chosen": -2.8031535148620605, |
|
"logits/rejected": -2.7039589881896973, |
|
"logps/chosen": -196.32485961914062, |
|
"logps/rejected": -220.0038604736328, |
|
"loss": 0.1482, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.087531805038452, |
|
"rewards/margins": 5.792913436889648, |
|
"rewards/rejected": -8.88044548034668, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.866022099447514e-07, |
|
"logits/chosen": -2.762528657913208, |
|
"logits/rejected": -2.7139079570770264, |
|
"logps/chosen": -245.5567169189453, |
|
"logps/rejected": -268.78729248046875, |
|
"loss": 0.1335, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.6741371154785156, |
|
"rewards/margins": 6.558190822601318, |
|
"rewards/rejected": -10.232328414916992, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.7969613259668505e-07, |
|
"logits/chosen": -2.8232762813568115, |
|
"logits/rejected": -2.6880855560302734, |
|
"logps/chosen": -210.9452362060547, |
|
"logps/rejected": -228.576416015625, |
|
"loss": 0.1482, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.587395191192627, |
|
"rewards/margins": 6.285602569580078, |
|
"rewards/rejected": -9.87299919128418, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.7279005524861877e-07, |
|
"logits/chosen": -2.968181610107422, |
|
"logits/rejected": -2.783268690109253, |
|
"logps/chosen": -250.2484893798828, |
|
"logps/rejected": -248.7700653076172, |
|
"loss": 0.16, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.302699565887451, |
|
"rewards/margins": 5.751161098480225, |
|
"rewards/rejected": -10.053861618041992, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.6588397790055243e-07, |
|
"logits/chosen": -2.843967914581299, |
|
"logits/rejected": -2.7779390811920166, |
|
"logps/chosen": -220.41262817382812, |
|
"logps/rejected": -247.0951690673828, |
|
"loss": 0.131, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -3.6894760131835938, |
|
"rewards/margins": 5.2908501625061035, |
|
"rewards/rejected": -8.980325698852539, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.589779005524862e-07, |
|
"logits/chosen": -2.895799398422241, |
|
"logits/rejected": -2.795949935913086, |
|
"logps/chosen": -243.14785766601562, |
|
"logps/rejected": -262.537109375, |
|
"loss": 0.1558, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.353732109069824, |
|
"rewards/margins": 6.2553300857543945, |
|
"rewards/rejected": -10.609061241149902, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5207182320441986e-07, |
|
"logits/chosen": -2.8478636741638184, |
|
"logits/rejected": -2.782703399658203, |
|
"logps/chosen": -251.97689819335938, |
|
"logps/rejected": -279.1159362792969, |
|
"loss": 0.1489, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -4.981522083282471, |
|
"rewards/margins": 6.379393100738525, |
|
"rewards/rejected": -11.360913276672363, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.451657458563536e-07, |
|
"logits/chosen": -2.9137637615203857, |
|
"logits/rejected": -2.846818447113037, |
|
"logps/chosen": -231.48171997070312, |
|
"logps/rejected": -252.91494750976562, |
|
"loss": 0.1211, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.0656819343566895, |
|
"rewards/margins": 6.312979221343994, |
|
"rewards/rejected": -10.378661155700684, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3825966850828727e-07, |
|
"logits/chosen": -2.820435047149658, |
|
"logits/rejected": -2.7222158908843994, |
|
"logps/chosen": -176.95230102539062, |
|
"logps/rejected": -202.005859375, |
|
"loss": 0.1487, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.8066993951797485, |
|
"rewards/margins": 6.806242942810059, |
|
"rewards/rejected": -8.612942695617676, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.31353591160221e-07, |
|
"logits/chosen": -2.861485004425049, |
|
"logits/rejected": -2.7676100730895996, |
|
"logps/chosen": -223.93276977539062, |
|
"logps/rejected": -244.85665893554688, |
|
"loss": 0.1293, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -3.4348251819610596, |
|
"rewards/margins": 6.165853023529053, |
|
"rewards/rejected": -9.600679397583008, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.2444751381215468e-07, |
|
"logits/chosen": -2.933964967727661, |
|
"logits/rejected": -2.788942337036133, |
|
"logps/chosen": -226.66796875, |
|
"logps/rejected": -244.5904998779297, |
|
"loss": 0.1224, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.5399329662323, |
|
"rewards/margins": 5.981089115142822, |
|
"rewards/rejected": -9.521021842956543, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.175414364640884e-07, |
|
"logits/chosen": -2.798732280731201, |
|
"logits/rejected": -2.7525930404663086, |
|
"logps/chosen": -224.1324462890625, |
|
"logps/rejected": -260.4334411621094, |
|
"loss": 0.1676, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.89032244682312, |
|
"rewards/margins": 7.569252967834473, |
|
"rewards/rejected": -11.459574699401855, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.1063535911602208e-07, |
|
"logits/chosen": -2.837965726852417, |
|
"logits/rejected": -2.7505483627319336, |
|
"logps/chosen": -236.68588256835938, |
|
"logps/rejected": -258.0523681640625, |
|
"loss": 0.1537, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -5.027956008911133, |
|
"rewards/margins": 6.000524997711182, |
|
"rewards/rejected": -11.028480529785156, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_logits/chosen": -2.780914545059204, |
|
"eval_logits/rejected": -2.70768666267395, |
|
"eval_logps/chosen": -219.7342071533203, |
|
"eval_logps/rejected": -246.59507751464844, |
|
"eval_loss": 0.1479674130678177, |
|
"eval_rewards/accuracies": 0.8563829660415649, |
|
"eval_rewards/chosen": -3.757824182510376, |
|
"eval_rewards/margins": 6.220169544219971, |
|
"eval_rewards/rejected": -9.97799301147461, |
|
"eval_runtime": 240.8343, |
|
"eval_samples_per_second": 6.208, |
|
"eval_steps_per_second": 0.195, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.0372928176795578e-07, |
|
"logits/chosen": -2.8163836002349854, |
|
"logits/rejected": -2.7235684394836426, |
|
"logps/chosen": -206.2197265625, |
|
"logps/rejected": -240.59561157226562, |
|
"loss": 0.1149, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.1149652004241943, |
|
"rewards/margins": 7.466650485992432, |
|
"rewards/rejected": -10.58161449432373, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.968232044198895e-07, |
|
"logits/chosen": -2.895784854888916, |
|
"logits/rejected": -2.8176522254943848, |
|
"logps/chosen": -211.858642578125, |
|
"logps/rejected": -227.81741333007812, |
|
"loss": 0.1775, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.783520698547363, |
|
"rewards/margins": 4.954631805419922, |
|
"rewards/rejected": -9.738151550292969, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.899171270718232e-07, |
|
"logits/chosen": -2.9654347896575928, |
|
"logits/rejected": -2.8510003089904785, |
|
"logps/chosen": -215.161376953125, |
|
"logps/rejected": -225.93490600585938, |
|
"loss": 0.1701, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -3.928018093109131, |
|
"rewards/margins": 5.275222301483154, |
|
"rewards/rejected": -9.203241348266602, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.830110497237569e-07, |
|
"logits/chosen": -2.834559202194214, |
|
"logits/rejected": -2.7768056392669678, |
|
"logps/chosen": -248.0672149658203, |
|
"logps/rejected": -277.980224609375, |
|
"loss": 0.1456, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.741249084472656, |
|
"rewards/margins": 5.934549331665039, |
|
"rewards/rejected": -10.675798416137695, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7610497237569062e-07, |
|
"logits/chosen": -2.878166913986206, |
|
"logits/rejected": -2.8250679969787598, |
|
"logps/chosen": -214.7487030029297, |
|
"logps/rejected": -249.8466339111328, |
|
"loss": 0.1641, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -3.848902940750122, |
|
"rewards/margins": 6.1274847984313965, |
|
"rewards/rejected": -9.976387023925781, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.691988950276243e-07, |
|
"logits/chosen": -2.9045345783233643, |
|
"logits/rejected": -2.85447096824646, |
|
"logps/chosen": -237.44955444335938, |
|
"logps/rejected": -270.6697998046875, |
|
"loss": 0.1439, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.506590843200684, |
|
"rewards/margins": 6.747040748596191, |
|
"rewards/rejected": -11.253631591796875, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.62292817679558e-07, |
|
"logits/chosen": -2.8267338275909424, |
|
"logits/rejected": -2.673027515411377, |
|
"logps/chosen": -219.37026977539062, |
|
"logps/rejected": -235.9922332763672, |
|
"loss": 0.1133, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.315529823303223, |
|
"rewards/margins": 6.053648471832275, |
|
"rewards/rejected": -10.369178771972656, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5538674033149171e-07, |
|
"logits/chosen": -2.883451223373413, |
|
"logits/rejected": -2.7439939975738525, |
|
"logps/chosen": -228.4202117919922, |
|
"logps/rejected": -242.2844696044922, |
|
"loss": 0.1627, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.730469226837158, |
|
"rewards/margins": 6.399707317352295, |
|
"rewards/rejected": -11.130178451538086, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.484806629834254e-07, |
|
"logits/chosen": -2.9942538738250732, |
|
"logits/rejected": -2.868765115737915, |
|
"logps/chosen": -260.10406494140625, |
|
"logps/rejected": -253.8542938232422, |
|
"loss": 0.1594, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -5.60601282119751, |
|
"rewards/margins": 4.876471519470215, |
|
"rewards/rejected": -10.482483863830566, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.4157458563535912e-07, |
|
"logits/chosen": -2.9481163024902344, |
|
"logits/rejected": -2.8333523273468018, |
|
"logps/chosen": -226.2001953125, |
|
"logps/rejected": -235.05014038085938, |
|
"loss": 0.1784, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -5.186473846435547, |
|
"rewards/margins": 4.799227714538574, |
|
"rewards/rejected": -9.985700607299805, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.346685082872928e-07, |
|
"logits/chosen": -2.820510149002075, |
|
"logits/rejected": -2.734377384185791, |
|
"logps/chosen": -261.7950134277344, |
|
"logps/rejected": -286.0494689941406, |
|
"loss": 0.104, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.037507057189941, |
|
"rewards/margins": 6.992823600769043, |
|
"rewards/rejected": -12.030329704284668, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.277624309392265e-07, |
|
"logits/chosen": -2.909327507019043, |
|
"logits/rejected": -2.750497817993164, |
|
"logps/chosen": -224.2764129638672, |
|
"logps/rejected": -231.7129364013672, |
|
"loss": 0.1504, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -5.132498741149902, |
|
"rewards/margins": 5.400217056274414, |
|
"rewards/rejected": -10.532715797424316, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.2085635359116022e-07, |
|
"logits/chosen": -2.9544167518615723, |
|
"logits/rejected": -2.8220763206481934, |
|
"logps/chosen": -245.1588134765625, |
|
"logps/rejected": -261.6687927246094, |
|
"loss": 0.1255, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -5.218487739562988, |
|
"rewards/margins": 6.255653381347656, |
|
"rewards/rejected": -11.474142074584961, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.1395027624309392e-07, |
|
"logits/chosen": -2.8086042404174805, |
|
"logits/rejected": -2.785409927368164, |
|
"logps/chosen": -253.0203857421875, |
|
"logps/rejected": -281.2375793457031, |
|
"loss": 0.16, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -6.011744976043701, |
|
"rewards/margins": 5.636991024017334, |
|
"rewards/rejected": -11.648736000061035, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0704419889502763e-07, |
|
"logits/chosen": -2.9435603618621826, |
|
"logits/rejected": -2.7380692958831787, |
|
"logps/chosen": -261.493896484375, |
|
"logps/rejected": -278.2936096191406, |
|
"loss": 0.1174, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -4.648413181304932, |
|
"rewards/margins": 7.05194616317749, |
|
"rewards/rejected": -11.700358390808105, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.0013812154696132e-07, |
|
"logits/chosen": -2.9581801891326904, |
|
"logits/rejected": -2.8110265731811523, |
|
"logps/chosen": -244.45413208007812, |
|
"logps/rejected": -264.9123840332031, |
|
"loss": 0.1429, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -5.337543487548828, |
|
"rewards/margins": 6.183696746826172, |
|
"rewards/rejected": -11.521239280700684, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.323204419889502e-08, |
|
"logits/chosen": -2.905041217803955, |
|
"logits/rejected": -2.7901289463043213, |
|
"logps/chosen": -200.6727752685547, |
|
"logps/rejected": -228.93032836914062, |
|
"loss": 0.1408, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.679917812347412, |
|
"rewards/margins": 6.9505295753479, |
|
"rewards/rejected": -9.630447387695312, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.632596685082872e-08, |
|
"logits/chosen": -2.856194257736206, |
|
"logits/rejected": -2.761082410812378, |
|
"logps/chosen": -218.3007049560547, |
|
"logps/rejected": -234.4198455810547, |
|
"loss": 0.1398, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.6605896949768066, |
|
"rewards/margins": 6.374427795410156, |
|
"rewards/rejected": -10.035017013549805, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.941988950276243e-08, |
|
"logits/chosen": -2.896955966949463, |
|
"logits/rejected": -2.786186933517456, |
|
"logps/chosen": -197.8193817138672, |
|
"logps/rejected": -214.23001098632812, |
|
"loss": 0.1442, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -4.441520690917969, |
|
"rewards/margins": 5.363109588623047, |
|
"rewards/rejected": -9.804631233215332, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.251381215469612e-08, |
|
"logits/chosen": -2.957908868789673, |
|
"logits/rejected": -2.8377201557159424, |
|
"logps/chosen": -270.6792907714844, |
|
"logps/rejected": -286.62213134765625, |
|
"loss": 0.1486, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -6.179241180419922, |
|
"rewards/margins": 6.042364597320557, |
|
"rewards/rejected": -12.22160530090332, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.560773480662984e-08, |
|
"logits/chosen": -2.774834156036377, |
|
"logits/rejected": -2.703329563140869, |
|
"logps/chosen": -202.3549041748047, |
|
"logps/rejected": -244.16653442382812, |
|
"loss": 0.1335, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.156593322753906, |
|
"rewards/margins": 6.949100494384766, |
|
"rewards/rejected": -11.105693817138672, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.870165745856354e-08, |
|
"logits/chosen": -2.8945531845092773, |
|
"logits/rejected": -2.7444348335266113, |
|
"logps/chosen": -190.70558166503906, |
|
"logps/rejected": -232.6694793701172, |
|
"loss": 0.1084, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.4744460582733154, |
|
"rewards/margins": 8.159748077392578, |
|
"rewards/rejected": -10.634195327758789, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5.1795580110497236e-08, |
|
"logits/chosen": -2.9436967372894287, |
|
"logits/rejected": -2.8356709480285645, |
|
"logps/chosen": -236.69949340820312, |
|
"logps/rejected": -253.46145629882812, |
|
"loss": 0.1855, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.680401802062988, |
|
"rewards/margins": 6.0413007736206055, |
|
"rewards/rejected": -10.72170352935791, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.488950276243094e-08, |
|
"logits/chosen": -2.7878644466400146, |
|
"logits/rejected": -2.745734930038452, |
|
"logps/chosen": -197.5469970703125, |
|
"logps/rejected": -246.12252807617188, |
|
"loss": 0.1106, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.669739246368408, |
|
"rewards/margins": 6.831077575683594, |
|
"rewards/rejected": -10.500818252563477, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.7983425414364637e-08, |
|
"logits/chosen": -2.918865203857422, |
|
"logits/rejected": -2.7714521884918213, |
|
"logps/chosen": -224.0564727783203, |
|
"logps/rejected": -241.874267578125, |
|
"loss": 0.1357, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -4.486175537109375, |
|
"rewards/margins": 6.323982238769531, |
|
"rewards/rejected": -10.810157775878906, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.107734806629834e-08, |
|
"logits/chosen": -2.789961338043213, |
|
"logits/rejected": -2.702083110809326, |
|
"logps/chosen": -197.3814697265625, |
|
"logps/rejected": -234.6428680419922, |
|
"loss": 0.1653, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.8847968578338623, |
|
"rewards/margins": 6.971263885498047, |
|
"rewards/rejected": -10.856060981750488, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.4171270718232044e-08, |
|
"logits/chosen": -2.886162042617798, |
|
"logits/rejected": -2.782167673110962, |
|
"logps/chosen": -251.7931365966797, |
|
"logps/rejected": -276.71856689453125, |
|
"loss": 0.1811, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -5.165857791900635, |
|
"rewards/margins": 6.922626495361328, |
|
"rewards/rejected": -12.088483810424805, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.7265193370165747e-08, |
|
"logits/chosen": -2.998176336288452, |
|
"logits/rejected": -2.831453800201416, |
|
"logps/chosen": -238.40634155273438, |
|
"logps/rejected": -264.1795349121094, |
|
"loss": 0.1153, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.043548345565796, |
|
"rewards/margins": 8.052096366882324, |
|
"rewards/rejected": -11.0956449508667, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.0359116022099446e-08, |
|
"logits/chosen": -2.931680202484131, |
|
"logits/rejected": -2.758117198944092, |
|
"logps/chosen": -215.9856719970703, |
|
"logps/rejected": -238.5784912109375, |
|
"loss": 0.133, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -4.6980485916137695, |
|
"rewards/margins": 6.217514991760254, |
|
"rewards/rejected": -10.915563583374023, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.453038674033149e-09, |
|
"logits/chosen": -2.8354744911193848, |
|
"logits/rejected": -2.7321650981903076, |
|
"logps/chosen": -240.4857940673828, |
|
"logps/rejected": -271.0428771972656, |
|
"loss": 0.1495, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.8582940101623535, |
|
"rewards/margins": 7.768431186676025, |
|
"rewards/rejected": -11.626726150512695, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 805, |
|
"total_flos": 0.0, |
|
"train_loss": 0.18541636852003773, |
|
"train_runtime": 4261.3779, |
|
"train_samples_per_second": 3.022, |
|
"train_steps_per_second": 0.189 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 805, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|