|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 651, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 117760.0, |
|
"learning_rate": 5.000000000000001e-07, |
|
"log_odds_chosen": 0.36438828706741333, |
|
"log_odds_ratio": -0.6397662162780762, |
|
"logits/chosen": 3.8861491680145264, |
|
"logits/rejected": 5.231001853942871, |
|
"logps/chosen": -0.9861465692520142, |
|
"logps/rejected": -1.2529093027114868, |
|
"loss": 1.953, |
|
"nll_loss": 3.2415008544921875, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.04930732399225235, |
|
"rewards/margins": 0.013338141143321991, |
|
"rewards/rejected": -0.06264545768499374, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 29184.0, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"log_odds_chosen": 0.17107267677783966, |
|
"log_odds_ratio": -0.6301043033599854, |
|
"logits/chosen": 4.779696464538574, |
|
"logits/rejected": 5.251872539520264, |
|
"logps/chosen": -1.1045284271240234, |
|
"logps/rejected": -1.2445374727249146, |
|
"loss": 1.7108, |
|
"nll_loss": 1.8614288568496704, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.05522642284631729, |
|
"rewards/margins": 0.007000453770160675, |
|
"rewards/rejected": -0.06222687289118767, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 3932160.0, |
|
"learning_rate": 1.5e-06, |
|
"log_odds_chosen": 0.478428453207016, |
|
"log_odds_ratio": -0.5682710409164429, |
|
"logits/chosen": 4.58956241607666, |
|
"logits/rejected": 5.215265274047852, |
|
"logps/chosen": -0.9884525537490845, |
|
"logps/rejected": -1.2604442834854126, |
|
"loss": 2.1071, |
|
"nll_loss": 1.525723934173584, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.04942262917757034, |
|
"rewards/margins": 0.013599586673080921, |
|
"rewards/rejected": -0.06302221864461899, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 63232.0, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"log_odds_chosen": 0.2763148248195648, |
|
"log_odds_ratio": -0.6428317427635193, |
|
"logits/chosen": 5.248695373535156, |
|
"logits/rejected": 5.335747718811035, |
|
"logps/chosen": -0.9019734263420105, |
|
"logps/rejected": -1.058569312095642, |
|
"loss": 1.6367, |
|
"nll_loss": 1.17539644241333, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.045098677277565, |
|
"rewards/margins": 0.007829795591533184, |
|
"rewards/rejected": -0.052928466349840164, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 1802240.0, |
|
"learning_rate": 2.5e-06, |
|
"log_odds_chosen": -0.07109338045120239, |
|
"log_odds_ratio": -0.9068069458007812, |
|
"logits/chosen": 4.34699821472168, |
|
"logits/rejected": 5.148941993713379, |
|
"logps/chosen": -1.0307292938232422, |
|
"logps/rejected": -1.0001896619796753, |
|
"loss": 2.0499, |
|
"nll_loss": 2.4581282138824463, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.05153647065162659, |
|
"rewards/margins": -0.0015269846189767122, |
|
"rewards/rejected": -0.05000948905944824, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 473088.0, |
|
"learning_rate": 3e-06, |
|
"log_odds_chosen": 0.7022095918655396, |
|
"log_odds_ratio": -0.47877854108810425, |
|
"logits/chosen": 5.137725830078125, |
|
"logits/rejected": 5.073107719421387, |
|
"logps/chosen": -0.7480964660644531, |
|
"logps/rejected": -1.172572374343872, |
|
"loss": 1.9116, |
|
"nll_loss": 1.2398216724395752, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.037404827773571014, |
|
"rewards/margins": 0.021223794668912888, |
|
"rewards/rejected": -0.058628618717193604, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 4161536.0, |
|
"learning_rate": 3.5e-06, |
|
"log_odds_chosen": -0.30822521448135376, |
|
"log_odds_ratio": -1.0616459846496582, |
|
"logits/chosen": 4.378929615020752, |
|
"logits/rejected": 5.239219665527344, |
|
"logps/chosen": -1.115562081336975, |
|
"logps/rejected": -0.8684147596359253, |
|
"loss": 2.0166, |
|
"nll_loss": 2.142368793487549, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.05577809736132622, |
|
"rewards/margins": -0.012357364408671856, |
|
"rewards/rejected": -0.043420739471912384, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 211968.0, |
|
"learning_rate": 4.000000000000001e-06, |
|
"log_odds_chosen": 0.38707518577575684, |
|
"log_odds_ratio": -0.5776039361953735, |
|
"logits/chosen": 5.019408226013184, |
|
"logits/rejected": 5.3371453285217285, |
|
"logps/chosen": -0.9375723004341125, |
|
"logps/rejected": -1.1710981130599976, |
|
"loss": 1.8918, |
|
"nll_loss": 1.6274166107177734, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.046878617256879807, |
|
"rewards/margins": 0.011676294729113579, |
|
"rewards/rejected": -0.058554910123348236, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1704.0, |
|
"learning_rate": 4.5e-06, |
|
"log_odds_chosen": 0.5051761865615845, |
|
"log_odds_ratio": -0.5127500295639038, |
|
"logits/chosen": 4.478859901428223, |
|
"logits/rejected": 4.748915672302246, |
|
"logps/chosen": -0.8121053576469421, |
|
"logps/rejected": -1.1007237434387207, |
|
"loss": 1.8015, |
|
"nll_loss": 1.57842218875885, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.04060526192188263, |
|
"rewards/margins": 0.014430919662117958, |
|
"rewards/rejected": -0.055036187171936035, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 8.9375, |
|
"learning_rate": 5e-06, |
|
"log_odds_chosen": 1.037414789199829, |
|
"log_odds_ratio": -0.3248421549797058, |
|
"logits/chosen": 4.653676509857178, |
|
"logits/rejected": 5.350204944610596, |
|
"logps/chosen": -0.6695261001586914, |
|
"logps/rejected": -1.300445795059204, |
|
"loss": 0.9356, |
|
"nll_loss": 0.7057152986526489, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.03347630053758621, |
|
"rewards/margins": 0.031545985490083694, |
|
"rewards/rejected": -0.0650222972035408, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 2.8125, |
|
"learning_rate": 4.767312946227961e-06, |
|
"log_odds_chosen": 0.6677854061126709, |
|
"log_odds_ratio": -0.5610898733139038, |
|
"logits/chosen": 4.671368598937988, |
|
"logits/rejected": 5.119524002075195, |
|
"logps/chosen": -0.8684868812561035, |
|
"logps/rejected": -1.2020485401153564, |
|
"loss": 0.8288, |
|
"nll_loss": 0.9947482347488403, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.043424345552921295, |
|
"rewards/margins": 0.01667807623744011, |
|
"rewards/rejected": -0.06010241433978081, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.9921875, |
|
"learning_rate": 4.564354645876385e-06, |
|
"log_odds_chosen": 0.5597886443138123, |
|
"log_odds_ratio": -0.5193617343902588, |
|
"logits/chosen": 5.5248026847839355, |
|
"logits/rejected": 6.067958354949951, |
|
"logps/chosen": -0.9015194773674011, |
|
"logps/rejected": -1.2406196594238281, |
|
"loss": 0.7451, |
|
"nll_loss": 0.8342186212539673, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.045075975358486176, |
|
"rewards/margins": 0.01695500686764717, |
|
"rewards/rejected": -0.06203098222613335, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 2.875, |
|
"learning_rate": 4.385290096535147e-06, |
|
"log_odds_chosen": 0.26957136392593384, |
|
"log_odds_ratio": -0.7732787728309631, |
|
"logits/chosen": 4.8973588943481445, |
|
"logits/rejected": 5.552582263946533, |
|
"logps/chosen": -0.877202033996582, |
|
"logps/rejected": -0.9518612623214722, |
|
"loss": 0.7319, |
|
"nll_loss": 0.6940464377403259, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.04386010393500328, |
|
"rewards/margins": 0.0037329583428800106, |
|
"rewards/rejected": -0.04759306460618973, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 2.5, |
|
"learning_rate": 4.2257712736425835e-06, |
|
"log_odds_chosen": 0.7680839896202087, |
|
"log_odds_ratio": -0.5321913957595825, |
|
"logits/chosen": 5.471996307373047, |
|
"logits/rejected": 5.644137382507324, |
|
"logps/chosen": -0.6714180111885071, |
|
"logps/rejected": -0.9587985277175903, |
|
"loss": 0.732, |
|
"nll_loss": 0.6440631151199341, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.033570900559425354, |
|
"rewards/margins": 0.014369020238518715, |
|
"rewards/rejected": -0.047939930111169815, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 2.375, |
|
"learning_rate": 4.082482904638631e-06, |
|
"log_odds_chosen": 0.5068908929824829, |
|
"log_odds_ratio": -0.604145884513855, |
|
"logits/chosen": 5.474297523498535, |
|
"logits/rejected": 5.376832485198975, |
|
"logps/chosen": -0.8357957005500793, |
|
"logps/rejected": -1.0136160850524902, |
|
"loss": 0.706, |
|
"nll_loss": 0.6737378835678101, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.04178978502750397, |
|
"rewards/margins": 0.00889101903885603, |
|
"rewards/rejected": -0.05068080872297287, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 2.015625, |
|
"learning_rate": 3.952847075210474e-06, |
|
"log_odds_chosen": 0.615983784198761, |
|
"log_odds_ratio": -0.4876289963722229, |
|
"logits/chosen": 5.473410129547119, |
|
"logits/rejected": 6.06318998336792, |
|
"logps/chosen": -0.9676389694213867, |
|
"logps/rejected": -1.349385142326355, |
|
"loss": 0.6996, |
|
"nll_loss": 0.6852242350578308, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.048381954431533813, |
|
"rewards/margins": 0.019087309017777443, |
|
"rewards/rejected": -0.06746925413608551, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 2.171875, |
|
"learning_rate": 3.834824944236852e-06, |
|
"log_odds_chosen": 0.4551977515220642, |
|
"log_odds_ratio": -0.5428072214126587, |
|
"logits/chosen": 4.785284042358398, |
|
"logits/rejected": 6.005092620849609, |
|
"logps/chosen": -0.7350739240646362, |
|
"logps/rejected": -1.0496256351470947, |
|
"loss": 0.6959, |
|
"nll_loss": 0.5339438319206238, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.03675369173288345, |
|
"rewards/margins": 0.015727588906884193, |
|
"rewards/rejected": -0.052481282502412796, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 2.015625, |
|
"learning_rate": 3.72677996249965e-06, |
|
"log_odds_chosen": 0.5587902665138245, |
|
"log_odds_ratio": -0.6063727140426636, |
|
"logits/chosen": 4.6595892906188965, |
|
"logits/rejected": 5.4700422286987305, |
|
"logps/chosen": -0.7482207417488098, |
|
"logps/rejected": -0.9887701272964478, |
|
"loss": 0.7233, |
|
"nll_loss": 0.5874465703964233, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.03741103783249855, |
|
"rewards/margins": 0.012027469463646412, |
|
"rewards/rejected": -0.04943850636482239, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 2.0625, |
|
"learning_rate": 3.6273812505500587e-06, |
|
"log_odds_chosen": 0.9965683817863464, |
|
"log_odds_ratio": -0.4162277281284332, |
|
"logits/chosen": 5.304540157318115, |
|
"logits/rejected": 5.486930847167969, |
|
"logps/chosen": -0.7579169869422913, |
|
"logps/rejected": -1.1843591928482056, |
|
"loss": 0.7298, |
|
"nll_loss": 0.6787526607513428, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.03789585083723068, |
|
"rewards/margins": 0.021322116255760193, |
|
"rewards/rejected": -0.059217967092990875, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 1.9609375, |
|
"learning_rate": 3.5355339059327378e-06, |
|
"log_odds_chosen": 0.2911016047000885, |
|
"log_odds_ratio": -0.6208275556564331, |
|
"logits/chosen": 5.865508556365967, |
|
"logits/rejected": 5.9140448570251465, |
|
"logps/chosen": -1.0318800210952759, |
|
"logps/rejected": -1.2233208417892456, |
|
"loss": 0.6888, |
|
"nll_loss": 0.8277570009231567, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.051594000309705734, |
|
"rewards/margins": 0.009572046808898449, |
|
"rewards/rejected": -0.061166055500507355, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 2.546875, |
|
"learning_rate": 3.450327796711771e-06, |
|
"log_odds_chosen": 0.3929597735404968, |
|
"log_odds_ratio": -0.6252869367599487, |
|
"logits/chosen": 5.480368137359619, |
|
"logits/rejected": 5.818605899810791, |
|
"logps/chosen": -0.8382253646850586, |
|
"logps/rejected": -1.1194109916687012, |
|
"loss": 0.703, |
|
"nll_loss": 0.7914389967918396, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.04191126674413681, |
|
"rewards/margins": 0.014059278182685375, |
|
"rewards/rejected": -0.05597054958343506, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 2.234375, |
|
"learning_rate": 3.3709993123162106e-06, |
|
"log_odds_chosen": 1.1686198711395264, |
|
"log_odds_ratio": -0.39844751358032227, |
|
"logits/chosen": 4.818378448486328, |
|
"logits/rejected": 5.660789966583252, |
|
"logps/chosen": -0.5040851831436157, |
|
"logps/rejected": -0.9685913324356079, |
|
"loss": 0.6554, |
|
"nll_loss": 0.49605101346969604, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.025204259902238846, |
|
"rewards/margins": 0.02322530373930931, |
|
"rewards/rejected": -0.04842956364154816, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 2.046875, |
|
"learning_rate": 3.296902366978936e-06, |
|
"log_odds_chosen": 0.7159255743026733, |
|
"log_odds_ratio": -0.5276229977607727, |
|
"logits/chosen": 4.3275017738342285, |
|
"logits/rejected": 5.1829423904418945, |
|
"logps/chosen": -0.7593253254890442, |
|
"logps/rejected": -1.0148638486862183, |
|
"loss": 0.6289, |
|
"nll_loss": 0.609928548336029, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.03796626627445221, |
|
"rewards/margins": 0.012776928022503853, |
|
"rewards/rejected": -0.05074319988489151, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 2.375, |
|
"learning_rate": 3.2274861218395142e-06, |
|
"log_odds_chosen": 0.7326894998550415, |
|
"log_odds_ratio": -0.5214331150054932, |
|
"logits/chosen": 4.783654689788818, |
|
"logits/rejected": 5.283537864685059, |
|
"logps/chosen": -0.7465990781784058, |
|
"logps/rejected": -0.9910147786140442, |
|
"loss": 0.6382, |
|
"nll_loss": 0.7347540855407715, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.03732995316386223, |
|
"rewards/margins": 0.012220785021781921, |
|
"rewards/rejected": -0.04955074191093445, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 2.0625, |
|
"learning_rate": 3.1622776601683796e-06, |
|
"log_odds_chosen": 0.040362291038036346, |
|
"log_odds_ratio": -0.7654204964637756, |
|
"logits/chosen": 4.929324150085449, |
|
"logits/rejected": 4.940483570098877, |
|
"logps/chosen": -0.939703106880188, |
|
"logps/rejected": -0.9395262598991394, |
|
"loss": 0.6626, |
|
"nll_loss": 0.7169132232666016, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.04698516055941582, |
|
"rewards/margins": -8.843839168548584e-06, |
|
"rewards/rejected": -0.04697632044553757, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 2.59375, |
|
"learning_rate": 3.1008683647302113e-06, |
|
"log_odds_chosen": 0.8304751515388489, |
|
"log_odds_ratio": -0.4627406597137451, |
|
"logits/chosen": 4.34907341003418, |
|
"logits/rejected": 4.541801929473877, |
|
"logps/chosen": -0.7797168493270874, |
|
"logps/rejected": -1.0878037214279175, |
|
"loss": 0.6408, |
|
"nll_loss": 0.6424815058708191, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.03898584097623825, |
|
"rewards/margins": 0.015404346399009228, |
|
"rewards/rejected": -0.05439019203186035, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 2.328125, |
|
"learning_rate": 3.0429030972509227e-06, |
|
"log_odds_chosen": 0.2547241747379303, |
|
"log_odds_ratio": -0.7041358351707458, |
|
"logits/chosen": 4.1212077140808105, |
|
"logits/rejected": 5.139257431030273, |
|
"logps/chosen": -0.5988011360168457, |
|
"logps/rejected": -0.7647382020950317, |
|
"loss": 0.6441, |
|
"nll_loss": 0.4384763836860657, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.029940057545900345, |
|
"rewards/margins": 0.008296851068735123, |
|
"rewards/rejected": -0.03823690861463547, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 1.9609375, |
|
"learning_rate": 2.988071523335984e-06, |
|
"log_odds_chosen": 0.7432643175125122, |
|
"log_odds_ratio": -0.4928904175758362, |
|
"logits/chosen": 4.240169525146484, |
|
"logits/rejected": 4.746310234069824, |
|
"logps/chosen": -0.7583116292953491, |
|
"logps/rejected": -1.0217373371124268, |
|
"loss": 0.6349, |
|
"nll_loss": 0.5912537574768066, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.03791557624936104, |
|
"rewards/margins": 0.013171288184821606, |
|
"rewards/rejected": -0.05108686536550522, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 2.5, |
|
"learning_rate": 2.9361010975735177e-06, |
|
"log_odds_chosen": 0.6404408812522888, |
|
"log_odds_ratio": -0.5461726784706116, |
|
"logits/chosen": 4.347890377044678, |
|
"logits/rejected": 5.2955708503723145, |
|
"logps/chosen": -0.8145158886909485, |
|
"logps/rejected": -1.124975323677063, |
|
"loss": 0.6204, |
|
"nll_loss": 0.5651360154151917, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.040725789964199066, |
|
"rewards/margins": 0.015522971749305725, |
|
"rewards/rejected": -0.05624876171350479, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 1.96875, |
|
"learning_rate": 2.8867513459481293e-06, |
|
"log_odds_chosen": 0.4704459607601166, |
|
"log_odds_ratio": -0.6623938083648682, |
|
"logits/chosen": 4.255876064300537, |
|
"logits/rejected": 5.063040733337402, |
|
"logps/chosen": -0.7718355059623718, |
|
"logps/rejected": -1.144460916519165, |
|
"loss": 0.6404, |
|
"nll_loss": 0.6724303364753723, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.03859177231788635, |
|
"rewards/margins": 0.01863126829266548, |
|
"rewards/rejected": -0.05722304433584213, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 2.453125, |
|
"learning_rate": 2.839809171235324e-06, |
|
"log_odds_chosen": 1.5952459573745728, |
|
"log_odds_ratio": -0.2707791328430176, |
|
"logits/chosen": 2.7694969177246094, |
|
"logits/rejected": 5.479510307312012, |
|
"logps/chosen": -0.4962679445743561, |
|
"logps/rejected": -1.2316776514053345, |
|
"loss": 0.6428, |
|
"nll_loss": 0.3623020648956299, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.024813394993543625, |
|
"rewards/margins": 0.03677048534154892, |
|
"rewards/rejected": -0.06158388406038284, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 2.078125, |
|
"learning_rate": 2.7950849718747376e-06, |
|
"log_odds_chosen": 0.4402007460594177, |
|
"log_odds_ratio": -0.5388344526290894, |
|
"logits/chosen": 4.8701372146606445, |
|
"logits/rejected": 4.049181938171387, |
|
"logps/chosen": -0.8427563905715942, |
|
"logps/rejected": -1.1280080080032349, |
|
"loss": 0.6661, |
|
"nll_loss": 0.6774541735649109, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.04213782027363777, |
|
"rewards/margins": 0.014262576587498188, |
|
"rewards/rejected": -0.056400395929813385, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 1.9765625, |
|
"learning_rate": 2.752409412815902e-06, |
|
"log_odds_chosen": 1.4536019563674927, |
|
"log_odds_ratio": -0.3178521990776062, |
|
"logits/chosen": 4.046222686767578, |
|
"logits/rejected": 4.855486869812012, |
|
"logps/chosen": -0.4614998400211334, |
|
"logps/rejected": -1.0025476217269897, |
|
"loss": 0.6396, |
|
"nll_loss": 0.46759381890296936, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.02307499200105667, |
|
"rewards/margins": 0.027052391320466995, |
|
"rewards/rejected": -0.05012737959623337, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 2.65625, |
|
"learning_rate": 2.711630722733202e-06, |
|
"log_odds_chosen": 0.4552677273750305, |
|
"log_odds_ratio": -0.5441101789474487, |
|
"logits/chosen": 4.233187198638916, |
|
"logits/rejected": 4.776756286621094, |
|
"logps/chosen": -0.9984881281852722, |
|
"logps/rejected": -1.3039405345916748, |
|
"loss": 0.6326, |
|
"nll_loss": 0.7266319990158081, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.04992440715432167, |
|
"rewards/margins": 0.01527262944728136, |
|
"rewards/rejected": -0.06519703567028046, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 1.9609375, |
|
"learning_rate": 2.6726124191242444e-06, |
|
"log_odds_chosen": 0.3951299488544464, |
|
"log_odds_ratio": -0.6442996263504028, |
|
"logits/chosen": 4.592418193817139, |
|
"logits/rejected": 4.885247707366943, |
|
"logps/chosen": -0.9690208435058594, |
|
"logps/rejected": -1.1191128492355347, |
|
"loss": 0.6271, |
|
"nll_loss": 0.7028160095214844, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.04845104366540909, |
|
"rewards/margins": 0.007504602428525686, |
|
"rewards/rejected": -0.055955640971660614, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"grad_norm": 2.109375, |
|
"learning_rate": 2.6352313834736496e-06, |
|
"log_odds_chosen": 0.6397253274917603, |
|
"log_odds_ratio": -0.4948647916316986, |
|
"logits/chosen": 3.1035220623016357, |
|
"logits/rejected": 4.4074320793151855, |
|
"logps/chosen": -0.7063679695129395, |
|
"logps/rejected": -1.086042881011963, |
|
"loss": 0.6133, |
|
"nll_loss": 0.5209956765174866, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.03531839698553085, |
|
"rewards/margins": 0.0189837496727705, |
|
"rewards/rejected": -0.054302144795656204, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 1.9296875, |
|
"learning_rate": 2.599376224550182e-06, |
|
"log_odds_chosen": 0.5072129368782043, |
|
"log_odds_ratio": -0.5375211834907532, |
|
"logits/chosen": 4.4618144035339355, |
|
"logits/rejected": 4.897726535797119, |
|
"logps/chosen": -0.8658114671707153, |
|
"logps/rejected": -1.161678433418274, |
|
"loss": 0.625, |
|
"nll_loss": 0.7147814035415649, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.043290577828884125, |
|
"rewards/margins": 0.014793348498642445, |
|
"rewards/rejected": -0.058083921670913696, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 2.28125, |
|
"learning_rate": 2.564945880212886e-06, |
|
"log_odds_chosen": 0.5736058950424194, |
|
"log_odds_ratio": -0.4948197305202484, |
|
"logits/chosen": 4.31764554977417, |
|
"logits/rejected": 4.153486251831055, |
|
"logps/chosen": -0.8540223836898804, |
|
"logps/rejected": -1.1471771001815796, |
|
"loss": 0.6393, |
|
"nll_loss": 0.6763076186180115, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.0427011176943779, |
|
"rewards/margins": 0.014657735824584961, |
|
"rewards/rejected": -0.05735884979367256, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 3.640625, |
|
"learning_rate": 2.5318484177091667e-06, |
|
"log_odds_chosen": 0.8381564021110535, |
|
"log_odds_ratio": -0.5308811068534851, |
|
"logits/chosen": 4.037534236907959, |
|
"logits/rejected": 5.888669013977051, |
|
"logps/chosen": -0.700161337852478, |
|
"logps/rejected": -1.2042081356048584, |
|
"loss": 0.6318, |
|
"nll_loss": 0.5512461066246033, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.03500806540250778, |
|
"rewards/margins": 0.025202345103025436, |
|
"rewards/rejected": -0.060210417956113815, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 2.171875, |
|
"learning_rate": 2.5e-06, |
|
"log_odds_chosen": 0.7038768529891968, |
|
"log_odds_ratio": -0.43052348494529724, |
|
"logits/chosen": 3.822885036468506, |
|
"logits/rejected": 4.210227012634277, |
|
"logps/chosen": -0.6150542497634888, |
|
"logps/rejected": -0.9889954328536987, |
|
"loss": 0.6218, |
|
"nll_loss": 0.5013046264648438, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.030752714723348618, |
|
"rewards/margins": 0.01869705691933632, |
|
"rewards/rejected": -0.04944976791739464, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 2.03125, |
|
"learning_rate": 2.4693239916239746e-06, |
|
"log_odds_chosen": 0.49417972564697266, |
|
"log_odds_ratio": -0.5454962253570557, |
|
"logits/chosen": 3.7158710956573486, |
|
"logits/rejected": 4.625822067260742, |
|
"logps/chosen": -0.7136448621749878, |
|
"logps/rejected": -0.9806584119796753, |
|
"loss": 0.6163, |
|
"nll_loss": 0.5766875147819519, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.03568224236369133, |
|
"rewards/margins": 0.013350683264434338, |
|
"rewards/rejected": -0.04903292655944824, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"grad_norm": 1.96875, |
|
"learning_rate": 2.4397501823713327e-06, |
|
"log_odds_chosen": 1.2905668020248413, |
|
"log_odds_ratio": -0.3054632544517517, |
|
"logits/chosen": 4.375031471252441, |
|
"logits/rejected": 5.165828704833984, |
|
"logps/chosen": -0.6634560823440552, |
|
"logps/rejected": -1.2297804355621338, |
|
"loss": 0.6299, |
|
"nll_loss": 0.5654190182685852, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.03317280486226082, |
|
"rewards/margins": 0.02831621840596199, |
|
"rewards/rejected": -0.06148902326822281, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 2.234375, |
|
"learning_rate": 2.411214110852061e-06, |
|
"log_odds_chosen": 0.4614163041114807, |
|
"log_odds_ratio": -0.5477044582366943, |
|
"logits/chosen": 3.945091724395752, |
|
"logits/rejected": 4.783943176269531, |
|
"logps/chosen": -0.670985758304596, |
|
"logps/rejected": -0.8528381586074829, |
|
"loss": 0.6328, |
|
"nll_loss": 0.5353778004646301, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.03354928642511368, |
|
"rewards/margins": 0.009092616848647594, |
|
"rewards/rejected": -0.04264190047979355, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 2.140625, |
|
"learning_rate": 2.3836564731139807e-06, |
|
"log_odds_chosen": 0.519318699836731, |
|
"log_odds_ratio": -0.5034213066101074, |
|
"logits/chosen": 3.990828037261963, |
|
"logits/rejected": 4.283727645874023, |
|
"logps/chosen": -0.7843809723854065, |
|
"logps/rejected": -1.1084554195404053, |
|
"loss": 0.598, |
|
"nll_loss": 0.6064985394477844, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.03921904414892197, |
|
"rewards/margins": 0.01620371639728546, |
|
"rewards/rejected": -0.055422764271497726, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 2.015625, |
|
"learning_rate": 2.357022603955159e-06, |
|
"log_odds_chosen": 1.2161670923233032, |
|
"log_odds_ratio": -0.5558447241783142, |
|
"logits/chosen": 2.7631869316101074, |
|
"logits/rejected": 4.014997959136963, |
|
"logps/chosen": -0.4891352653503418, |
|
"logps/rejected": -1.057556390762329, |
|
"loss": 0.6063, |
|
"nll_loss": 0.5005042552947998, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.02445676364004612, |
|
"rewards/margins": 0.028421055525541306, |
|
"rewards/rejected": -0.052877821028232574, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"grad_norm": 2.0625, |
|
"learning_rate": 2.3312620206007847e-06, |
|
"log_odds_chosen": 0.8278636932373047, |
|
"log_odds_ratio": -0.43884754180908203, |
|
"logits/chosen": 4.009448051452637, |
|
"logits/rejected": 4.671367645263672, |
|
"logps/chosen": -0.7134698629379272, |
|
"logps/rejected": -1.146784782409668, |
|
"loss": 0.5862, |
|
"nll_loss": 0.5619599223136902, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.03567349165678024, |
|
"rewards/margins": 0.021665748208761215, |
|
"rewards/rejected": -0.05733924359083176, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"grad_norm": 2.609375, |
|
"learning_rate": 2.3063280200722128e-06, |
|
"log_odds_chosen": 1.677671194076538, |
|
"log_odds_ratio": -0.2895694375038147, |
|
"logits/chosen": 2.985790491104126, |
|
"logits/rejected": 4.190914630889893, |
|
"logps/chosen": -0.5018793344497681, |
|
"logps/rejected": -1.0572091341018677, |
|
"loss": 0.5765, |
|
"nll_loss": 0.5001329183578491, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.025093963369727135, |
|
"rewards/margins": 0.02776649035513401, |
|
"rewards/rejected": -0.05286044999957085, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"grad_norm": 2.0, |
|
"learning_rate": 2.2821773229381924e-06, |
|
"log_odds_chosen": 1.0791471004486084, |
|
"log_odds_ratio": -0.37350553274154663, |
|
"logits/chosen": 3.676426649093628, |
|
"logits/rejected": 3.8374907970428467, |
|
"logps/chosen": -0.7438164353370667, |
|
"logps/rejected": -1.29355788230896, |
|
"loss": 0.5652, |
|
"nll_loss": 0.6555451154708862, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.037190817296504974, |
|
"rewards/margins": 0.027487074956297874, |
|
"rewards/rejected": -0.0646779015660286, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"grad_norm": 2.21875, |
|
"learning_rate": 2.2587697572631284e-06, |
|
"log_odds_chosen": 0.275502473115921, |
|
"log_odds_ratio": -0.7135687470436096, |
|
"logits/chosen": 4.321534156799316, |
|
"logits/rejected": 4.41732120513916, |
|
"logps/chosen": -0.9727070927619934, |
|
"logps/rejected": -1.0810346603393555, |
|
"loss": 0.5952, |
|
"nll_loss": 0.7110171914100647, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.04863535612821579, |
|
"rewards/margins": 0.005416377447545528, |
|
"rewards/rejected": -0.05405173450708389, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"grad_norm": 2.15625, |
|
"learning_rate": 2.23606797749979e-06, |
|
"log_odds_chosen": 0.34863442182540894, |
|
"log_odds_ratio": -0.6463712453842163, |
|
"logits/chosen": 4.6876606941223145, |
|
"logits/rejected": 5.054124355316162, |
|
"logps/chosen": -0.9338000416755676, |
|
"logps/rejected": -1.1037800312042236, |
|
"loss": 0.5953, |
|
"nll_loss": 0.8528131246566772, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.04669000208377838, |
|
"rewards/margins": 0.008499005809426308, |
|
"rewards/rejected": -0.05518900603055954, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"grad_norm": 2.171875, |
|
"learning_rate": 2.2140372138502386e-06, |
|
"log_odds_chosen": 0.9548345804214478, |
|
"log_odds_ratio": -0.39882007241249084, |
|
"logits/chosen": 3.5289406776428223, |
|
"logits/rejected": 3.8287463188171387, |
|
"logps/chosen": -0.6570809483528137, |
|
"logps/rejected": -1.1388274431228638, |
|
"loss": 0.609, |
|
"nll_loss": 0.5968061685562134, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.032854050397872925, |
|
"rewards/margins": 0.024087321013212204, |
|
"rewards/rejected": -0.05694136768579483, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 1.9609375, |
|
"learning_rate": 2.1926450482675734e-06, |
|
"log_odds_chosen": 0.4539831280708313, |
|
"log_odds_ratio": -0.5872747302055359, |
|
"logits/chosen": 3.2061939239501953, |
|
"logits/rejected": 4.589787006378174, |
|
"logps/chosen": -0.7979894280433655, |
|
"logps/rejected": -1.0285401344299316, |
|
"loss": 0.5827, |
|
"nll_loss": 0.6084668636322021, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.039899468421936035, |
|
"rewards/margins": 0.011527536436915398, |
|
"rewards/rejected": -0.051426999270915985, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"grad_norm": 2.484375, |
|
"learning_rate": 2.1718612138153473e-06, |
|
"log_odds_chosen": 0.8493059277534485, |
|
"log_odds_ratio": -0.6372500658035278, |
|
"logits/chosen": 3.078615665435791, |
|
"logits/rejected": 4.099945068359375, |
|
"logps/chosen": -0.6704202890396118, |
|
"logps/rejected": -0.7899671792984009, |
|
"loss": 0.5788, |
|
"nll_loss": 0.5733928084373474, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.03352101519703865, |
|
"rewards/margins": 0.005977341439574957, |
|
"rewards/rejected": -0.039498358964920044, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"grad_norm": 1.859375, |
|
"learning_rate": 2.151657414559676e-06, |
|
"log_odds_chosen": 0.6374627351760864, |
|
"log_odds_ratio": -0.5592355728149414, |
|
"logits/chosen": 3.680483341217041, |
|
"logits/rejected": 3.9816291332244873, |
|
"logps/chosen": -0.8559755086898804, |
|
"logps/rejected": -1.1612054109573364, |
|
"loss": 0.6003, |
|
"nll_loss": 0.6403124928474426, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.04279877990484238, |
|
"rewards/margins": 0.015261486172676086, |
|
"rewards/rejected": -0.05806026607751846, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"grad_norm": 1.8984375, |
|
"learning_rate": 2.132007163556104e-06, |
|
"log_odds_chosen": 1.399209976196289, |
|
"log_odds_ratio": -0.5735031366348267, |
|
"logits/chosen": 3.132289171218872, |
|
"logits/rejected": 3.5427193641662598, |
|
"logps/chosen": -0.5963010191917419, |
|
"logps/rejected": -0.9639393091201782, |
|
"loss": 0.5984, |
|
"nll_loss": 0.5058175325393677, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.029815051704645157, |
|
"rewards/margins": 0.018381912261247635, |
|
"rewards/rejected": -0.04819696769118309, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"grad_norm": 1.859375, |
|
"learning_rate": 2.1128856368212917e-06, |
|
"log_odds_chosen": 0.688880443572998, |
|
"log_odds_ratio": -0.4902462959289551, |
|
"logits/chosen": 2.6950721740722656, |
|
"logits/rejected": 3.1528286933898926, |
|
"logps/chosen": -0.6383022665977478, |
|
"logps/rejected": -0.9691828489303589, |
|
"loss": 0.5718, |
|
"nll_loss": 0.4289799630641937, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.03191510960459709, |
|
"rewards/margins": 0.016544032841920853, |
|
"rewards/rejected": -0.048459142446517944, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"grad_norm": 2.421875, |
|
"learning_rate": 2.0942695414584777e-06, |
|
"log_odds_chosen": 1.3283271789550781, |
|
"log_odds_ratio": -0.3012233078479767, |
|
"logits/chosen": 3.4564871788024902, |
|
"logits/rejected": 4.7043867111206055, |
|
"logps/chosen": -0.6779360771179199, |
|
"logps/rejected": -1.523970365524292, |
|
"loss": 0.6138, |
|
"nll_loss": 0.5768535137176514, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.033896803855895996, |
|
"rewards/margins": 0.042301714420318604, |
|
"rewards/rejected": -0.0761985182762146, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"grad_norm": 1.953125, |
|
"learning_rate": 2.0761369963434992e-06, |
|
"log_odds_chosen": 1.4566174745559692, |
|
"log_odds_ratio": -0.32581037282943726, |
|
"logits/chosen": 2.691676616668701, |
|
"logits/rejected": 4.661564826965332, |
|
"logps/chosen": -0.4493564963340759, |
|
"logps/rejected": -1.0139671564102173, |
|
"loss": 0.5782, |
|
"nll_loss": 0.37120580673217773, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.022467825561761856, |
|
"rewards/margins": 0.028230536729097366, |
|
"rewards/rejected": -0.05069836229085922, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"grad_norm": 2.0625, |
|
"learning_rate": 2.058467423981546e-06, |
|
"log_odds_chosen": 1.0190517902374268, |
|
"log_odds_ratio": -0.5730624198913574, |
|
"logits/chosen": 3.407086133956909, |
|
"logits/rejected": 4.482596397399902, |
|
"logps/chosen": -0.7345553040504456, |
|
"logps/rejected": -0.9309635162353516, |
|
"loss": 0.5723, |
|
"nll_loss": 0.5519307851791382, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.03672776371240616, |
|
"rewards/margins": 0.009820410050451756, |
|
"rewards/rejected": -0.04654817283153534, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"grad_norm": 2.375, |
|
"learning_rate": 2.0412414523193154e-06, |
|
"log_odds_chosen": 1.107779860496521, |
|
"log_odds_ratio": -0.40593117475509644, |
|
"logits/chosen": 3.215078830718994, |
|
"logits/rejected": 4.503358840942383, |
|
"logps/chosen": -0.663019597530365, |
|
"logps/rejected": -1.2786920070648193, |
|
"loss": 0.5815, |
|
"nll_loss": 0.5633824467658997, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.03315097838640213, |
|
"rewards/margins": 0.030783619731664658, |
|
"rewards/rejected": -0.06393460184335709, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"grad_norm": 2.09375, |
|
"learning_rate": 2.0244408254472904e-06, |
|
"log_odds_chosen": 0.7602224349975586, |
|
"log_odds_ratio": -0.5018362998962402, |
|
"logits/chosen": 3.604353666305542, |
|
"logits/rejected": 4.481316089630127, |
|
"logps/chosen": -0.7105517387390137, |
|
"logps/rejected": -1.0740478038787842, |
|
"loss": 0.5873, |
|
"nll_loss": 0.5312780737876892, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.035527586936950684, |
|
"rewards/margins": 0.018174810335040092, |
|
"rewards/rejected": -0.05370239168405533, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"grad_norm": 1.90625, |
|
"learning_rate": 2.0080483222562476e-06, |
|
"log_odds_chosen": 1.3286904096603394, |
|
"log_odds_ratio": -0.36574870347976685, |
|
"logits/chosen": 3.620469331741333, |
|
"logits/rejected": 4.373411655426025, |
|
"logps/chosen": -0.4990506172180176, |
|
"logps/rejected": -0.953050971031189, |
|
"loss": 0.5716, |
|
"nll_loss": 0.5527733564376831, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.024952532723546028, |
|
"rewards/margins": 0.022700021043419838, |
|
"rewards/rejected": -0.047652553766965866, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"grad_norm": 2.359375, |
|
"learning_rate": 1.9920476822239895e-06, |
|
"log_odds_chosen": 0.4847317636013031, |
|
"log_odds_ratio": -0.5640643835067749, |
|
"logits/chosen": 3.125113010406494, |
|
"logits/rejected": 3.340205669403076, |
|
"logps/chosen": -0.8360971212387085, |
|
"logps/rejected": -1.0480194091796875, |
|
"loss": 0.5738, |
|
"nll_loss": 0.6136351823806763, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.041804857552051544, |
|
"rewards/margins": 0.010596117004752159, |
|
"rewards/rejected": -0.05240097641944885, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"grad_norm": 2.09375, |
|
"learning_rate": 1.976423537605237e-06, |
|
"log_odds_chosen": 0.8931509256362915, |
|
"log_odds_ratio": -0.40087467432022095, |
|
"logits/chosen": 3.574153423309326, |
|
"logits/rejected": 4.537802219390869, |
|
"logps/chosen": -0.6440940499305725, |
|
"logps/rejected": -1.088226556777954, |
|
"loss": 0.5846, |
|
"nll_loss": 0.5598152875900269, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.032204702496528625, |
|
"rewards/margins": 0.0222066268324852, |
|
"rewards/rejected": -0.054411329329013824, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 2.5625, |
|
"learning_rate": 1.961161351381841e-06, |
|
"log_odds_chosen": 1.2053475379943848, |
|
"log_odds_ratio": -0.430248886346817, |
|
"logits/chosen": 2.245370388031006, |
|
"logits/rejected": 3.5309462547302246, |
|
"logps/chosen": -0.5642444491386414, |
|
"logps/rejected": -0.9910544157028198, |
|
"loss": 0.5605, |
|
"nll_loss": 0.45886915922164917, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.028212225064635277, |
|
"rewards/margins": 0.021340493112802505, |
|
"rewards/rejected": -0.04955272004008293, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 651, |
|
"total_flos": 0.0, |
|
"train_loss": 0.812556631554107, |
|
"train_runtime": 4771.9621, |
|
"train_samples_per_second": 4.358, |
|
"train_steps_per_second": 0.136 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 651, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|