|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.6543432030099787, |
|
"eval_steps": 500, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.000000000000001e-07, |
|
"logits/chosen": -1.9473017454147339, |
|
"logits/rejected": -1.9154374599456787, |
|
"logps/chosen": -178.9344940185547, |
|
"logps/rejected": -157.74179077148438, |
|
"loss": 0.6983, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.0024076940026134253, |
|
"rewards/margins": -0.008672237396240234, |
|
"rewards/rejected": 0.011079930700361729, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"logits/chosen": -1.6428608894348145, |
|
"logits/rejected": -1.720033884048462, |
|
"logps/chosen": -144.77987670898438, |
|
"logps/rejected": -144.8594207763672, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.010233569890260696, |
|
"rewards/margins": 0.00934591330587864, |
|
"rewards/rejected": 0.0008876564679667354, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.5e-06, |
|
"logits/chosen": -2.0388426780700684, |
|
"logits/rejected": -1.988499402999878, |
|
"logps/chosen": -212.5189208984375, |
|
"logps/rejected": -199.8199462890625, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.010333014652132988, |
|
"rewards/margins": 0.005366659723222256, |
|
"rewards/rejected": 0.004966353997588158, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"logits/chosen": -1.9381685256958008, |
|
"logits/rejected": -1.9115777015686035, |
|
"logps/chosen": -141.91700744628906, |
|
"logps/rejected": -141.35037231445312, |
|
"loss": 0.6876, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.006317043211311102, |
|
"rewards/margins": 0.01223981473594904, |
|
"rewards/rejected": -0.005922770127654076, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.5e-06, |
|
"logits/chosen": -2.0682923793792725, |
|
"logits/rejected": -2.116490364074707, |
|
"logps/chosen": -144.48883056640625, |
|
"logps/rejected": -136.4881591796875, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.004738474264740944, |
|
"rewards/margins": 0.006069636438041925, |
|
"rewards/rejected": -0.0013311614748090506, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3e-06, |
|
"logits/chosen": -1.8117401599884033, |
|
"logits/rejected": -1.763953447341919, |
|
"logps/chosen": -151.67367553710938, |
|
"logps/rejected": -137.02761840820312, |
|
"loss": 0.6984, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.004828071687370539, |
|
"rewards/margins": -0.008975815027952194, |
|
"rewards/rejected": 0.00414774427190423, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.5000000000000004e-06, |
|
"logits/chosen": -2.0915303230285645, |
|
"logits/rejected": -2.1096673011779785, |
|
"logps/chosen": -146.31625366210938, |
|
"logps/rejected": -154.5668487548828, |
|
"loss": 0.6955, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.002810073085129261, |
|
"rewards/margins": -0.004074001684784889, |
|
"rewards/rejected": 0.001263928133994341, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.000000000000001e-06, |
|
"logits/chosen": -2.126337766647339, |
|
"logits/rejected": -2.126678228378296, |
|
"logps/chosen": -204.9049072265625, |
|
"logps/rejected": -203.91268920898438, |
|
"loss": 0.6962, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.008610963821411133, |
|
"rewards/margins": -0.005686474964022636, |
|
"rewards/rejected": 0.014297439716756344, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.5e-06, |
|
"logits/chosen": -2.2768051624298096, |
|
"logits/rejected": -2.281789541244507, |
|
"logps/chosen": -153.13116455078125, |
|
"logps/rejected": -150.92642211914062, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.01126334723085165, |
|
"rewards/margins": 0.012444520369172096, |
|
"rewards/rejected": -0.0011811736039817333, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5e-06, |
|
"logits/chosen": -1.9441958665847778, |
|
"logits/rejected": -1.964548110961914, |
|
"logps/chosen": -149.23391723632812, |
|
"logps/rejected": -137.14862060546875, |
|
"loss": 0.6881, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.020777558907866478, |
|
"rewards/margins": 0.011875724419951439, |
|
"rewards/rejected": 0.008901833556592464, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.500000000000001e-06, |
|
"logits/chosen": -2.147075653076172, |
|
"logits/rejected": -2.1675617694854736, |
|
"logps/chosen": -174.22434997558594, |
|
"logps/rejected": -178.1639404296875, |
|
"loss": 0.6947, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.013791371136903763, |
|
"rewards/margins": -0.0025409706868231297, |
|
"rewards/rejected": 0.016332341358065605, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 6e-06, |
|
"logits/chosen": -1.9246153831481934, |
|
"logits/rejected": -1.9109158515930176, |
|
"logps/chosen": -150.50784301757812, |
|
"logps/rejected": -180.81753540039062, |
|
"loss": 0.7001, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.012552834115922451, |
|
"rewards/margins": -0.012693023309111595, |
|
"rewards/rejected": 0.00014019012451171875, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 6.5000000000000004e-06, |
|
"logits/chosen": -1.9389597177505493, |
|
"logits/rejected": -1.9608503580093384, |
|
"logps/chosen": -205.6465606689453, |
|
"logps/rejected": -201.75946044921875, |
|
"loss": 0.6865, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.014348938129842281, |
|
"rewards/margins": 0.013899493962526321, |
|
"rewards/rejected": 0.00044944253750145435, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.000000000000001e-06, |
|
"logits/chosen": -1.9243704080581665, |
|
"logits/rejected": -1.9085144996643066, |
|
"logps/chosen": -201.07188415527344, |
|
"logps/rejected": -221.47982788085938, |
|
"loss": 0.6869, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.0006500715389847755, |
|
"rewards/margins": 0.013952446170151234, |
|
"rewards/rejected": -0.01460251584649086, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.5e-06, |
|
"logits/chosen": -2.1744699478149414, |
|
"logits/rejected": -2.13222599029541, |
|
"logps/chosen": -204.4735107421875, |
|
"logps/rejected": -200.4049072265625, |
|
"loss": 0.6807, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.017023086547851562, |
|
"rewards/margins": 0.02616300620138645, |
|
"rewards/rejected": -0.009139918722212315, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.000000000000001e-06, |
|
"logits/chosen": -2.1755266189575195, |
|
"logits/rejected": -2.150144338607788, |
|
"logps/chosen": -160.6802215576172, |
|
"logps/rejected": -157.8507080078125, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.007808923255652189, |
|
"rewards/margins": 0.009649563580751419, |
|
"rewards/rejected": -0.0018406407907605171, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.500000000000002e-06, |
|
"logits/chosen": -2.1014106273651123, |
|
"logits/rejected": -2.065537452697754, |
|
"logps/chosen": -137.4163360595703, |
|
"logps/rejected": -137.72653198242188, |
|
"loss": 0.6789, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.019019413739442825, |
|
"rewards/margins": 0.030721498653292656, |
|
"rewards/rejected": -0.01170208491384983, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9e-06, |
|
"logits/chosen": -2.126314401626587, |
|
"logits/rejected": -2.1292574405670166, |
|
"logps/chosen": -157.85987854003906, |
|
"logps/rejected": -175.0936279296875, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.008974838070571423, |
|
"rewards/margins": 0.007031917572021484, |
|
"rewards/rejected": 0.0019429202657192945, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.5e-06, |
|
"logits/chosen": -1.821614384651184, |
|
"logits/rejected": -1.8386234045028687, |
|
"logps/chosen": -157.74339294433594, |
|
"logps/rejected": -173.5108642578125, |
|
"loss": 0.6854, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.0003479006700217724, |
|
"rewards/margins": 0.016933869570493698, |
|
"rewards/rejected": -0.016585970297455788, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1e-05, |
|
"logits/chosen": -2.3475935459136963, |
|
"logits/rejected": -2.3527560234069824, |
|
"logps/chosen": -198.3360137939453, |
|
"logps/rejected": -197.68064880371094, |
|
"loss": 0.7084, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.014697913080453873, |
|
"rewards/margins": -0.029072880744934082, |
|
"rewards/rejected": 0.014374972321093082, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.05e-05, |
|
"logits/chosen": -1.9768266677856445, |
|
"logits/rejected": -1.979588508605957, |
|
"logps/chosen": -138.13516235351562, |
|
"logps/rejected": -137.23497009277344, |
|
"loss": 0.7008, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.008711148053407669, |
|
"rewards/margins": -0.014900517649948597, |
|
"rewards/rejected": 0.006189371459186077, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.1000000000000001e-05, |
|
"logits/chosen": -2.020932197570801, |
|
"logits/rejected": -2.0543789863586426, |
|
"logps/chosen": -143.21487426757812, |
|
"logps/rejected": -145.47467041015625, |
|
"loss": 0.7039, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.0006606103852391243, |
|
"rewards/margins": -0.019345475360751152, |
|
"rewards/rejected": 0.0200060848146677, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.1500000000000002e-05, |
|
"logits/chosen": -1.9338953495025635, |
|
"logits/rejected": -1.8950414657592773, |
|
"logps/chosen": -151.0517578125, |
|
"logps/rejected": -144.9283447265625, |
|
"loss": 0.6729, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.0207887664437294, |
|
"rewards/margins": 0.0420105941593647, |
|
"rewards/rejected": -0.02122182957828045, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.2e-05, |
|
"logits/chosen": -2.1229336261749268, |
|
"logits/rejected": -2.1787612438201904, |
|
"logps/chosen": -143.4667510986328, |
|
"logps/rejected": -151.78887939453125, |
|
"loss": 0.6944, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0076406970620155334, |
|
"rewards/margins": -0.001663590781390667, |
|
"rewards/rejected": -0.0059771062806248665, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.25e-05, |
|
"logits/chosen": -1.9385018348693848, |
|
"logits/rejected": -1.8521825075149536, |
|
"logps/chosen": -136.98797607421875, |
|
"logps/rejected": -123.29915618896484, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.016515588387846947, |
|
"rewards/margins": 0.013585926033556461, |
|
"rewards/rejected": 0.0029296651482582092, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.3000000000000001e-05, |
|
"logits/chosen": -2.0494840145111084, |
|
"logits/rejected": -2.0260462760925293, |
|
"logps/chosen": -206.23294067382812, |
|
"logps/rejected": -204.59170532226562, |
|
"loss": 0.7052, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.005298185162246227, |
|
"rewards/margins": -0.022758912295103073, |
|
"rewards/rejected": 0.01746072620153427, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.3500000000000001e-05, |
|
"logits/chosen": -2.185777425765991, |
|
"logits/rejected": -2.16872501373291, |
|
"logps/chosen": -166.58811950683594, |
|
"logps/rejected": -168.48207092285156, |
|
"loss": 0.6994, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.02519994042813778, |
|
"rewards/margins": -0.011203411035239697, |
|
"rewards/rejected": -0.013996529392898083, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.4000000000000001e-05, |
|
"logits/chosen": -2.1363608837127686, |
|
"logits/rejected": -2.1508541107177734, |
|
"logps/chosen": -174.48770141601562, |
|
"logps/rejected": -169.1255645751953, |
|
"loss": 0.7026, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.008833074010908604, |
|
"rewards/margins": -0.01709108054637909, |
|
"rewards/rejected": 0.008258008398115635, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.45e-05, |
|
"logits/chosen": -2.1649487018585205, |
|
"logits/rejected": -2.170478582382202, |
|
"logps/chosen": -166.38059997558594, |
|
"logps/rejected": -170.26541137695312, |
|
"loss": 0.7105, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.028479862958192825, |
|
"rewards/margins": -0.0317809097468853, |
|
"rewards/rejected": 0.0033010481856763363, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.5e-05, |
|
"logits/chosen": -2.190495014190674, |
|
"logits/rejected": -2.2020809650421143, |
|
"logps/chosen": -167.83895874023438, |
|
"logps/rejected": -171.6207275390625, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.007887793704867363, |
|
"rewards/margins": 0.0026388168334960938, |
|
"rewards/rejected": 0.005248976871371269, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.55e-05, |
|
"logits/chosen": -2.0097718238830566, |
|
"logits/rejected": -1.9927585124969482, |
|
"logps/chosen": -130.2247314453125, |
|
"logps/rejected": -149.3783416748047, |
|
"loss": 0.6896, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.010196828283369541, |
|
"rewards/margins": 0.008201027289032936, |
|
"rewards/rejected": 0.0019958019256591797, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"logits/chosen": -1.9905472993850708, |
|
"logits/rejected": -2.0040249824523926, |
|
"logps/chosen": -176.70541381835938, |
|
"logps/rejected": -200.47962951660156, |
|
"loss": 0.7047, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": 0.0031967416871339083, |
|
"rewards/margins": -0.021031878888607025, |
|
"rewards/rejected": 0.024228623136878014, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.65e-05, |
|
"logits/chosen": -2.0395946502685547, |
|
"logits/rejected": -2.019467353820801, |
|
"logps/chosen": -136.9028778076172, |
|
"logps/rejected": -118.68156433105469, |
|
"loss": 0.6872, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.02108776569366455, |
|
"rewards/margins": 0.013590503484010696, |
|
"rewards/rejected": 0.007497262209653854, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.7000000000000003e-05, |
|
"logits/chosen": -2.0771450996398926, |
|
"logits/rejected": -2.1549808979034424, |
|
"logps/chosen": -133.3074493408203, |
|
"logps/rejected": -141.6013946533203, |
|
"loss": 0.6851, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.01747903786599636, |
|
"rewards/margins": 0.017702102661132812, |
|
"rewards/rejected": -0.00022306526079773903, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.75e-05, |
|
"logits/chosen": -2.1276822090148926, |
|
"logits/rejected": -2.078389883041382, |
|
"logps/chosen": -154.18087768554688, |
|
"logps/rejected": -135.57997131347656, |
|
"loss": 0.7035, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.009335566312074661, |
|
"rewards/margins": -0.019415616989135742, |
|
"rewards/rejected": 0.028751183301210403, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.8e-05, |
|
"logits/chosen": -2.0852906703948975, |
|
"logits/rejected": -2.04500675201416, |
|
"logps/chosen": -167.43182373046875, |
|
"logps/rejected": -164.1446533203125, |
|
"loss": 0.6959, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.02079620398581028, |
|
"rewards/margins": -0.0038356767036020756, |
|
"rewards/rejected": 0.024631882086396217, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.85e-05, |
|
"logits/chosen": -1.9053428173065186, |
|
"logits/rejected": -1.8860125541687012, |
|
"logps/chosen": -186.79791259765625, |
|
"logps/rejected": -183.456298828125, |
|
"loss": 0.7025, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.009571265429258347, |
|
"rewards/margins": -0.01751232147216797, |
|
"rewards/rejected": 0.027083586901426315, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9e-05, |
|
"logits/chosen": -1.9760335683822632, |
|
"logits/rejected": -2.062265634536743, |
|
"logps/chosen": -131.81793212890625, |
|
"logps/rejected": -126.02755737304688, |
|
"loss": 0.6897, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.006855441257357597, |
|
"rewards/margins": 0.009298227727413177, |
|
"rewards/rejected": -0.016153670847415924, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9500000000000003e-05, |
|
"logits/chosen": -1.9339022636413574, |
|
"logits/rejected": -1.9163322448730469, |
|
"logps/chosen": -154.1663818359375, |
|
"logps/rejected": -155.90472412109375, |
|
"loss": 0.6856, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.028609374538064003, |
|
"rewards/margins": 0.01638217084109783, |
|
"rewards/rejected": 0.012227201834321022, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2e-05, |
|
"logits/chosen": -1.9966247081756592, |
|
"logits/rejected": -1.949063777923584, |
|
"logps/chosen": -167.77857971191406, |
|
"logps/rejected": -169.52732849121094, |
|
"loss": 0.6966, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.09298010170459747, |
|
"rewards/margins": -0.005213452503085136, |
|
"rewards/rejected": 0.09819354861974716, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.05e-05, |
|
"logits/chosen": -2.129456043243408, |
|
"logits/rejected": -2.1342248916625977, |
|
"logps/chosen": -133.3363494873047, |
|
"logps/rejected": -137.60580444335938, |
|
"loss": 0.6937, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.060414716601371765, |
|
"rewards/margins": 0.00036644982174038887, |
|
"rewards/rejected": 0.060048267245292664, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.1e-05, |
|
"logits/chosen": -2.113492012023926, |
|
"logits/rejected": -2.149386167526245, |
|
"logps/chosen": -158.78790283203125, |
|
"logps/rejected": -151.22894287109375, |
|
"loss": 0.6959, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.042578794062137604, |
|
"rewards/margins": -0.003336430061608553, |
|
"rewards/rejected": 0.04591522365808487, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.15e-05, |
|
"logits/chosen": -2.14461088180542, |
|
"logits/rejected": -2.127142906188965, |
|
"logps/chosen": -161.65231323242188, |
|
"logps/rejected": -193.1698760986328, |
|
"loss": 0.7047, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": 0.04428081959486008, |
|
"rewards/margins": -0.021442033350467682, |
|
"rewards/rejected": 0.06572284549474716, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.2000000000000003e-05, |
|
"logits/chosen": -1.9628074169158936, |
|
"logits/rejected": -1.9140665531158447, |
|
"logps/chosen": -165.8412628173828, |
|
"logps/rejected": -159.8250274658203, |
|
"loss": 0.699, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.01591024361550808, |
|
"rewards/margins": -0.009422186762094498, |
|
"rewards/rejected": -0.006488058716058731, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.25e-05, |
|
"logits/chosen": -2.009124517440796, |
|
"logits/rejected": -2.051636219024658, |
|
"logps/chosen": -157.3372039794922, |
|
"logps/rejected": -168.30613708496094, |
|
"loss": 0.6646, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.06606419384479523, |
|
"rewards/margins": 0.060197923332452774, |
|
"rewards/rejected": 0.0058662667870521545, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.3000000000000003e-05, |
|
"logits/chosen": -1.9960308074951172, |
|
"logits/rejected": -2.013731002807617, |
|
"logps/chosen": -152.45262145996094, |
|
"logps/rejected": -152.74990844726562, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.041900113224983215, |
|
"rewards/margins": 0.005311486776918173, |
|
"rewards/rejected": 0.036588624119758606, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.35e-05, |
|
"logits/chosen": -2.084881544113159, |
|
"logits/rejected": -2.051604747772217, |
|
"logps/chosen": -147.22457885742188, |
|
"logps/rejected": -130.90673828125, |
|
"loss": 0.7003, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.027727343142032623, |
|
"rewards/margins": -0.011356806382536888, |
|
"rewards/rejected": 0.03908415138721466, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.4e-05, |
|
"logits/chosen": -2.172280788421631, |
|
"logits/rejected": -2.161738872528076, |
|
"logps/chosen": -169.7745819091797, |
|
"logps/rejected": -169.87936401367188, |
|
"loss": 0.6954, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.026907825842499733, |
|
"rewards/margins": -0.0014351843856275082, |
|
"rewards/rejected": 0.02834300883114338, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.45e-05, |
|
"logits/chosen": -2.127919912338257, |
|
"logits/rejected": -2.0433998107910156, |
|
"logps/chosen": -170.56634521484375, |
|
"logps/rejected": -158.96981811523438, |
|
"loss": 0.7064, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": 0.04663754254579544, |
|
"rewards/margins": -0.023569582030177116, |
|
"rewards/rejected": 0.07020711898803711, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.5e-05, |
|
"logits/chosen": -2.0349974632263184, |
|
"logits/rejected": -1.9954936504364014, |
|
"logps/chosen": -198.20065307617188, |
|
"logps/rejected": -184.9116973876953, |
|
"loss": 0.6746, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.031090188771486282, |
|
"rewards/margins": 0.039133429527282715, |
|
"rewards/rejected": -0.008043241687119007, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.5500000000000003e-05, |
|
"logits/chosen": -2.1735117435455322, |
|
"logits/rejected": -2.1993043422698975, |
|
"logps/chosen": -152.16355895996094, |
|
"logps/rejected": -152.68006896972656, |
|
"loss": 0.7076, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.03591465950012207, |
|
"rewards/margins": -0.024077631533145905, |
|
"rewards/rejected": 0.05999229848384857, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.6000000000000002e-05, |
|
"logits/chosen": -1.999128818511963, |
|
"logits/rejected": -2.059635877609253, |
|
"logps/chosen": -201.5443878173828, |
|
"logps/rejected": -227.005859375, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.02559967152774334, |
|
"rewards/margins": 0.004609701223671436, |
|
"rewards/rejected": 0.02098996937274933, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.6500000000000004e-05, |
|
"logits/chosen": -2.162651538848877, |
|
"logits/rejected": -2.1881916522979736, |
|
"logps/chosen": -124.34934997558594, |
|
"logps/rejected": -130.11549377441406, |
|
"loss": 0.6881, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.08756237477064133, |
|
"rewards/margins": 0.012067937292158604, |
|
"rewards/rejected": 0.0754944384098053, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.7000000000000002e-05, |
|
"logits/chosen": -1.9938299655914307, |
|
"logits/rejected": -2.034883499145508, |
|
"logps/chosen": -167.63168334960938, |
|
"logps/rejected": -171.09104919433594, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.0470796562731266, |
|
"rewards/margins": 0.00963954720646143, |
|
"rewards/rejected": 0.03744010999798775, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"logits/chosen": -2.0790414810180664, |
|
"logits/rejected": -2.1538212299346924, |
|
"logps/chosen": -155.3251953125, |
|
"logps/rejected": -174.05441284179688, |
|
"loss": 0.701, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.041025254875421524, |
|
"rewards/margins": -0.013429548591375351, |
|
"rewards/rejected": 0.054454803466796875, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.8000000000000003e-05, |
|
"logits/chosen": -2.1940407752990723, |
|
"logits/rejected": -2.2308764457702637, |
|
"logps/chosen": -187.58314514160156, |
|
"logps/rejected": -175.3198699951172, |
|
"loss": 0.663, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.09865675121545792, |
|
"rewards/margins": 0.06462635844945908, |
|
"rewards/rejected": 0.03403039276599884, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.8499999999999998e-05, |
|
"logits/chosen": -2.0591015815734863, |
|
"logits/rejected": -2.084754705429077, |
|
"logps/chosen": -170.54522705078125, |
|
"logps/rejected": -157.01759338378906, |
|
"loss": 0.6995, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.030951734632253647, |
|
"rewards/margins": -0.004660461097955704, |
|
"rewards/rejected": 0.03561220318078995, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.9e-05, |
|
"logits/chosen": -1.9737329483032227, |
|
"logits/rejected": -1.965449333190918, |
|
"logps/chosen": -149.2818603515625, |
|
"logps/rejected": -140.09564208984375, |
|
"loss": 0.6789, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.07900600135326385, |
|
"rewards/margins": 0.030826283618807793, |
|
"rewards/rejected": 0.04817971587181091, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.95e-05, |
|
"logits/chosen": -1.9386465549468994, |
|
"logits/rejected": -1.9863896369934082, |
|
"logps/chosen": -148.0929718017578, |
|
"logps/rejected": -152.614990234375, |
|
"loss": 0.6867, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.06615930050611496, |
|
"rewards/margins": 0.014939931221306324, |
|
"rewards/rejected": 0.05121936649084091, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3e-05, |
|
"logits/chosen": -2.1406030654907227, |
|
"logits/rejected": -2.140972852706909, |
|
"logps/chosen": -164.55569458007812, |
|
"logps/rejected": -160.45802307128906, |
|
"loss": 0.6848, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.07306110858917236, |
|
"rewards/margins": 0.02018454112112522, |
|
"rewards/rejected": 0.05287656933069229, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.05e-05, |
|
"logits/chosen": -2.1821627616882324, |
|
"logits/rejected": -2.23848819732666, |
|
"logps/chosen": -154.3368682861328, |
|
"logps/rejected": -153.51547241210938, |
|
"loss": 0.6941, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.07242944836616516, |
|
"rewards/margins": 9.055249392986298e-05, |
|
"rewards/rejected": 0.07233888655900955, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.1e-05, |
|
"logits/chosen": -2.1834821701049805, |
|
"logits/rejected": -2.132404327392578, |
|
"logps/chosen": -148.86456298828125, |
|
"logps/rejected": -149.2091522216797, |
|
"loss": 0.7058, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.09177599102258682, |
|
"rewards/margins": -0.021642468869686127, |
|
"rewards/rejected": 0.11341846734285355, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.15e-05, |
|
"logits/chosen": -2.037621259689331, |
|
"logits/rejected": -2.0467352867126465, |
|
"logps/chosen": -151.49147033691406, |
|
"logps/rejected": -165.01246643066406, |
|
"loss": 0.7098, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": 0.1084907054901123, |
|
"rewards/margins": -0.023577161133289337, |
|
"rewards/rejected": 0.13206787407398224, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.2000000000000005e-05, |
|
"logits/chosen": -2.062051773071289, |
|
"logits/rejected": -1.9661951065063477, |
|
"logps/chosen": -139.70193481445312, |
|
"logps/rejected": -155.814697265625, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0038987197913229465, |
|
"rewards/margins": 0.0076565514318645, |
|
"rewards/rejected": -0.0037578358314931393, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"logits/chosen": -2.132296562194824, |
|
"logits/rejected": -2.091379404067993, |
|
"logps/chosen": -148.2380828857422, |
|
"logps/rejected": -155.97186279296875, |
|
"loss": 0.6824, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.061218734830617905, |
|
"rewards/margins": 0.02413906902074814, |
|
"rewards/rejected": 0.037079669535160065, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.3e-05, |
|
"logits/chosen": -1.8095799684524536, |
|
"logits/rejected": -1.8316328525543213, |
|
"logps/chosen": -168.23243713378906, |
|
"logps/rejected": -163.02633666992188, |
|
"loss": 0.7047, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.09200361371040344, |
|
"rewards/margins": -0.017515014857053757, |
|
"rewards/rejected": 0.1095186173915863, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.35e-05, |
|
"logits/chosen": -1.8896549940109253, |
|
"logits/rejected": -1.9064245223999023, |
|
"logps/chosen": -178.73973083496094, |
|
"logps/rejected": -180.1619873046875, |
|
"loss": 0.7115, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.024672793224453926, |
|
"rewards/margins": -0.03270921856164932, |
|
"rewards/rejected": 0.0573820136487484, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.4000000000000007e-05, |
|
"logits/chosen": -1.7941502332687378, |
|
"logits/rejected": -1.8412476778030396, |
|
"logps/chosen": -134.13589477539062, |
|
"logps/rejected": -135.673828125, |
|
"loss": 0.6453, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.13328862190246582, |
|
"rewards/margins": 0.10187779366970062, |
|
"rewards/rejected": 0.03141083940863609, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.45e-05, |
|
"logits/chosen": -2.1372315883636475, |
|
"logits/rejected": -2.158930778503418, |
|
"logps/chosen": -142.1901397705078, |
|
"logps/rejected": -140.1024169921875, |
|
"loss": 0.7298, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": 0.04432840272784233, |
|
"rewards/margins": -0.06238814443349838, |
|
"rewards/rejected": 0.10671653598546982, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.5e-05, |
|
"logits/chosen": -2.2218027114868164, |
|
"logits/rejected": -2.2194578647613525, |
|
"logps/chosen": -172.63040161132812, |
|
"logps/rejected": -171.96815490722656, |
|
"loss": 0.7005, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": 0.033330656588077545, |
|
"rewards/margins": -0.012100504711270332, |
|
"rewards/rejected": 0.04543116316199303, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.55e-05, |
|
"logits/chosen": -2.012197971343994, |
|
"logits/rejected": -2.0255186557769775, |
|
"logps/chosen": -217.9222869873047, |
|
"logps/rejected": -226.47398376464844, |
|
"loss": 0.6984, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": 0.058617155998945236, |
|
"rewards/margins": -0.005790230818092823, |
|
"rewards/rejected": 0.06440739333629608, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.6e-05, |
|
"logits/chosen": -2.14078688621521, |
|
"logits/rejected": -2.111306667327881, |
|
"logps/chosen": -160.2899169921875, |
|
"logps/rejected": -154.1366424560547, |
|
"loss": 0.6881, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.074866384267807, |
|
"rewards/margins": 0.01636476442217827, |
|
"rewards/rejected": 0.05850161984562874, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3.65e-05, |
|
"logits/chosen": -1.9476792812347412, |
|
"logits/rejected": -1.9206452369689941, |
|
"logps/chosen": -156.6016845703125, |
|
"logps/rejected": -155.9908905029297, |
|
"loss": 0.6852, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.004470587708055973, |
|
"rewards/margins": 0.024280693382024765, |
|
"rewards/rejected": -0.019810102880001068, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3.7e-05, |
|
"logits/chosen": -2.07848858833313, |
|
"logits/rejected": -2.1027286052703857, |
|
"logps/chosen": -153.7880096435547, |
|
"logps/rejected": -151.38841247558594, |
|
"loss": 0.7136, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.049511026591062546, |
|
"rewards/margins": -0.0270721185952425, |
|
"rewards/rejected": 0.0765831470489502, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"logits/chosen": -2.0560519695281982, |
|
"logits/rejected": -2.0237274169921875, |
|
"logps/chosen": -168.32672119140625, |
|
"logps/rejected": -166.328125, |
|
"loss": 0.6885, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.01440761424601078, |
|
"rewards/margins": 0.0161895789206028, |
|
"rewards/rejected": -0.0017819646745920181, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3.8e-05, |
|
"logits/chosen": -1.9260778427124023, |
|
"logits/rejected": -1.9472447633743286, |
|
"logps/chosen": -145.28759765625, |
|
"logps/rejected": -149.98504638671875, |
|
"loss": 0.701, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.034238480031490326, |
|
"rewards/margins": -0.005065919831395149, |
|
"rewards/rejected": 0.039304401725530624, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3.85e-05, |
|
"logits/chosen": -1.6790459156036377, |
|
"logits/rejected": -1.6938380002975464, |
|
"logps/chosen": -169.58250427246094, |
|
"logps/rejected": -171.97097778320312, |
|
"loss": 0.7036, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.12939399480819702, |
|
"rewards/margins": -0.008996442891657352, |
|
"rewards/rejected": 0.1383904367685318, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"logits/chosen": -2.2361156940460205, |
|
"logits/rejected": -2.1365509033203125, |
|
"logps/chosen": -207.76535034179688, |
|
"logps/rejected": -197.6790313720703, |
|
"loss": 0.6763, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.10514166951179504, |
|
"rewards/margins": 0.03811817243695259, |
|
"rewards/rejected": 0.06702349334955215, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3.9500000000000005e-05, |
|
"logits/chosen": -2.0133023262023926, |
|
"logits/rejected": -2.0518369674682617, |
|
"logps/chosen": -143.17681884765625, |
|
"logps/rejected": -136.48175048828125, |
|
"loss": 0.6976, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.05453691631555557, |
|
"rewards/margins": -0.002915834542363882, |
|
"rewards/rejected": 0.05745274946093559, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4e-05, |
|
"logits/chosen": -2.024761438369751, |
|
"logits/rejected": -2.054973840713501, |
|
"logps/chosen": -153.51858520507812, |
|
"logps/rejected": -142.02432250976562, |
|
"loss": 0.6967, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.01629617251455784, |
|
"rewards/margins": -0.0009380597621202469, |
|
"rewards/rejected": 0.017234232276678085, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.05e-05, |
|
"logits/chosen": -2.0025839805603027, |
|
"logits/rejected": -2.0151402950286865, |
|
"logps/chosen": -159.1929931640625, |
|
"logps/rejected": -168.1624755859375, |
|
"loss": 0.687, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.06722717732191086, |
|
"rewards/margins": 0.03626187518239021, |
|
"rewards/rejected": 0.030965294688940048, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.1e-05, |
|
"logits/chosen": -1.9327080249786377, |
|
"logits/rejected": -1.947874665260315, |
|
"logps/chosen": -215.408935546875, |
|
"logps/rejected": -199.43902587890625, |
|
"loss": 0.7457, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.1100463718175888, |
|
"rewards/margins": -0.09321331977844238, |
|
"rewards/rejected": -0.016833044588565826, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.15e-05, |
|
"logits/chosen": -1.9191949367523193, |
|
"logits/rejected": -1.9307305812835693, |
|
"logps/chosen": -170.3133544921875, |
|
"logps/rejected": -154.84254455566406, |
|
"loss": 0.7279, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.03949739784002304, |
|
"rewards/margins": -0.06344561278820038, |
|
"rewards/rejected": 0.023948216810822487, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.2e-05, |
|
"logits/chosen": -1.8360068798065186, |
|
"logits/rejected": -1.8425829410552979, |
|
"logps/chosen": -165.39141845703125, |
|
"logps/rejected": -161.62539672851562, |
|
"loss": 0.7216, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.07253064215183258, |
|
"rewards/margins": -0.03998390585184097, |
|
"rewards/rejected": 0.11251455545425415, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.25e-05, |
|
"logits/chosen": -1.9030508995056152, |
|
"logits/rejected": -1.9330065250396729, |
|
"logps/chosen": -209.78103637695312, |
|
"logps/rejected": -220.98043823242188, |
|
"loss": 0.6309, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.0014497279189527035, |
|
"rewards/margins": 0.13583050668239594, |
|
"rewards/rejected": -0.1343807578086853, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.3e-05, |
|
"logits/chosen": -1.9742060899734497, |
|
"logits/rejected": -1.9866974353790283, |
|
"logps/chosen": -171.7286376953125, |
|
"logps/rejected": -168.8912811279297, |
|
"loss": 0.7484, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": 0.025174710899591446, |
|
"rewards/margins": -0.09477367997169495, |
|
"rewards/rejected": 0.1199483871459961, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.35e-05, |
|
"logits/chosen": -2.1695969104766846, |
|
"logits/rejected": -2.252450704574585, |
|
"logps/chosen": -125.7203598022461, |
|
"logps/rejected": -158.59054565429688, |
|
"loss": 0.7255, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.010992627590894699, |
|
"rewards/margins": -0.052416570484638214, |
|
"rewards/rejected": 0.04142393916845322, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"logits/chosen": -1.9488649368286133, |
|
"logits/rejected": -1.9779549837112427, |
|
"logps/chosen": -182.26705932617188, |
|
"logps/rejected": -173.61148071289062, |
|
"loss": 0.7215, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": 0.07614026218652725, |
|
"rewards/margins": -0.042962007224559784, |
|
"rewards/rejected": 0.11910226941108704, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.4500000000000004e-05, |
|
"logits/chosen": -1.9029545783996582, |
|
"logits/rejected": -1.9423730373382568, |
|
"logps/chosen": -200.17457580566406, |
|
"logps/rejected": -173.20709228515625, |
|
"loss": 0.733, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.07196169346570969, |
|
"rewards/margins": -0.0681467056274414, |
|
"rewards/rejected": 0.1401083916425705, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.5e-05, |
|
"logits/chosen": -2.3139305114746094, |
|
"logits/rejected": -2.351571798324585, |
|
"logps/chosen": -146.32611083984375, |
|
"logps/rejected": -156.42579650878906, |
|
"loss": 0.7095, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.0665954127907753, |
|
"rewards/margins": -0.01309509202837944, |
|
"rewards/rejected": -0.05350032076239586, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.55e-05, |
|
"logits/chosen": -1.9338881969451904, |
|
"logits/rejected": -1.9347970485687256, |
|
"logps/chosen": -143.15924072265625, |
|
"logps/rejected": -152.71804809570312, |
|
"loss": 0.7129, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.010173199698328972, |
|
"rewards/margins": -0.03304898738861084, |
|
"rewards/rejected": 0.022875778377056122, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.600000000000001e-05, |
|
"logits/chosen": -1.7374138832092285, |
|
"logits/rejected": -1.7747021913528442, |
|
"logps/chosen": -187.83636474609375, |
|
"logps/rejected": -160.97560119628906, |
|
"loss": 0.682, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.06837701797485352, |
|
"rewards/margins": 0.042838774621486664, |
|
"rewards/rejected": 0.02553824707865715, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.6500000000000005e-05, |
|
"logits/chosen": -2.21986722946167, |
|
"logits/rejected": -2.2679433822631836, |
|
"logps/chosen": -147.17849731445312, |
|
"logps/rejected": -152.95431518554688, |
|
"loss": 0.6438, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.08736428618431091, |
|
"rewards/margins": 0.10906204581260681, |
|
"rewards/rejected": -0.021697763353586197, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.7e-05, |
|
"logits/chosen": -2.187103271484375, |
|
"logits/rejected": -2.239633083343506, |
|
"logps/chosen": -124.42755126953125, |
|
"logps/rejected": -125.82307434082031, |
|
"loss": 0.6312, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.03520822525024414, |
|
"rewards/margins": 0.14293043315410614, |
|
"rewards/rejected": -0.1077222228050232, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.75e-05, |
|
"logits/chosen": -2.1484622955322266, |
|
"logits/rejected": -2.167619228363037, |
|
"logps/chosen": -139.31021118164062, |
|
"logps/rejected": -148.8551483154297, |
|
"loss": 0.6474, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.11308951675891876, |
|
"rewards/margins": 0.10877098888158798, |
|
"rewards/rejected": 0.004318520426750183, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.8e-05, |
|
"logits/chosen": -2.1004369258880615, |
|
"logits/rejected": -2.0368094444274902, |
|
"logps/chosen": -142.34259033203125, |
|
"logps/rejected": -131.41265869140625, |
|
"loss": 0.6511, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.06739182770252228, |
|
"rewards/margins": 0.0982145369052887, |
|
"rewards/rejected": -0.03082270734012127, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.85e-05, |
|
"logits/chosen": -2.269822597503662, |
|
"logits/rejected": -2.3615293502807617, |
|
"logps/chosen": -159.7416534423828, |
|
"logps/rejected": -158.27255249023438, |
|
"loss": 0.7067, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.042380258440971375, |
|
"rewards/margins": -0.016107436269521713, |
|
"rewards/rejected": -0.02627282217144966, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9e-05, |
|
"logits/chosen": -2.1764469146728516, |
|
"logits/rejected": -2.1454200744628906, |
|
"logps/chosen": -142.2655029296875, |
|
"logps/rejected": -153.2463836669922, |
|
"loss": 0.7423, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.0025094300508499146, |
|
"rewards/margins": -0.08575483411550522, |
|
"rewards/rejected": 0.08324538916349411, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9500000000000004e-05, |
|
"logits/chosen": -2.193749189376831, |
|
"logits/rejected": -2.1985344886779785, |
|
"logps/chosen": -187.168701171875, |
|
"logps/rejected": -197.92352294921875, |
|
"loss": 0.6663, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.06072089821100235, |
|
"rewards/margins": 0.07174615561962128, |
|
"rewards/rejected": -0.011025259271264076, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 5e-05, |
|
"logits/chosen": -2.068103551864624, |
|
"logits/rejected": -2.0740597248077393, |
|
"logps/chosen": -144.1133270263672, |
|
"logps/rejected": -148.28744506835938, |
|
"loss": 0.7095, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.04808714985847473, |
|
"rewards/margins": -0.004095175303518772, |
|
"rewards/rejected": 0.05218231678009033, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.999997432392803e-05, |
|
"logits/chosen": -1.8877215385437012, |
|
"logits/rejected": -1.8843252658843994, |
|
"logps/chosen": -212.08737182617188, |
|
"logps/rejected": -159.77037048339844, |
|
"loss": 0.6442, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.05051098391413689, |
|
"rewards/margins": 0.11454086750745773, |
|
"rewards/rejected": -0.06402988731861115, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9999897295764844e-05, |
|
"logits/chosen": -2.040144205093384, |
|
"logits/rejected": -2.0494396686553955, |
|
"logps/chosen": -160.35443115234375, |
|
"logps/rejected": -169.1414794921875, |
|
"loss": 0.8033, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.07908819615840912, |
|
"rewards/margins": -0.19221995770931244, |
|
"rewards/rejected": 0.11313176155090332, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9999768915668665e-05, |
|
"logits/chosen": -2.148070812225342, |
|
"logits/rejected": -2.1513681411743164, |
|
"logps/chosen": -145.71995544433594, |
|
"logps/rejected": -140.0707550048828, |
|
"loss": 0.7564, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.12240414321422577, |
|
"rewards/margins": -0.10564970970153809, |
|
"rewards/rejected": -0.01675444096326828, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.999958918390321e-05, |
|
"logits/chosen": -2.0417237281799316, |
|
"logits/rejected": -1.9771349430084229, |
|
"logps/chosen": -146.751708984375, |
|
"logps/rejected": -139.89523315429688, |
|
"loss": 0.7258, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.053674325346946716, |
|
"rewards/margins": -0.05639982223510742, |
|
"rewards/rejected": 0.0027255089953541756, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.999935810083766e-05, |
|
"logits/chosen": -2.003383159637451, |
|
"logits/rejected": -1.9574511051177979, |
|
"logps/chosen": -173.7794647216797, |
|
"logps/rejected": -171.2220916748047, |
|
"loss": 0.7666, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.051795199513435364, |
|
"rewards/margins": -0.1156022921204567, |
|
"rewards/rejected": 0.06380710750818253, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.999907566694667e-05, |
|
"logits/chosen": -2.0609560012817383, |
|
"logits/rejected": -2.101062536239624, |
|
"logps/chosen": -143.58428955078125, |
|
"logps/rejected": -162.1249237060547, |
|
"loss": 0.7173, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.04189272224903107, |
|
"rewards/margins": -0.02848353236913681, |
|
"rewards/rejected": -0.013409186154603958, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9998741882810384e-05, |
|
"logits/chosen": -1.862243890762329, |
|
"logits/rejected": -1.8240137100219727, |
|
"logps/chosen": -177.3428192138672, |
|
"logps/rejected": -179.76573181152344, |
|
"loss": 0.7098, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.03088255040347576, |
|
"rewards/margins": -0.002100745216012001, |
|
"rewards/rejected": -0.028781799599528313, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.999835674911443e-05, |
|
"logits/chosen": -1.7740706205368042, |
|
"logits/rejected": -1.810904860496521, |
|
"logps/chosen": -203.03497314453125, |
|
"logps/rejected": -209.43789672851562, |
|
"loss": 0.66, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.06684975326061249, |
|
"rewards/margins": 0.08228196948766708, |
|
"rewards/rejected": -0.015432218089699745, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.999792026664991e-05, |
|
"logits/chosen": -1.946571707725525, |
|
"logits/rejected": -1.9524767398834229, |
|
"logps/chosen": -160.1234130859375, |
|
"logps/rejected": -150.89892578125, |
|
"loss": 0.6772, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.01813810132443905, |
|
"rewards/margins": 0.042000893503427505, |
|
"rewards/rejected": -0.06013898551464081, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9997432436313384e-05, |
|
"logits/chosen": -2.24985933303833, |
|
"logits/rejected": -2.2947096824645996, |
|
"logps/chosen": -147.19674682617188, |
|
"logps/rejected": -147.4495086669922, |
|
"loss": 0.7232, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.14067719876766205, |
|
"rewards/margins": -0.044903866946697235, |
|
"rewards/rejected": -0.09577332437038422, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.99968932591069e-05, |
|
"logits/chosen": -2.289724826812744, |
|
"logits/rejected": -2.1994781494140625, |
|
"logps/chosen": -170.31321716308594, |
|
"logps/rejected": -153.826904296875, |
|
"loss": 0.7918, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.1584378331899643, |
|
"rewards/margins": -0.16181893646717072, |
|
"rewards/rejected": 0.0033811070024967194, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.999630273613799e-05, |
|
"logits/chosen": -1.6163581609725952, |
|
"logits/rejected": -1.6122593879699707, |
|
"logps/chosen": -168.62997436523438, |
|
"logps/rejected": -222.83038330078125, |
|
"loss": 0.6669, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.057224374264478683, |
|
"rewards/margins": 0.09167467057704926, |
|
"rewards/rejected": -0.03445029631257057, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.999566086861961e-05, |
|
"logits/chosen": -2.1440043449401855, |
|
"logits/rejected": -2.1370463371276855, |
|
"logps/chosen": -133.85556030273438, |
|
"logps/rejected": -146.81748962402344, |
|
"loss": 0.7124, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.06403251737356186, |
|
"rewards/margins": -0.009389929473400116, |
|
"rewards/rejected": -0.054642580449581146, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.999496765787024e-05, |
|
"logits/chosen": -2.1842727661132812, |
|
"logits/rejected": -2.2154009342193604, |
|
"logps/chosen": -151.34469604492188, |
|
"logps/rejected": -153.56678771972656, |
|
"loss": 0.6244, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.009856656193733215, |
|
"rewards/margins": 0.15723924338817596, |
|
"rewards/rejected": -0.14738260209560394, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.9994223105313774e-05, |
|
"logits/chosen": -2.212733745574951, |
|
"logits/rejected": -2.2484166622161865, |
|
"logps/chosen": -129.7595672607422, |
|
"logps/rejected": -129.0322265625, |
|
"loss": 0.6389, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0931883379817009, |
|
"rewards/margins": 0.12833790481090546, |
|
"rewards/rejected": -0.035149574279785156, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.9993427212479606e-05, |
|
"logits/chosen": -2.1278915405273438, |
|
"logits/rejected": -2.1659746170043945, |
|
"logps/chosen": -166.4802703857422, |
|
"logps/rejected": -177.78651428222656, |
|
"loss": 0.7355, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.13353124260902405, |
|
"rewards/margins": -0.06453972309827805, |
|
"rewards/rejected": -0.068991519510746, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.999257998100254e-05, |
|
"logits/chosen": -2.3552539348602295, |
|
"logits/rejected": -2.4131429195404053, |
|
"logps/chosen": -167.33251953125, |
|
"logps/rejected": -171.97781372070312, |
|
"loss": 0.7883, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.13030581176280975, |
|
"rewards/margins": -0.1426038295030594, |
|
"rewards/rejected": 0.012298017740249634, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.999168141262289e-05, |
|
"logits/chosen": -2.211704969406128, |
|
"logits/rejected": -2.204983711242676, |
|
"logps/chosen": -206.1199188232422, |
|
"logps/rejected": -197.96646118164062, |
|
"loss": 0.7095, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.04005058482289314, |
|
"rewards/margins": -0.022744348272681236, |
|
"rewards/rejected": -0.017306234687566757, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.9990731509186376e-05, |
|
"logits/chosen": -2.1870293617248535, |
|
"logits/rejected": -2.1281723976135254, |
|
"logps/chosen": -155.93276977539062, |
|
"logps/rejected": -145.3558807373047, |
|
"loss": 0.6718, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.005868477746844292, |
|
"rewards/margins": 0.055788375437259674, |
|
"rewards/rejected": -0.049919892102479935, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.998973027264419e-05, |
|
"logits/chosen": -1.9813153743743896, |
|
"logits/rejected": -1.9183677434921265, |
|
"logps/chosen": -150.78817749023438, |
|
"logps/rejected": -147.15357971191406, |
|
"loss": 0.7039, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.17385157942771912, |
|
"rewards/margins": -0.001015951856970787, |
|
"rewards/rejected": -0.1728356033563614, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.998867770505295e-05, |
|
"logits/chosen": -2.391598701477051, |
|
"logits/rejected": -2.3418595790863037, |
|
"logps/chosen": -167.2589111328125, |
|
"logps/rejected": -178.03309631347656, |
|
"loss": 0.6671, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0277772955596447, |
|
"rewards/margins": 0.0589999184012413, |
|
"rewards/rejected": -0.0867772102355957, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.9987573808574726e-05, |
|
"logits/chosen": -1.9197032451629639, |
|
"logits/rejected": -2.0230495929718018, |
|
"logps/chosen": -114.3215560913086, |
|
"logps/rejected": -125.48574829101562, |
|
"loss": 0.6731, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.02883918769657612, |
|
"rewards/margins": 0.04838750883936882, |
|
"rewards/rejected": -0.0772266834974289, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.9986418585477016e-05, |
|
"logits/chosen": -2.1796233654022217, |
|
"logits/rejected": -2.1268649101257324, |
|
"logps/chosen": -144.93719482421875, |
|
"logps/rejected": -125.82449340820312, |
|
"loss": 0.7403, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0862671434879303, |
|
"rewards/margins": -0.07632093131542206, |
|
"rewards/rejected": -0.009946208447217941, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.998521203813274e-05, |
|
"logits/chosen": -1.9971880912780762, |
|
"logits/rejected": -2.0406086444854736, |
|
"logps/chosen": -154.99465942382812, |
|
"logps/rejected": -154.3177947998047, |
|
"loss": 0.753, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.1506948173046112, |
|
"rewards/margins": -0.05945264920592308, |
|
"rewards/rejected": -0.09124217927455902, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.9983954169020256e-05, |
|
"logits/chosen": -2.0880544185638428, |
|
"logits/rejected": -2.123401165008545, |
|
"logps/chosen": -169.77288818359375, |
|
"logps/rejected": -179.76673889160156, |
|
"loss": 0.6491, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.16007837653160095, |
|
"rewards/margins": 0.12425337731838226, |
|
"rewards/rejected": -0.284331738948822, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.9982644980723334e-05, |
|
"logits/chosen": -2.232131242752075, |
|
"logits/rejected": -2.2934720516204834, |
|
"logps/chosen": -159.07867431640625, |
|
"logps/rejected": -156.19357299804688, |
|
"loss": 0.7076, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.1424141526222229, |
|
"rewards/margins": 0.0020025279372930527, |
|
"rewards/rejected": -0.1444166749715805, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.998128447593117e-05, |
|
"logits/chosen": -2.07320499420166, |
|
"logits/rejected": -2.0934879779815674, |
|
"logps/chosen": -173.55859375, |
|
"logps/rejected": -171.46405029296875, |
|
"loss": 0.6545, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.15888014435768127, |
|
"rewards/margins": 0.14077186584472656, |
|
"rewards/rejected": -0.29965201020240784, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.997987265743834e-05, |
|
"logits/chosen": -2.186350107192993, |
|
"logits/rejected": -2.153662919998169, |
|
"logps/chosen": -137.228271484375, |
|
"logps/rejected": -150.18634033203125, |
|
"loss": 0.6362, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.06281783431768417, |
|
"rewards/margins": 0.16370470821857452, |
|
"rewards/rejected": -0.2265225499868393, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.997840952814484e-05, |
|
"logits/chosen": -2.0112736225128174, |
|
"logits/rejected": -2.0177900791168213, |
|
"logps/chosen": -194.06292724609375, |
|
"logps/rejected": -184.00973510742188, |
|
"loss": 0.7179, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.24062253534793854, |
|
"rewards/margins": -0.023746546357870102, |
|
"rewards/rejected": -0.21687600016593933, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.9976895091056075e-05, |
|
"logits/chosen": -2.066251277923584, |
|
"logits/rejected": -2.070246458053589, |
|
"logps/chosen": -136.72486877441406, |
|
"logps/rejected": -135.44017028808594, |
|
"loss": 0.6315, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.02474498748779297, |
|
"rewards/margins": 0.1634208709001541, |
|
"rewards/rejected": -0.13867586851119995, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.9975329349282826e-05, |
|
"logits/chosen": -1.9598355293273926, |
|
"logits/rejected": -2.0056591033935547, |
|
"logps/chosen": -123.45001220703125, |
|
"logps/rejected": -134.2187042236328, |
|
"loss": 0.6282, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.014127110131084919, |
|
"rewards/margins": 0.1591949462890625, |
|
"rewards/rejected": -0.17332205176353455, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.9973712306041256e-05, |
|
"logits/chosen": -2.207669734954834, |
|
"logits/rejected": -2.261643886566162, |
|
"logps/chosen": -143.61402893066406, |
|
"logps/rejected": -150.94285583496094, |
|
"loss": 0.7032, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.08026294410228729, |
|
"rewards/margins": -0.0010632979683578014, |
|
"rewards/rejected": -0.0791996419429779, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.997204396465292e-05, |
|
"logits/chosen": -2.2536189556121826, |
|
"logits/rejected": -2.234713554382324, |
|
"logps/chosen": -166.07725524902344, |
|
"logps/rejected": -153.78976440429688, |
|
"loss": 0.7129, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.194389209151268, |
|
"rewards/margins": -0.020291997119784355, |
|
"rewards/rejected": -0.1740972250699997, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.997032432854472e-05, |
|
"logits/chosen": -2.1735360622406006, |
|
"logits/rejected": -2.2178611755371094, |
|
"logps/chosen": -196.8380126953125, |
|
"logps/rejected": -206.2840118408203, |
|
"loss": 0.7158, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.007880209013819695, |
|
"rewards/margins": -0.0179959274828434, |
|
"rewards/rejected": 0.010115718469023705, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.996855340124894e-05, |
|
"logits/chosen": -2.069317579269409, |
|
"logits/rejected": -2.0932023525238037, |
|
"logps/chosen": -135.76324462890625, |
|
"logps/rejected": -132.4375, |
|
"loss": 0.7887, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.13397419452667236, |
|
"rewards/margins": -0.15053679049015045, |
|
"rewards/rejected": 0.016562584787607193, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.996673118640323e-05, |
|
"logits/chosen": -2.2751736640930176, |
|
"logits/rejected": -2.262275218963623, |
|
"logps/chosen": -144.22671508789062, |
|
"logps/rejected": -146.7581787109375, |
|
"loss": 0.6953, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.08663605898618698, |
|
"rewards/margins": 0.018158910796046257, |
|
"rewards/rejected": -0.10479498654603958, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.996485768775055e-05, |
|
"logits/chosen": -2.041444778442383, |
|
"logits/rejected": -2.091449022293091, |
|
"logps/chosen": -143.69158935546875, |
|
"logps/rejected": -157.0668182373047, |
|
"loss": 0.7353, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.209483340382576, |
|
"rewards/margins": -0.06017661839723587, |
|
"rewards/rejected": -0.14930672943592072, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.996293290913926e-05, |
|
"logits/chosen": -1.9326127767562866, |
|
"logits/rejected": -2.0368032455444336, |
|
"logps/chosen": -147.84547424316406, |
|
"logps/rejected": -203.94833374023438, |
|
"loss": 0.7451, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.20097234845161438, |
|
"rewards/margins": -0.03583552688360214, |
|
"rewards/rejected": -0.16513679921627045, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.9960956854522986e-05, |
|
"logits/chosen": -2.267336368560791, |
|
"logits/rejected": -2.2412469387054443, |
|
"logps/chosen": -134.1835174560547, |
|
"logps/rejected": -136.10162353515625, |
|
"loss": 0.7371, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.18377897143363953, |
|
"rewards/margins": -0.07501955330371857, |
|
"rewards/rejected": -0.10875942558050156, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.995892952796074e-05, |
|
"logits/chosen": -2.3473498821258545, |
|
"logits/rejected": -2.2889528274536133, |
|
"logps/chosen": -179.09548950195312, |
|
"logps/rejected": -176.6771697998047, |
|
"loss": 0.6946, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.167083278298378, |
|
"rewards/margins": 0.044769808650016785, |
|
"rewards/rejected": -0.21185307204723358, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.995685093361682e-05, |
|
"logits/chosen": -2.046065330505371, |
|
"logits/rejected": -2.105637311935425, |
|
"logps/chosen": -156.4433135986328, |
|
"logps/rejected": -193.69764709472656, |
|
"loss": 0.6491, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.10867753624916077, |
|
"rewards/margins": 0.12942247092723846, |
|
"rewards/rejected": -0.23810002207756042, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.9954721075760824e-05, |
|
"logits/chosen": -2.135222911834717, |
|
"logits/rejected": -2.0551419258117676, |
|
"logps/chosen": -160.54461669921875, |
|
"logps/rejected": -154.21876525878906, |
|
"loss": 0.7472, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.3932434618473053, |
|
"rewards/margins": -0.07500467449426651, |
|
"rewards/rejected": -0.318238765001297, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.995253995876767e-05, |
|
"logits/chosen": -2.36671781539917, |
|
"logits/rejected": -2.3601675033569336, |
|
"logps/chosen": -161.03819274902344, |
|
"logps/rejected": -143.7089385986328, |
|
"loss": 0.811, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.37680506706237793, |
|
"rewards/margins": -0.192378431558609, |
|
"rewards/rejected": -0.18442663550376892, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.995030758711756e-05, |
|
"logits/chosen": -1.7832953929901123, |
|
"logits/rejected": -1.7478176355361938, |
|
"logps/chosen": -143.82127380371094, |
|
"logps/rejected": -146.1196746826172, |
|
"loss": 0.7393, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.32953861355781555, |
|
"rewards/margins": -0.02949490211904049, |
|
"rewards/rejected": -0.3000437021255493, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.994802396539598e-05, |
|
"logits/chosen": -1.5314689874649048, |
|
"logits/rejected": -1.5684059858322144, |
|
"logps/chosen": -239.08958435058594, |
|
"logps/rejected": -253.18838500976562, |
|
"loss": 0.6769, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.31847676634788513, |
|
"rewards/margins": 0.07425765693187714, |
|
"rewards/rejected": -0.3927344083786011, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.994568909829368e-05, |
|
"logits/chosen": -2.0874621868133545, |
|
"logits/rejected": -2.074917793273926, |
|
"logps/chosen": -161.60157775878906, |
|
"logps/rejected": -152.967529296875, |
|
"loss": 0.7313, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.36861109733581543, |
|
"rewards/margins": -0.05294780433177948, |
|
"rewards/rejected": -0.31566327810287476, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.9943302990606684e-05, |
|
"logits/chosen": -1.996845006942749, |
|
"logits/rejected": -1.9692479372024536, |
|
"logps/chosen": -157.77352905273438, |
|
"logps/rejected": -132.0946044921875, |
|
"loss": 0.8205, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.32108697295188904, |
|
"rewards/margins": -0.1798648089170456, |
|
"rewards/rejected": -0.14122214913368225, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.994086564723626e-05, |
|
"logits/chosen": -2.1677422523498535, |
|
"logits/rejected": -2.18229341506958, |
|
"logps/chosen": -160.99119567871094, |
|
"logps/rejected": -180.38742065429688, |
|
"loss": 0.6763, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5339494943618774, |
|
"rewards/margins": 0.05808330327272415, |
|
"rewards/rejected": -0.592032790184021, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.9938377073188905e-05, |
|
"logits/chosen": -2.201998233795166, |
|
"logits/rejected": -2.1696887016296387, |
|
"logps/chosen": -155.02134704589844, |
|
"logps/rejected": -140.8427734375, |
|
"loss": 0.7243, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.29963669180870056, |
|
"rewards/margins": -0.020382262766361237, |
|
"rewards/rejected": -0.2792544364929199, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.993583727357638e-05, |
|
"logits/chosen": -2.0553178787231445, |
|
"logits/rejected": -2.0267903804779053, |
|
"logps/chosen": -148.95738220214844, |
|
"logps/rejected": -158.5399932861328, |
|
"loss": 0.6162, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.2186848223209381, |
|
"rewards/margins": 0.20554782450199127, |
|
"rewards/rejected": -0.4242326319217682, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.993324625361565e-05, |
|
"logits/chosen": -2.2440099716186523, |
|
"logits/rejected": -2.1926894187927246, |
|
"logps/chosen": -185.78314208984375, |
|
"logps/rejected": -184.54876708984375, |
|
"loss": 0.6397, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.2549251317977905, |
|
"rewards/margins": 0.24536924064159393, |
|
"rewards/rejected": -0.5002943873405457, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.993060401862888e-05, |
|
"logits/chosen": -1.9377892017364502, |
|
"logits/rejected": -1.967207908630371, |
|
"logps/chosen": -145.74510192871094, |
|
"logps/rejected": -142.37782287597656, |
|
"loss": 0.7606, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.3630293905735016, |
|
"rewards/margins": -0.08116517215967178, |
|
"rewards/rejected": -0.281864196062088, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.9927910574043465e-05, |
|
"logits/chosen": -2.01324462890625, |
|
"logits/rejected": -2.029172658920288, |
|
"logps/chosen": -123.41838836669922, |
|
"logps/rejected": -119.00106048583984, |
|
"loss": 0.7249, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.20013180375099182, |
|
"rewards/margins": -0.028365857899188995, |
|
"rewards/rejected": -0.17176595330238342, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.992516592539196e-05, |
|
"logits/chosen": -1.8588542938232422, |
|
"logits/rejected": -1.865120530128479, |
|
"logps/chosen": -193.2555389404297, |
|
"logps/rejected": -200.77032470703125, |
|
"loss": 0.7573, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.4952069818973541, |
|
"rewards/margins": -0.08547386527061462, |
|
"rewards/rejected": -0.4097330868244171, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.9922370078312105e-05, |
|
"logits/chosen": -2.0865638256073, |
|
"logits/rejected": -2.0685646533966064, |
|
"logps/chosen": -156.38755798339844, |
|
"logps/rejected": -151.69525146484375, |
|
"loss": 0.7112, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.34599804878234863, |
|
"rewards/margins": -0.008161775767803192, |
|
"rewards/rejected": -0.33783626556396484, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.991952303854682e-05, |
|
"logits/chosen": -2.1627612113952637, |
|
"logits/rejected": -2.19240665435791, |
|
"logps/chosen": -149.98419189453125, |
|
"logps/rejected": -151.00982666015625, |
|
"loss": 0.646, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.3006647229194641, |
|
"rewards/margins": 0.12499865889549255, |
|
"rewards/rejected": -0.42566338181495667, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.9916624811944175e-05, |
|
"logits/chosen": -1.9495761394500732, |
|
"logits/rejected": -2.0748660564422607, |
|
"logps/chosen": -159.08567810058594, |
|
"logps/rejected": -180.21310424804688, |
|
"loss": 0.707, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.39853742718696594, |
|
"rewards/margins": 0.021911904215812683, |
|
"rewards/rejected": -0.42044931650161743, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.991367540445735e-05, |
|
"logits/chosen": -2.0296339988708496, |
|
"logits/rejected": -1.9863965511322021, |
|
"logps/chosen": -164.78256225585938, |
|
"logps/rejected": -165.06405639648438, |
|
"loss": 0.6399, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.22124487161636353, |
|
"rewards/margins": 0.14064420759677887, |
|
"rewards/rejected": -0.3618890643119812, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.991067482214471e-05, |
|
"logits/chosen": -1.9162077903747559, |
|
"logits/rejected": -1.9429746866226196, |
|
"logps/chosen": -177.34408569335938, |
|
"logps/rejected": -191.58206176757812, |
|
"loss": 0.6414, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5006837844848633, |
|
"rewards/margins": 0.1467215120792389, |
|
"rewards/rejected": -0.6474053263664246, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.9907623071169686e-05, |
|
"logits/chosen": -1.9484643936157227, |
|
"logits/rejected": -1.9940898418426514, |
|
"logps/chosen": -135.9336700439453, |
|
"logps/rejected": -147.29998779296875, |
|
"loss": 0.6828, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.2519877552986145, |
|
"rewards/margins": 0.03291063383221626, |
|
"rewards/rejected": -0.28489840030670166, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.990452015780085e-05, |
|
"logits/chosen": -1.9855961799621582, |
|
"logits/rejected": -2.01076340675354, |
|
"logps/chosen": -140.24473571777344, |
|
"logps/rejected": -147.60067749023438, |
|
"loss": 0.6506, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.20326684415340424, |
|
"rewards/margins": 0.11952166259288788, |
|
"rewards/rejected": -0.3227885365486145, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.9901366088411846e-05, |
|
"logits/chosen": -1.8572781085968018, |
|
"logits/rejected": -1.886858582496643, |
|
"logps/chosen": -159.31396484375, |
|
"logps/rejected": -150.73434448242188, |
|
"loss": 0.6815, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.3573068380355835, |
|
"rewards/margins": 0.08972961455583572, |
|
"rewards/rejected": -0.447036474943161, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.98981608694814e-05, |
|
"logits/chosen": -2.1316683292388916, |
|
"logits/rejected": -2.108705759048462, |
|
"logps/chosen": -166.07952880859375, |
|
"logps/rejected": -165.3894805908203, |
|
"loss": 0.7855, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.4258570075035095, |
|
"rewards/margins": -0.1352803260087967, |
|
"rewards/rejected": -0.29057663679122925, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.9894904507593316e-05, |
|
"logits/chosen": -2.1435980796813965, |
|
"logits/rejected": -2.0952234268188477, |
|
"logps/chosen": -179.2945098876953, |
|
"logps/rejected": -177.13780212402344, |
|
"loss": 0.8496, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.6829274892807007, |
|
"rewards/margins": -0.2457571029663086, |
|
"rewards/rejected": -0.4371703267097473, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.989159700943643e-05, |
|
"logits/chosen": -2.087015151977539, |
|
"logits/rejected": -2.0333518981933594, |
|
"logps/chosen": -156.10824584960938, |
|
"logps/rejected": -135.26223754882812, |
|
"loss": 0.7424, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.5498830676078796, |
|
"rewards/margins": -0.06063465029001236, |
|
"rewards/rejected": -0.4892484247684479, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.988823838180464e-05, |
|
"logits/chosen": -2.076122999191284, |
|
"logits/rejected": -1.985759973526001, |
|
"logps/chosen": -157.4197540283203, |
|
"logps/rejected": -160.52862548828125, |
|
"loss": 0.6492, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.21956738829612732, |
|
"rewards/margins": 0.1264350414276123, |
|
"rewards/rejected": -0.3460024297237396, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.988482863159684e-05, |
|
"logits/chosen": -1.8897199630737305, |
|
"logits/rejected": -1.9176274538040161, |
|
"logps/chosen": -185.94778442382812, |
|
"logps/rejected": -197.8125, |
|
"loss": 0.7095, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.23862342536449432, |
|
"rewards/margins": 0.02010211907327175, |
|
"rewards/rejected": -0.2587255537509918, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.988136776581696e-05, |
|
"logits/chosen": -2.0815958976745605, |
|
"logits/rejected": -2.0887794494628906, |
|
"logps/chosen": -173.06820678710938, |
|
"logps/rejected": -177.52330017089844, |
|
"loss": 0.6666, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.41561299562454224, |
|
"rewards/margins": 0.10110354423522949, |
|
"rewards/rejected": -0.5167165994644165, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.9877855791573915e-05, |
|
"logits/chosen": -1.8423527479171753, |
|
"logits/rejected": -1.9063888788223267, |
|
"logps/chosen": -123.87655639648438, |
|
"logps/rejected": -143.618408203125, |
|
"loss": 0.659, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.08650312572717667, |
|
"rewards/margins": 0.08998794108629227, |
|
"rewards/rejected": -0.17649102210998535, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.9874292716081595e-05, |
|
"logits/chosen": -1.977996587753296, |
|
"logits/rejected": -2.031097650527954, |
|
"logps/chosen": -147.41297912597656, |
|
"logps/rejected": -145.47198486328125, |
|
"loss": 0.65, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.14451590180397034, |
|
"rewards/margins": 0.10963588207960129, |
|
"rewards/rejected": -0.2541517913341522, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.9870678546658865e-05, |
|
"logits/chosen": -1.904013752937317, |
|
"logits/rejected": -1.9502118825912476, |
|
"logps/chosen": -149.9442596435547, |
|
"logps/rejected": -156.0302734375, |
|
"loss": 0.6812, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.31452393531799316, |
|
"rewards/margins": 0.06542593240737915, |
|
"rewards/rejected": -0.3799498677253723, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.9867013290729535e-05, |
|
"logits/chosen": -1.8673038482666016, |
|
"logits/rejected": -1.8840340375900269, |
|
"logps/chosen": -136.29080200195312, |
|
"logps/rejected": -140.873046875, |
|
"loss": 0.7688, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.3597078323364258, |
|
"rewards/margins": -0.09261485189199448, |
|
"rewards/rejected": -0.2670930027961731, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.986329695582237e-05, |
|
"logits/chosen": -1.965585708618164, |
|
"logits/rejected": -1.9712939262390137, |
|
"logps/chosen": -170.4312286376953, |
|
"logps/rejected": -177.6762237548828, |
|
"loss": 0.6966, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.2586846947669983, |
|
"rewards/margins": 0.04440504312515259, |
|
"rewards/rejected": -0.3030897378921509, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.985952954957103e-05, |
|
"logits/chosen": -1.9653695821762085, |
|
"logits/rejected": -1.9432342052459717, |
|
"logps/chosen": -145.86233520507812, |
|
"logps/rejected": -127.70154571533203, |
|
"loss": 0.7328, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.14063119888305664, |
|
"rewards/margins": -0.0184502974152565, |
|
"rewards/rejected": -0.12218090891838074, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.985571107971408e-05, |
|
"logits/chosen": -2.048206090927124, |
|
"logits/rejected": -2.040398359298706, |
|
"logps/chosen": -150.42953491210938, |
|
"logps/rejected": -161.55064392089844, |
|
"loss": 0.6729, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.23834238946437836, |
|
"rewards/margins": 0.059960223734378815, |
|
"rewards/rejected": -0.29830265045166016, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.9851841554095e-05, |
|
"logits/chosen": -2.0315334796905518, |
|
"logits/rejected": -2.008814811706543, |
|
"logps/chosen": -171.3214569091797, |
|
"logps/rejected": -145.38833618164062, |
|
"loss": 0.664, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.2100733369588852, |
|
"rewards/margins": 0.09207681566476822, |
|
"rewards/rejected": -0.3021501302719116, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.9847920980662134e-05, |
|
"logits/chosen": -2.062809467315674, |
|
"logits/rejected": -2.053818941116333, |
|
"logps/chosen": -139.9019775390625, |
|
"logps/rejected": -137.94232177734375, |
|
"loss": 0.6947, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.2767728269100189, |
|
"rewards/margins": 0.011704735457897186, |
|
"rewards/rejected": -0.2884775400161743, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.984394936746865e-05, |
|
"logits/chosen": -2.2114696502685547, |
|
"logits/rejected": -2.3026535511016846, |
|
"logps/chosen": -155.966552734375, |
|
"logps/rejected": -169.798583984375, |
|
"loss": 0.786, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.33644577860832214, |
|
"rewards/margins": -0.15074411034584045, |
|
"rewards/rejected": -0.1857016235589981, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.98399267226726e-05, |
|
"logits/chosen": -2.354398727416992, |
|
"logits/rejected": -2.3184590339660645, |
|
"logps/chosen": -175.42050170898438, |
|
"logps/rejected": -143.98519897460938, |
|
"loss": 0.7219, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.3392341136932373, |
|
"rewards/margins": -0.028377681970596313, |
|
"rewards/rejected": -0.3108564615249634, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.9835853054536846e-05, |
|
"logits/chosen": -1.9018394947052002, |
|
"logits/rejected": -1.85094153881073, |
|
"logps/chosen": -199.07778930664062, |
|
"logps/rejected": -232.95819091796875, |
|
"loss": 0.7405, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.3375914692878723, |
|
"rewards/margins": -0.021947531029582024, |
|
"rewards/rejected": -0.31564390659332275, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.9831728371429046e-05, |
|
"logits/chosen": -2.243464231491089, |
|
"logits/rejected": -2.2446773052215576, |
|
"logps/chosen": -133.00210571289062, |
|
"logps/rejected": -138.46485900878906, |
|
"loss": 0.7021, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.23482827842235565, |
|
"rewards/margins": 0.007431086152791977, |
|
"rewards/rejected": -0.24225935339927673, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.982755268182164e-05, |
|
"logits/chosen": -2.26414155960083, |
|
"logits/rejected": -2.140368700027466, |
|
"logps/chosen": -149.4698028564453, |
|
"logps/rejected": -123.35488891601562, |
|
"loss": 0.8467, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.3118973970413208, |
|
"rewards/margins": -0.22676469385623932, |
|
"rewards/rejected": -0.0851326733827591, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.982332599429187e-05, |
|
"logits/chosen": -2.044692039489746, |
|
"logits/rejected": -1.9895075559616089, |
|
"logps/chosen": -143.49749755859375, |
|
"logps/rejected": -170.93624877929688, |
|
"loss": 0.6813, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.05278599262237549, |
|
"rewards/margins": 0.0477883517742157, |
|
"rewards/rejected": -0.10057434439659119, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.981904831752171e-05, |
|
"logits/chosen": -2.3891890048980713, |
|
"logits/rejected": -2.4430036544799805, |
|
"logps/chosen": -135.30807495117188, |
|
"logps/rejected": -145.2589874267578, |
|
"loss": 0.7079, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.05791349709033966, |
|
"rewards/margins": 0.005677835550159216, |
|
"rewards/rejected": -0.06359133124351501, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.981471966029787e-05, |
|
"logits/chosen": -2.2154502868652344, |
|
"logits/rejected": -2.205904006958008, |
|
"logps/chosen": -185.5220947265625, |
|
"logps/rejected": -211.70501708984375, |
|
"loss": 0.7164, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.21913209557533264, |
|
"rewards/margins": 0.028864163905382156, |
|
"rewards/rejected": -0.24799621105194092, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.981034003151178e-05, |
|
"logits/chosen": -2.2102999687194824, |
|
"logits/rejected": -2.261056900024414, |
|
"logps/chosen": -200.81346130371094, |
|
"logps/rejected": -215.66226196289062, |
|
"loss": 0.663, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.18210916221141815, |
|
"rewards/margins": 0.13367104530334473, |
|
"rewards/rejected": -0.3157802224159241, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.980590944015958e-05, |
|
"logits/chosen": -2.360344648361206, |
|
"logits/rejected": -2.3027474880218506, |
|
"logps/chosen": -161.29354858398438, |
|
"logps/rejected": -146.9613037109375, |
|
"loss": 0.7363, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.11364208161830902, |
|
"rewards/margins": -0.03879079967737198, |
|
"rewards/rejected": -0.07485126703977585, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.98014278953421e-05, |
|
"logits/chosen": -2.300621747970581, |
|
"logits/rejected": -2.2820940017700195, |
|
"logps/chosen": -166.09739685058594, |
|
"logps/rejected": -163.3979034423828, |
|
"loss": 0.6378, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.017802832648158073, |
|
"rewards/margins": 0.2077583521604538, |
|
"rewards/rejected": -0.18995548784732819, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.979689540626479e-05, |
|
"logits/chosen": -2.056394577026367, |
|
"logits/rejected": -2.069194793701172, |
|
"logps/chosen": -138.0771942138672, |
|
"logps/rejected": -125.778076171875, |
|
"loss": 0.7441, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.13161520659923553, |
|
"rewards/margins": -0.06547226756811142, |
|
"rewards/rejected": -0.06614293903112411, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.9792311982237774e-05, |
|
"logits/chosen": -1.7692331075668335, |
|
"logits/rejected": -1.78682541847229, |
|
"logps/chosen": -184.9560546875, |
|
"logps/rejected": -196.3815155029297, |
|
"loss": 0.7329, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.2540504038333893, |
|
"rewards/margins": -0.049651261419057846, |
|
"rewards/rejected": -0.20439916849136353, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.9787677632675825e-05, |
|
"logits/chosen": -2.1416497230529785, |
|
"logits/rejected": -2.1960196495056152, |
|
"logps/chosen": -161.20358276367188, |
|
"logps/rejected": -173.38148498535156, |
|
"loss": 0.7561, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.16931605339050293, |
|
"rewards/margins": -0.0933171734213829, |
|
"rewards/rejected": -0.07599887996912003, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.978299236709826e-05, |
|
"logits/chosen": -2.149383544921875, |
|
"logits/rejected": -2.1642093658447266, |
|
"logps/chosen": -184.3213348388672, |
|
"logps/rejected": -146.78817749023438, |
|
"loss": 0.712, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.02038545347750187, |
|
"rewards/margins": 0.031157677993178368, |
|
"rewards/rejected": -0.051543138921260834, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.977825619512904e-05, |
|
"logits/chosen": -2.1460795402526855, |
|
"logits/rejected": -2.201239824295044, |
|
"logps/chosen": -147.57321166992188, |
|
"logps/rejected": -139.89291381835938, |
|
"loss": 0.7989, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.1287820041179657, |
|
"rewards/margins": -0.1620349884033203, |
|
"rewards/rejected": 0.033253006637096405, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.977346912649666e-05, |
|
"logits/chosen": -2.237581729888916, |
|
"logits/rejected": -2.2825090885162354, |
|
"logps/chosen": -138.7524871826172, |
|
"logps/rejected": -174.55783081054688, |
|
"loss": 0.6347, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.032103441655635834, |
|
"rewards/margins": 0.143732950091362, |
|
"rewards/rejected": -0.17583641409873962, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.9768631171034175e-05, |
|
"logits/chosen": -2.133234739303589, |
|
"logits/rejected": -2.1712846755981445, |
|
"logps/chosen": -168.8503875732422, |
|
"logps/rejected": -162.818603515625, |
|
"loss": 0.8217, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.11681299656629562, |
|
"rewards/margins": -0.1929503083229065, |
|
"rewards/rejected": 0.07613730430603027, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.9763742338679145e-05, |
|
"logits/chosen": -2.1170601844787598, |
|
"logits/rejected": -2.1072490215301514, |
|
"logps/chosen": -161.21279907226562, |
|
"logps/rejected": -178.30638122558594, |
|
"loss": 0.6846, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.10452104359865189, |
|
"rewards/margins": 0.06742212921380997, |
|
"rewards/rejected": -0.17194317281246185, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.975880263947367e-05, |
|
"logits/chosen": -2.262809991836548, |
|
"logits/rejected": -2.249521017074585, |
|
"logps/chosen": -142.46505737304688, |
|
"logps/rejected": -142.214599609375, |
|
"loss": 0.7602, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.10644787549972534, |
|
"rewards/margins": -0.08289748430252075, |
|
"rewards/rejected": -0.02355036698281765, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.9753812083564304e-05, |
|
"logits/chosen": -2.270545482635498, |
|
"logits/rejected": -2.2995519638061523, |
|
"logps/chosen": -180.8372039794922, |
|
"logps/rejected": -184.7505340576172, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.05116385966539383, |
|
"rewards/margins": 0.05982666835188866, |
|
"rewards/rejected": -0.008662798441946507, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.974877068120208e-05, |
|
"logits/chosen": -1.9270200729370117, |
|
"logits/rejected": -1.962209701538086, |
|
"logps/chosen": -186.33587646484375, |
|
"logps/rejected": -227.984619140625, |
|
"loss": 0.7574, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.28816187381744385, |
|
"rewards/margins": -0.04220404103398323, |
|
"rewards/rejected": -0.24595780670642853, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.974367844274248e-05, |
|
"logits/chosen": -2.477126121520996, |
|
"logits/rejected": -2.4897990226745605, |
|
"logps/chosen": -120.29115295410156, |
|
"logps/rejected": -115.61780548095703, |
|
"loss": 0.6899, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.002930285409092903, |
|
"rewards/margins": 0.039310526102781296, |
|
"rewards/rejected": -0.04224081337451935, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.973853537864538e-05, |
|
"logits/chosen": -2.2658283710479736, |
|
"logits/rejected": -2.26222562789917, |
|
"logps/chosen": -129.32443237304688, |
|
"logps/rejected": -130.61666870117188, |
|
"loss": 0.6441, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.06606045365333557, |
|
"rewards/margins": 0.13228707015514374, |
|
"rewards/rejected": -0.06622661650180817, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.973334149947508e-05, |
|
"logits/chosen": -2.133833408355713, |
|
"logits/rejected": -2.12565279006958, |
|
"logps/chosen": -169.78045654296875, |
|
"logps/rejected": -154.434814453125, |
|
"loss": 0.695, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.02759992890059948, |
|
"rewards/margins": 0.031831562519073486, |
|
"rewards/rejected": -0.0042316243052482605, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.972809681590026e-05, |
|
"logits/chosen": -2.2754271030426025, |
|
"logits/rejected": -2.2876760959625244, |
|
"logps/chosen": -195.24818420410156, |
|
"logps/rejected": -206.7046661376953, |
|
"loss": 0.7125, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.03773926943540573, |
|
"rewards/margins": -0.024865150451660156, |
|
"rewards/rejected": 0.06260443478822708, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.972280133869396e-05, |
|
"logits/chosen": -2.323434829711914, |
|
"logits/rejected": -2.3305745124816895, |
|
"logps/chosen": -160.62684631347656, |
|
"logps/rejected": -162.4305419921875, |
|
"loss": 0.7052, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.10412123799324036, |
|
"rewards/margins": -0.009756050072610378, |
|
"rewards/rejected": -0.0943651795387268, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.971745507873352e-05, |
|
"logits/chosen": -2.3126134872436523, |
|
"logits/rejected": -2.3449103832244873, |
|
"logps/chosen": -164.61329650878906, |
|
"logps/rejected": -156.84730529785156, |
|
"loss": 0.6569, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.07602532207965851, |
|
"rewards/margins": 0.11013013869524002, |
|
"rewards/rejected": -0.03410482034087181, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.971205804700063e-05, |
|
"logits/chosen": -1.984785556793213, |
|
"logits/rejected": -2.0369019508361816, |
|
"logps/chosen": -160.10220336914062, |
|
"logps/rejected": -179.91224670410156, |
|
"loss": 0.8494, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.1521720588207245, |
|
"rewards/margins": -0.21498528122901917, |
|
"rewards/rejected": 0.06281323730945587, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.970661025458125e-05, |
|
"logits/chosen": -2.309006452560425, |
|
"logits/rejected": -2.3352603912353516, |
|
"logps/chosen": -161.5703887939453, |
|
"logps/rejected": -163.7167205810547, |
|
"loss": 0.6492, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.05860462412238121, |
|
"rewards/margins": 0.11608318984508514, |
|
"rewards/rejected": -0.05747856944799423, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.9701111712665625e-05, |
|
"logits/chosen": -2.227006673812866, |
|
"logits/rejected": -2.1848740577697754, |
|
"logps/chosen": -177.54269409179688, |
|
"logps/rejected": -171.7179718017578, |
|
"loss": 0.7767, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.17108993232250214, |
|
"rewards/margins": -0.11503319442272186, |
|
"rewards/rejected": -0.05605673789978027, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.969556243254822e-05, |
|
"logits/chosen": -2.226109743118286, |
|
"logits/rejected": -2.2309632301330566, |
|
"logps/chosen": -126.68124389648438, |
|
"logps/rejected": -135.05209350585938, |
|
"loss": 0.6762, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.12173338234424591, |
|
"rewards/margins": 0.05880265310406685, |
|
"rewards/rejected": 0.06293072551488876, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.968996242562774e-05, |
|
"logits/chosen": -2.1414482593536377, |
|
"logits/rejected": -2.112384796142578, |
|
"logps/chosen": -162.0722198486328, |
|
"logps/rejected": -151.81146240234375, |
|
"loss": 0.8236, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.04249238967895508, |
|
"rewards/margins": -0.19530200958251953, |
|
"rewards/rejected": 0.2377944141626358, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.968431170340706e-05, |
|
"logits/chosen": -2.2804222106933594, |
|
"logits/rejected": -2.297311782836914, |
|
"logps/chosen": -130.9635772705078, |
|
"logps/rejected": -132.71075439453125, |
|
"loss": 0.7031, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.05266380310058594, |
|
"rewards/margins": 0.007896373979747295, |
|
"rewards/rejected": -0.06056017801165581, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.9678610277493275e-05, |
|
"logits/chosen": -2.324260711669922, |
|
"logits/rejected": -2.287720203399658, |
|
"logps/chosen": -144.84410095214844, |
|
"logps/rejected": -141.16293334960938, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.13754111528396606, |
|
"rewards/margins": 0.018452219665050507, |
|
"rewards/rejected": 0.11908888816833496, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.967285815959759e-05, |
|
"logits/chosen": -2.1252338886260986, |
|
"logits/rejected": -2.181086778640747, |
|
"logps/chosen": -165.26145935058594, |
|
"logps/rejected": -185.48065185546875, |
|
"loss": 0.6365, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.05688684061169624, |
|
"rewards/margins": 0.2325521856546402, |
|
"rewards/rejected": -0.17566533386707306, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.9667055361535354e-05, |
|
"logits/chosen": -2.0358633995056152, |
|
"logits/rejected": -2.037306308746338, |
|
"logps/chosen": -158.7928466796875, |
|
"logps/rejected": -174.2408905029297, |
|
"loss": 0.7384, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.20100197196006775, |
|
"rewards/margins": -0.03293357789516449, |
|
"rewards/rejected": -0.16806840896606445, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.9661201895226e-05, |
|
"logits/chosen": -2.1538403034210205, |
|
"logits/rejected": -2.2139194011688232, |
|
"logps/chosen": -131.5411376953125, |
|
"logps/rejected": -151.45513916015625, |
|
"loss": 0.6839, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.008547332137823105, |
|
"rewards/margins": 0.0680898129940033, |
|
"rewards/rejected": -0.059542469680309296, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.965529777269306e-05, |
|
"logits/chosen": -2.2412662506103516, |
|
"logits/rejected": -2.2856147289276123, |
|
"logps/chosen": -117.49397277832031, |
|
"logps/rejected": -120.62272644042969, |
|
"loss": 0.6965, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.228101447224617, |
|
"rewards/margins": 0.008968396112322807, |
|
"rewards/rejected": -0.23706983029842377, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.964934300606411e-05, |
|
"logits/chosen": -2.066718816757202, |
|
"logits/rejected": -2.138875722885132, |
|
"logps/chosen": -153.00021362304688, |
|
"logps/rejected": -164.0558624267578, |
|
"loss": 0.6836, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.10881123691797256, |
|
"rewards/margins": 0.06324261426925659, |
|
"rewards/rejected": -0.17205384373664856, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.964333760757074e-05, |
|
"logits/chosen": -2.3143880367279053, |
|
"logits/rejected": -2.184577703475952, |
|
"logps/chosen": -161.26583862304688, |
|
"logps/rejected": -178.37884521484375, |
|
"loss": 0.6808, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.09615574032068253, |
|
"rewards/margins": 0.0893949344754219, |
|
"rewards/rejected": -0.18555067479610443, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.963728158954856e-05, |
|
"logits/chosen": -2.3663442134857178, |
|
"logits/rejected": -2.404465675354004, |
|
"logps/chosen": -140.4896240234375, |
|
"logps/rejected": -160.0462188720703, |
|
"loss": 0.6943, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.03150591999292374, |
|
"rewards/margins": 0.02258572168648243, |
|
"rewards/rejected": -0.05409163981676102, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.963117496443715e-05, |
|
"logits/chosen": -2.264538526535034, |
|
"logits/rejected": -2.2544782161712646, |
|
"logps/chosen": -180.01846313476562, |
|
"logps/rejected": -177.05178833007812, |
|
"loss": 0.7132, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.13371305167675018, |
|
"rewards/margins": -0.008653441444039345, |
|
"rewards/rejected": -0.1250596046447754, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.9625017744780045e-05, |
|
"logits/chosen": -2.1741209030151367, |
|
"logits/rejected": -2.1468567848205566, |
|
"logps/chosen": -164.97640991210938, |
|
"logps/rejected": -169.90350341796875, |
|
"loss": 0.6725, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.10341596603393555, |
|
"rewards/margins": 0.06766539812088013, |
|
"rewards/rejected": -0.17108136415481567, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.96188099432247e-05, |
|
"logits/chosen": -2.02036714553833, |
|
"logits/rejected": -2.0622000694274902, |
|
"logps/chosen": -196.87257385253906, |
|
"logps/rejected": -192.37307739257812, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.21568839251995087, |
|
"rewards/margins": 0.0388670451939106, |
|
"rewards/rejected": -0.25455543398857117, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.9612551572522464e-05, |
|
"logits/chosen": -2.2688815593719482, |
|
"logits/rejected": -2.305769920349121, |
|
"logps/chosen": -143.64569091796875, |
|
"logps/rejected": -154.7953338623047, |
|
"loss": 0.6345, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.16745011508464813, |
|
"rewards/margins": 0.1968405842781067, |
|
"rewards/rejected": -0.36429068446159363, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.960624264552858e-05, |
|
"logits/chosen": -2.271151065826416, |
|
"logits/rejected": -2.328986167907715, |
|
"logps/chosen": -133.5254364013672, |
|
"logps/rejected": -172.62962341308594, |
|
"loss": 0.7838, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.2519568204879761, |
|
"rewards/margins": -0.10307664424180984, |
|
"rewards/rejected": -0.14888018369674683, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.9599883175202124e-05, |
|
"logits/chosen": -2.3042290210723877, |
|
"logits/rejected": -2.285111427307129, |
|
"logps/chosen": -114.66839599609375, |
|
"logps/rejected": -120.16878509521484, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.0025178715586662292, |
|
"rewards/margins": 0.055260900408029556, |
|
"rewards/rejected": -0.052743006497621536, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.9593473174605974e-05, |
|
"logits/chosen": -2.4031026363372803, |
|
"logits/rejected": -2.3971669673919678, |
|
"logps/chosen": -180.76190185546875, |
|
"logps/rejected": -189.8751220703125, |
|
"loss": 0.7679, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.2216603308916092, |
|
"rewards/margins": -0.037253011018037796, |
|
"rewards/rejected": -0.1844073235988617, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.958701265690685e-05, |
|
"logits/chosen": -2.2369589805603027, |
|
"logits/rejected": -2.213113784790039, |
|
"logps/chosen": -163.1219482421875, |
|
"logps/rejected": -163.7172393798828, |
|
"loss": 0.7187, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.29494044184684753, |
|
"rewards/margins": -0.002360118553042412, |
|
"rewards/rejected": -0.29258033633232117, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.958050163537519e-05, |
|
"logits/chosen": -2.4455294609069824, |
|
"logits/rejected": -2.393801212310791, |
|
"logps/chosen": -143.6052703857422, |
|
"logps/rejected": -128.57278442382812, |
|
"loss": 0.7247, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.2546530067920685, |
|
"rewards/margins": -0.035662561655044556, |
|
"rewards/rejected": -0.2189904749393463, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.957394012338519e-05, |
|
"logits/chosen": -2.242324113845825, |
|
"logits/rejected": -2.276301383972168, |
|
"logps/chosen": -154.13751220703125, |
|
"logps/rejected": -155.56005859375, |
|
"loss": 0.75, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.3772943615913391, |
|
"rewards/margins": -0.08221397548913956, |
|
"rewards/rejected": -0.29508039355278015, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.956732813441477e-05, |
|
"logits/chosen": -2.4516844749450684, |
|
"logits/rejected": -2.397124767303467, |
|
"logps/chosen": -142.58702087402344, |
|
"logps/rejected": -139.663330078125, |
|
"loss": 0.8001, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.2911987900733948, |
|
"rewards/margins": -0.15551666915416718, |
|
"rewards/rejected": -0.1356821358203888, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.956066568204552e-05, |
|
"logits/chosen": -2.039741039276123, |
|
"logits/rejected": -2.023181200027466, |
|
"logps/chosen": -154.21315002441406, |
|
"logps/rejected": -135.28794860839844, |
|
"loss": 0.6237, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.0628127008676529, |
|
"rewards/margins": 0.22161367535591125, |
|
"rewards/rejected": -0.28442639112472534, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.955395277996268e-05, |
|
"logits/chosen": -2.2360501289367676, |
|
"logits/rejected": -2.2908437252044678, |
|
"logps/chosen": -184.1702423095703, |
|
"logps/rejected": -185.9180145263672, |
|
"loss": 0.6513, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.20668905973434448, |
|
"rewards/margins": 0.1489761769771576, |
|
"rewards/rejected": -0.3556652069091797, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.954718944195512e-05, |
|
"logits/chosen": -2.2392733097076416, |
|
"logits/rejected": -2.256279706954956, |
|
"logps/chosen": -140.51260375976562, |
|
"logps/rejected": -144.2230682373047, |
|
"loss": 0.6254, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.18107274174690247, |
|
"rewards/margins": 0.16339734196662903, |
|
"rewards/rejected": -0.3444700539112091, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.954037568191534e-05, |
|
"logits/chosen": -2.2857413291931152, |
|
"logits/rejected": -2.2823734283447266, |
|
"logps/chosen": -139.91226196289062, |
|
"logps/rejected": -140.5911407470703, |
|
"loss": 0.6551, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.10547606647014618, |
|
"rewards/margins": 0.11846562474966049, |
|
"rewards/rejected": -0.22394171357154846, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.9533511513839384e-05, |
|
"logits/chosen": -2.052267551422119, |
|
"logits/rejected": -2.02097487449646, |
|
"logps/chosen": -135.19448852539062, |
|
"logps/rejected": -150.51913452148438, |
|
"loss": 0.678, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.1995100975036621, |
|
"rewards/margins": 0.07375679910182953, |
|
"rewards/rejected": -0.27326688170433044, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.9526596951826824e-05, |
|
"logits/chosen": -2.1060378551483154, |
|
"logits/rejected": -2.1363437175750732, |
|
"logps/chosen": -189.6680145263672, |
|
"logps/rejected": -194.1062774658203, |
|
"loss": 0.7673, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.20259276032447815, |
|
"rewards/margins": -0.09232301265001297, |
|
"rewards/rejected": -0.11026974767446518, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.951963201008076e-05, |
|
"logits/chosen": -2.146965265274048, |
|
"logits/rejected": -2.1230671405792236, |
|
"logps/chosen": -161.82183837890625, |
|
"logps/rejected": -152.2267303466797, |
|
"loss": 0.785, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.17847128212451935, |
|
"rewards/margins": -0.1518484354019165, |
|
"rewards/rejected": -0.026622820645570755, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.951261670290781e-05, |
|
"logits/chosen": -2.0150346755981445, |
|
"logits/rejected": -2.04280424118042, |
|
"logps/chosen": -204.8814239501953, |
|
"logps/rejected": -211.47410583496094, |
|
"loss": 0.8185, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.35267120599746704, |
|
"rewards/margins": -0.18207934498786926, |
|
"rewards/rejected": -0.17059186100959778, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.950555104471799e-05, |
|
"logits/chosen": -2.0696749687194824, |
|
"logits/rejected": -2.0690653324127197, |
|
"logps/chosen": -180.72787475585938, |
|
"logps/rejected": -167.14132690429688, |
|
"loss": 0.701, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.1277678906917572, |
|
"rewards/margins": 0.01966879889369011, |
|
"rewards/rejected": -0.14743672311306, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.949843505002477e-05, |
|
"logits/chosen": -2.276556968688965, |
|
"logits/rejected": -2.307004928588867, |
|
"logps/chosen": -139.89166259765625, |
|
"logps/rejected": -149.29598999023438, |
|
"loss": 0.5977, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.11064109206199646, |
|
"rewards/margins": 0.22020389139652252, |
|
"rewards/rejected": -0.10956278443336487, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.9491268733445034e-05, |
|
"logits/chosen": -2.3263444900512695, |
|
"logits/rejected": -2.216820240020752, |
|
"logps/chosen": -176.23760986328125, |
|
"logps/rejected": -181.00119018554688, |
|
"loss": 0.5842, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.14714036881923676, |
|
"rewards/margins": 0.2764008939266205, |
|
"rewards/rejected": -0.4235413074493408, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.9484052109698984e-05, |
|
"logits/chosen": -2.2672414779663086, |
|
"logits/rejected": -2.29109525680542, |
|
"logps/chosen": -146.66981506347656, |
|
"logps/rejected": -148.75990295410156, |
|
"loss": 0.6706, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.010637687519192696, |
|
"rewards/margins": 0.07865148037672043, |
|
"rewards/rejected": -0.08928915858268738, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.947678519361021e-05, |
|
"logits/chosen": -2.173220634460449, |
|
"logits/rejected": -2.1617910861968994, |
|
"logps/chosen": -157.75787353515625, |
|
"logps/rejected": -161.2376251220703, |
|
"loss": 0.7178, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.1290948987007141, |
|
"rewards/margins": 0.04089619964361191, |
|
"rewards/rejected": -0.16999109089374542, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.946946800010556e-05, |
|
"logits/chosen": -2.089167833328247, |
|
"logits/rejected": -2.098552942276001, |
|
"logps/chosen": -167.84510803222656, |
|
"logps/rejected": -177.17660522460938, |
|
"loss": 0.7749, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.23455895483493805, |
|
"rewards/margins": -0.11604375392198563, |
|
"rewards/rejected": -0.11851520091295242, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.946210054421518e-05, |
|
"logits/chosen": -2.3614742755889893, |
|
"logits/rejected": -2.367582321166992, |
|
"logps/chosen": -138.63021850585938, |
|
"logps/rejected": -138.73843383789062, |
|
"loss": 0.7186, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.0001854933798313141, |
|
"rewards/margins": -0.002179570496082306, |
|
"rewards/rejected": 0.001994088292121887, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.945468284107246e-05, |
|
"logits/chosen": -2.033447265625, |
|
"logits/rejected": -2.047463893890381, |
|
"logps/chosen": -163.36306762695312, |
|
"logps/rejected": -178.01649475097656, |
|
"loss": 0.6706, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.026612814515829086, |
|
"rewards/margins": 0.12761405110359192, |
|
"rewards/rejected": -0.10100121051073074, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.944721490591401e-05, |
|
"logits/chosen": -2.2349605560302734, |
|
"logits/rejected": -2.2344400882720947, |
|
"logps/chosen": -153.67425537109375, |
|
"logps/rejected": -140.24844360351562, |
|
"loss": 0.7657, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.08484932780265808, |
|
"rewards/margins": -0.11917441338300705, |
|
"rewards/rejected": 0.034325070679187775, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.9439696754079595e-05, |
|
"logits/chosen": -2.026840925216675, |
|
"logits/rejected": -2.0905795097351074, |
|
"logps/chosen": -153.32894897460938, |
|
"logps/rejected": -157.39907836914062, |
|
"loss": 0.609, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.1143321692943573, |
|
"rewards/margins": 0.21959424018859863, |
|
"rewards/rejected": -0.10526210069656372, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.9432128401012144e-05, |
|
"logits/chosen": -2.2150955200195312, |
|
"logits/rejected": -2.1640408039093018, |
|
"logps/chosen": -176.57774353027344, |
|
"logps/rejected": -177.27870178222656, |
|
"loss": 0.8383, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.25025683641433716, |
|
"rewards/margins": -0.19921274483203888, |
|
"rewards/rejected": -0.05104408413171768, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.9424509862257706e-05, |
|
"logits/chosen": -1.9792041778564453, |
|
"logits/rejected": -1.9765446186065674, |
|
"logps/chosen": -134.66946411132812, |
|
"logps/rejected": -144.38739013671875, |
|
"loss": 0.6723, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.0672139897942543, |
|
"rewards/margins": 0.08019405603408813, |
|
"rewards/rejected": -0.14740802347660065, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.941684115346541e-05, |
|
"logits/chosen": -2.319556474685669, |
|
"logits/rejected": -2.351644277572632, |
|
"logps/chosen": -173.3380126953125, |
|
"logps/rejected": -170.59963989257812, |
|
"loss": 0.7212, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.019015267491340637, |
|
"rewards/margins": -0.03376225382089615, |
|
"rewards/rejected": 0.05277752876281738, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.940912229038745e-05, |
|
"logits/chosen": -2.0435500144958496, |
|
"logits/rejected": -2.022526502609253, |
|
"logps/chosen": -135.12265014648438, |
|
"logps/rejected": -125.80049133300781, |
|
"loss": 0.7075, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.021306686103343964, |
|
"rewards/margins": 0.02372751012444496, |
|
"rewards/rejected": -0.045034196227788925, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.9401353288879024e-05, |
|
"logits/chosen": -1.973745584487915, |
|
"logits/rejected": -1.964308738708496, |
|
"logps/chosen": -148.7356719970703, |
|
"logps/rejected": -140.4853515625, |
|
"loss": 0.9136, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.35221704840660095, |
|
"rewards/margins": -0.27745121717453003, |
|
"rewards/rejected": -0.07476583123207092, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.9393534164898335e-05, |
|
"logits/chosen": -2.2897889614105225, |
|
"logits/rejected": -2.2667198181152344, |
|
"logps/chosen": -169.73867797851562, |
|
"logps/rejected": -154.4757080078125, |
|
"loss": 0.7961, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.01284090057015419, |
|
"rewards/margins": -0.14840884506702423, |
|
"rewards/rejected": 0.13556794822216034, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.9385664934506526e-05, |
|
"logits/chosen": -2.320432186126709, |
|
"logits/rejected": -2.2988882064819336, |
|
"logps/chosen": -144.0506591796875, |
|
"logps/rejected": -145.1551055908203, |
|
"loss": 0.8377, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": 0.11610221862792969, |
|
"rewards/margins": -0.22257395088672638, |
|
"rewards/rejected": 0.33867618441581726, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.937774561386768e-05, |
|
"logits/chosen": -2.1970014572143555, |
|
"logits/rejected": -2.222613573074341, |
|
"logps/chosen": -138.17433166503906, |
|
"logps/rejected": -143.5152130126953, |
|
"loss": 0.7075, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.0024851299822330475, |
|
"rewards/margins": 0.022421889007091522, |
|
"rewards/rejected": -0.01993674784898758, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.936977621924875e-05, |
|
"logits/chosen": -2.1020843982696533, |
|
"logits/rejected": -2.0904481410980225, |
|
"logps/chosen": -148.5655517578125, |
|
"logps/rejected": -153.73941040039062, |
|
"loss": 0.6322, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.003152729943394661, |
|
"rewards/margins": 0.18040181696414948, |
|
"rewards/rejected": -0.17724908888339996, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.9361756767019564e-05, |
|
"logits/chosen": -1.8264960050582886, |
|
"logits/rejected": -1.792884349822998, |
|
"logps/chosen": -168.45526123046875, |
|
"logps/rejected": -160.93218994140625, |
|
"loss": 0.7427, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.034992799162864685, |
|
"rewards/margins": -0.04541083052754402, |
|
"rewards/rejected": 0.010418036952614784, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.935368727365276e-05, |
|
"logits/chosen": -2.033273458480835, |
|
"logits/rejected": -2.0581772327423096, |
|
"logps/chosen": -151.52731323242188, |
|
"logps/rejected": -166.76388549804688, |
|
"loss": 0.7695, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.07278600335121155, |
|
"rewards/margins": -0.08771771937608719, |
|
"rewards/rejected": 0.014931721612811089, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.934556775572377e-05, |
|
"logits/chosen": -2.1122307777404785, |
|
"logits/rejected": -2.1686928272247314, |
|
"logps/chosen": -158.33444213867188, |
|
"logps/rejected": -155.8374481201172, |
|
"loss": 0.7004, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.11077702045440674, |
|
"rewards/margins": 0.048048801720142365, |
|
"rewards/rejected": -0.1588258296251297, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.9337398229910784e-05, |
|
"logits/chosen": -2.0639896392822266, |
|
"logits/rejected": -2.0493862628936768, |
|
"logps/chosen": -140.65463256835938, |
|
"logps/rejected": -143.4046173095703, |
|
"loss": 0.6137, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.332168847322464, |
|
"rewards/margins": 0.21151331067085266, |
|
"rewards/rejected": 0.12065552175045013, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.932917871299471e-05, |
|
"logits/chosen": -2.1873834133148193, |
|
"logits/rejected": -2.1482584476470947, |
|
"logps/chosen": -156.8861846923828, |
|
"logps/rejected": -152.4180450439453, |
|
"loss": 0.7677, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": 0.044190216809511185, |
|
"rewards/margins": -0.10401745140552521, |
|
"rewards/rejected": 0.1482076644897461, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.9320909221859134e-05, |
|
"logits/chosen": -2.1388399600982666, |
|
"logits/rejected": -2.1468729972839355, |
|
"logps/chosen": -150.1500701904297, |
|
"logps/rejected": -152.03726196289062, |
|
"loss": 0.7504, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": 0.08211887627840042, |
|
"rewards/margins": -0.07361260801553726, |
|
"rewards/rejected": 0.15573148429393768, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.9312589773490304e-05, |
|
"logits/chosen": -1.996654987335205, |
|
"logits/rejected": -1.9872137308120728, |
|
"logps/chosen": -150.1443328857422, |
|
"logps/rejected": -155.907958984375, |
|
"loss": 0.7484, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": 0.17925025522708893, |
|
"rewards/margins": -0.05667828768491745, |
|
"rewards/rejected": 0.23592855036258698, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.930422038497708e-05, |
|
"logits/chosen": -2.2004756927490234, |
|
"logits/rejected": -2.2537848949432373, |
|
"logps/chosen": -150.181640625, |
|
"logps/rejected": -177.4576416015625, |
|
"loss": 0.6491, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.28161391615867615, |
|
"rewards/margins": 0.11887946724891663, |
|
"rewards/rejected": 0.16273444890975952, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.92958010735109e-05, |
|
"logits/chosen": -2.1067912578582764, |
|
"logits/rejected": -2.1667635440826416, |
|
"logps/chosen": -122.0634765625, |
|
"logps/rejected": -125.26715850830078, |
|
"loss": 0.7781, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.10816986113786697, |
|
"rewards/margins": -0.11183619499206543, |
|
"rewards/rejected": 0.2200060486793518, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.928733185638575e-05, |
|
"logits/chosen": -2.1284282207489014, |
|
"logits/rejected": -2.1388614177703857, |
|
"logps/chosen": -145.78765869140625, |
|
"logps/rejected": -164.8605194091797, |
|
"loss": 0.694, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.06616201996803284, |
|
"rewards/margins": 0.05039716139435768, |
|
"rewards/rejected": 0.01576484926044941, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.927881275099815e-05, |
|
"logits/chosen": -2.2518081665039062, |
|
"logits/rejected": -2.2290408611297607, |
|
"logps/chosen": -153.83200073242188, |
|
"logps/rejected": -160.59776306152344, |
|
"loss": 0.7557, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.1310650259256363, |
|
"rewards/margins": -0.07667630910873413, |
|
"rewards/rejected": 0.20774134993553162, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.927024377484705e-05, |
|
"logits/chosen": -2.0690414905548096, |
|
"logits/rejected": -2.096713066101074, |
|
"logps/chosen": -168.7005157470703, |
|
"logps/rejected": -168.45872497558594, |
|
"loss": 0.6711, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.16334614157676697, |
|
"rewards/margins": 0.09711476415395737, |
|
"rewards/rejected": 0.06623139977455139, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.9261624945533855e-05, |
|
"logits/chosen": -1.6682740449905396, |
|
"logits/rejected": -1.611401915550232, |
|
"logps/chosen": -170.30328369140625, |
|
"logps/rejected": -174.10769653320312, |
|
"loss": 0.8147, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.10478762537240982, |
|
"rewards/margins": -0.1818379908800125, |
|
"rewards/rejected": 0.07705036550760269, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.925295628076241e-05, |
|
"logits/chosen": -1.9349985122680664, |
|
"logits/rejected": -1.9530787467956543, |
|
"logps/chosen": -130.31883239746094, |
|
"logps/rejected": -152.9124755859375, |
|
"loss": 0.8018, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.0537966787815094, |
|
"rewards/margins": -0.1305672526359558, |
|
"rewards/rejected": 0.1843639612197876, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.9244237798338866e-05, |
|
"logits/chosen": -2.0011062622070312, |
|
"logits/rejected": -1.9965519905090332, |
|
"logps/chosen": -164.77838134765625, |
|
"logps/rejected": -192.95458984375, |
|
"loss": 0.7462, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.0018954463303089142, |
|
"rewards/margins": -0.058521248400211334, |
|
"rewards/rejected": 0.056625787168741226, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.923546951617175e-05, |
|
"logits/chosen": -2.117265224456787, |
|
"logits/rejected": -2.1309285163879395, |
|
"logps/chosen": -157.73707580566406, |
|
"logps/rejected": -156.6586456298828, |
|
"loss": 0.7046, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.09399585425853729, |
|
"rewards/margins": 0.057814061641693115, |
|
"rewards/rejected": 0.03618178144097328, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.922665145227187e-05, |
|
"logits/chosen": -2.2057220935821533, |
|
"logits/rejected": -2.1393046379089355, |
|
"logps/chosen": -182.5475311279297, |
|
"logps/rejected": -164.41293334960938, |
|
"loss": 0.8501, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.0016194283962249756, |
|
"rewards/margins": -0.25813543796539307, |
|
"rewards/rejected": 0.2565160095691681, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.9217783624752266e-05, |
|
"logits/chosen": -2.2684426307678223, |
|
"logits/rejected": -2.243990659713745, |
|
"logps/chosen": -128.5909881591797, |
|
"logps/rejected": -125.42144775390625, |
|
"loss": 0.6339, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.07768978923559189, |
|
"rewards/margins": 0.20721395313739777, |
|
"rewards/rejected": -0.12952415645122528, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.920886605182823e-05, |
|
"logits/chosen": -2.2352826595306396, |
|
"logits/rejected": -2.317643404006958, |
|
"logps/chosen": -147.9606170654297, |
|
"logps/rejected": -151.64022827148438, |
|
"loss": 0.7825, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.1962936520576477, |
|
"rewards/margins": -0.0966653823852539, |
|
"rewards/rejected": 0.2929590344429016, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.919989875181722e-05, |
|
"logits/chosen": -2.0506582260131836, |
|
"logits/rejected": -2.056596517562866, |
|
"logps/chosen": -133.77655029296875, |
|
"logps/rejected": -142.32601928710938, |
|
"loss": 0.698, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.06665589660406113, |
|
"rewards/margins": 0.017916321754455566, |
|
"rewards/rejected": 0.04873957484960556, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.919088174313884e-05, |
|
"logits/chosen": -2.1315078735351562, |
|
"logits/rejected": -2.1690430641174316, |
|
"logps/chosen": -166.51275634765625, |
|
"logps/rejected": -171.87623596191406, |
|
"loss": 0.6526, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.17828050255775452, |
|
"rewards/margins": 0.1265142858028412, |
|
"rewards/rejected": 0.05176621302962303, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.91818150443148e-05, |
|
"logits/chosen": -2.316528797149658, |
|
"logits/rejected": -2.3489573001861572, |
|
"logps/chosen": -158.4711456298828, |
|
"logps/rejected": -156.23777770996094, |
|
"loss": 0.6217, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.19824568927288055, |
|
"rewards/margins": 0.19765932857990265, |
|
"rewards/rejected": 0.0005863434635102749, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.917269867396886e-05, |
|
"logits/chosen": -2.1244661808013916, |
|
"logits/rejected": -2.0779430866241455, |
|
"logps/chosen": -164.66851806640625, |
|
"logps/rejected": -145.93212890625, |
|
"loss": 0.6832, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0432266928255558, |
|
"rewards/margins": 0.07876121997833252, |
|
"rewards/rejected": -0.03553451970219612, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.916353265082686e-05, |
|
"logits/chosen": -2.397843837738037, |
|
"logits/rejected": -2.4166927337646484, |
|
"logps/chosen": -214.45030212402344, |
|
"logps/rejected": -215.8428497314453, |
|
"loss": 0.7811, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.009777067229151726, |
|
"rewards/margins": -0.09677901118993759, |
|
"rewards/rejected": 0.10655608028173447, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.9154316993716565e-05, |
|
"logits/chosen": -2.245692491531372, |
|
"logits/rejected": -2.245835781097412, |
|
"logps/chosen": -156.67173767089844, |
|
"logps/rejected": -149.71917724609375, |
|
"loss": 0.7229, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.05182276666164398, |
|
"rewards/margins": -0.009018277749419212, |
|
"rewards/rejected": 0.06084103882312775, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.9145051721567734e-05, |
|
"logits/chosen": -2.2843127250671387, |
|
"logits/rejected": -2.2395739555358887, |
|
"logps/chosen": -167.3928985595703, |
|
"logps/rejected": -162.44593811035156, |
|
"loss": 0.7401, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.057207878679037094, |
|
"rewards/margins": -0.031176520511507988, |
|
"rewards/rejected": 0.08838438242673874, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.913573685341205e-05, |
|
"logits/chosen": -2.0592617988586426, |
|
"logits/rejected": -2.108931064605713, |
|
"logps/chosen": -189.07752990722656, |
|
"logps/rejected": -181.4488983154297, |
|
"loss": 0.8664, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.3183850646018982, |
|
"rewards/margins": -0.25120943784713745, |
|
"rewards/rejected": -0.06717558205127716, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.9126372408383025e-05, |
|
"logits/chosen": -1.9963423013687134, |
|
"logits/rejected": -1.9505279064178467, |
|
"logps/chosen": -149.78964233398438, |
|
"logps/rejected": -148.13140869140625, |
|
"loss": 0.7801, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": 0.006365638226270676, |
|
"rewards/margins": -0.0903141051530838, |
|
"rewards/rejected": 0.09667973965406418, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.911695840571605e-05, |
|
"logits/chosen": -2.259364604949951, |
|
"logits/rejected": -2.2460901737213135, |
|
"logps/chosen": -173.76356506347656, |
|
"logps/rejected": -162.274169921875, |
|
"loss": 0.7328, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.26215800642967224, |
|
"rewards/margins": -0.027749449014663696, |
|
"rewards/rejected": -0.23440855741500854, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.910749486474828e-05, |
|
"logits/chosen": -2.1809744834899902, |
|
"logits/rejected": -2.1365909576416016, |
|
"logps/chosen": -173.26553344726562, |
|
"logps/rejected": -168.60324096679688, |
|
"loss": 0.6541, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.13890515267848969, |
|
"rewards/margins": 0.1398504674434662, |
|
"rewards/rejected": -0.2787555754184723, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.909798180491865e-05, |
|
"logits/chosen": -2.2078680992126465, |
|
"logits/rejected": -2.202317714691162, |
|
"logps/chosen": -161.5787353515625, |
|
"logps/rejected": -148.63571166992188, |
|
"loss": 0.6944, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.0784807801246643, |
|
"rewards/margins": 0.04119132459163666, |
|
"rewards/rejected": 0.037289444357156754, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.9088419245767803e-05, |
|
"logits/chosen": -2.323357582092285, |
|
"logits/rejected": -2.331782817840576, |
|
"logps/chosen": -154.3722686767578, |
|
"logps/rejected": -142.60247802734375, |
|
"loss": 0.7108, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.03877534344792366, |
|
"rewards/margins": 0.031510498374700546, |
|
"rewards/rejected": 0.007264849729835987, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.907880720693804e-05, |
|
"logits/chosen": -2.3022680282592773, |
|
"logits/rejected": -2.242792844772339, |
|
"logps/chosen": -143.08497619628906, |
|
"logps/rejected": -142.6212158203125, |
|
"loss": 0.6595, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.19559280574321747, |
|
"rewards/margins": 0.14293703436851501, |
|
"rewards/rejected": 0.052655745297670364, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.9069145708173324e-05, |
|
"logits/chosen": -2.24276065826416, |
|
"logits/rejected": -2.2772743701934814, |
|
"logps/chosen": -141.13607788085938, |
|
"logps/rejected": -143.68528747558594, |
|
"loss": 0.749, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.13126200437545776, |
|
"rewards/margins": -0.08278003334999084, |
|
"rewards/rejected": -0.04848198592662811, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.9059434769319205e-05, |
|
"logits/chosen": -1.9449436664581299, |
|
"logits/rejected": -1.8548604249954224, |
|
"logps/chosen": -182.99716186523438, |
|
"logps/rejected": -157.7788848876953, |
|
"loss": 0.7759, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.11397290229797363, |
|
"rewards/margins": -0.0827551856637001, |
|
"rewards/rejected": -0.031217724084854126, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.904967441032278e-05, |
|
"logits/chosen": -2.3559162616729736, |
|
"logits/rejected": -2.351862907409668, |
|
"logps/chosen": -156.9031982421875, |
|
"logps/rejected": -151.17758178710938, |
|
"loss": 0.7666, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.030413679778575897, |
|
"rewards/margins": -0.07400794327259064, |
|
"rewards/rejected": 0.04359426349401474, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.903986465123266e-05, |
|
"logits/chosen": -2.013231039047241, |
|
"logits/rejected": -2.1264541149139404, |
|
"logps/chosen": -142.36181640625, |
|
"logps/rejected": -151.9750213623047, |
|
"loss": 0.7971, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.08235807716846466, |
|
"rewards/margins": -0.12663468718528748, |
|
"rewards/rejected": 0.04427662491798401, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.903000551219894e-05, |
|
"logits/chosen": -2.1607303619384766, |
|
"logits/rejected": -2.1577768325805664, |
|
"logps/chosen": -180.66165161132812, |
|
"logps/rejected": -193.0799102783203, |
|
"loss": 0.6103, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.06296667456626892, |
|
"rewards/margins": 0.2222498655319214, |
|
"rewards/rejected": -0.15928319096565247, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.902009701347313e-05, |
|
"logits/chosen": -2.039292335510254, |
|
"logits/rejected": -2.075709342956543, |
|
"logps/chosen": -139.63912963867188, |
|
"logps/rejected": -145.09732055664062, |
|
"loss": 0.7106, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.06535654515028, |
|
"rewards/margins": 0.01611308380961418, |
|
"rewards/rejected": -0.08146963268518448, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.901013917540814e-05, |
|
"logits/chosen": -2.17149019241333, |
|
"logits/rejected": -2.1316046714782715, |
|
"logps/chosen": -149.57943725585938, |
|
"logps/rejected": -135.62716674804688, |
|
"loss": 0.6599, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.09369993209838867, |
|
"rewards/margins": 0.16279715299606323, |
|
"rewards/rejected": -0.06909724324941635, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.900013201845821e-05, |
|
"logits/chosen": -2.245067834854126, |
|
"logits/rejected": -2.3142921924591064, |
|
"logps/chosen": -249.42477416992188, |
|
"logps/rejected": -244.9073486328125, |
|
"loss": 0.6299, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.03764531761407852, |
|
"rewards/margins": 0.16476468741893768, |
|
"rewards/rejected": -0.2024100124835968, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.899007556317893e-05, |
|
"logits/chosen": -2.1344449520111084, |
|
"logits/rejected": -2.114758014678955, |
|
"logps/chosen": -149.18663024902344, |
|
"logps/rejected": -157.50579833984375, |
|
"loss": 0.7001, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.05027259886264801, |
|
"rewards/margins": 0.06326065212488174, |
|
"rewards/rejected": -0.11353327333927155, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.8979969830227086e-05, |
|
"logits/chosen": -2.040646553039551, |
|
"logits/rejected": -2.045273780822754, |
|
"logps/chosen": -202.0292510986328, |
|
"logps/rejected": -214.07327270507812, |
|
"loss": 0.6409, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.050717130303382874, |
|
"rewards/margins": 0.17365548014640808, |
|
"rewards/rejected": -0.22437259554862976, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.896981484036074e-05, |
|
"logits/chosen": -2.013162612915039, |
|
"logits/rejected": -1.9795409440994263, |
|
"logps/chosen": -146.5740509033203, |
|
"logps/rejected": -149.60594177246094, |
|
"loss": 0.6182, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.060083091259002686, |
|
"rewards/margins": 0.2053542286157608, |
|
"rewards/rejected": -0.2654373347759247, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.895961061443911e-05, |
|
"logits/chosen": -2.287144184112549, |
|
"logits/rejected": -2.292527914047241, |
|
"logps/chosen": -157.79879760742188, |
|
"logps/rejected": -156.17396545410156, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.018628746271133423, |
|
"rewards/margins": 0.05455555394291878, |
|
"rewards/rejected": -0.035926803946495056, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.894935717342255e-05, |
|
"logits/chosen": -1.8966100215911865, |
|
"logits/rejected": -1.9237083196640015, |
|
"logps/chosen": -139.45045471191406, |
|
"logps/rejected": -163.8885955810547, |
|
"loss": 0.7204, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.2020660787820816, |
|
"rewards/margins": 0.042556531727313995, |
|
"rewards/rejected": -0.2446226328611374, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.8939054538372496e-05, |
|
"logits/chosen": -2.121617317199707, |
|
"logits/rejected": -2.1448612213134766, |
|
"logps/chosen": -237.39845275878906, |
|
"logps/rejected": -254.28359985351562, |
|
"loss": 0.7159, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0699370950460434, |
|
"rewards/margins": 0.008224628865718842, |
|
"rewards/rejected": 0.06171245500445366, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.8928702730451456e-05, |
|
"logits/chosen": -1.949974775314331, |
|
"logits/rejected": -1.870169997215271, |
|
"logps/chosen": -153.4409942626953, |
|
"logps/rejected": -156.4421844482422, |
|
"loss": 0.7495, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.12804728746414185, |
|
"rewards/margins": -0.07111337780952454, |
|
"rewards/rejected": -0.0569339245557785, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.891830177092294e-05, |
|
"logits/chosen": -2.206598997116089, |
|
"logits/rejected": -2.1638033390045166, |
|
"logps/chosen": -151.28504943847656, |
|
"logps/rejected": -148.6924285888672, |
|
"loss": 0.6044, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.030383877456188202, |
|
"rewards/margins": 0.2497837245464325, |
|
"rewards/rejected": -0.2801675796508789, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.8907851681151396e-05, |
|
"logits/chosen": -1.9466733932495117, |
|
"logits/rejected": -2.0422286987304688, |
|
"logps/chosen": -147.58941650390625, |
|
"logps/rejected": -163.70912170410156, |
|
"loss": 0.7529, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.18631082773208618, |
|
"rewards/margins": -0.07437282055616379, |
|
"rewards/rejected": -0.1119379922747612, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.889735248260221e-05, |
|
"logits/chosen": -1.9457015991210938, |
|
"logits/rejected": -1.9883947372436523, |
|
"logps/chosen": -168.67608642578125, |
|
"logps/rejected": -175.51678466796875, |
|
"loss": 0.6777, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.1923048198223114, |
|
"rewards/margins": 0.08353283256292343, |
|
"rewards/rejected": -0.27583765983581543, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.8886804196841626e-05, |
|
"logits/chosen": -2.105576515197754, |
|
"logits/rejected": -2.0363874435424805, |
|
"logps/chosen": -154.8956756591797, |
|
"logps/rejected": -155.22312927246094, |
|
"loss": 0.7898, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.08425075560808182, |
|
"rewards/margins": -0.13373690843582153, |
|
"rewards/rejected": 0.04948614165186882, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.887620684553674e-05, |
|
"logits/chosen": -2.1795973777770996, |
|
"logits/rejected": -2.2052454948425293, |
|
"logps/chosen": -132.4536895751953, |
|
"logps/rejected": -134.7356719970703, |
|
"loss": 0.8224, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": 0.002498343586921692, |
|
"rewards/margins": -0.15511931478977203, |
|
"rewards/rejected": 0.15761765837669373, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.886556045045542e-05, |
|
"logits/chosen": -2.068981409072876, |
|
"logits/rejected": -2.0851194858551025, |
|
"logps/chosen": -146.88949584960938, |
|
"logps/rejected": -162.4698028564453, |
|
"loss": 0.7558, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.15774422883987427, |
|
"rewards/margins": -0.06368841230869293, |
|
"rewards/rejected": -0.09405580163002014, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.8854865033466275e-05, |
|
"logits/chosen": -1.7522428035736084, |
|
"logits/rejected": -1.729603886604309, |
|
"logps/chosen": -217.58309936523438, |
|
"logps/rejected": -244.42282104492188, |
|
"loss": 0.7803, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.23889069259166718, |
|
"rewards/margins": -0.09420409798622131, |
|
"rewards/rejected": -0.14468660950660706, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.88441206165386e-05, |
|
"logits/chosen": -2.1225035190582275, |
|
"logits/rejected": -2.0949532985687256, |
|
"logps/chosen": -177.24856567382812, |
|
"logps/rejected": -175.7102813720703, |
|
"loss": 0.7095, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.12725457549095154, |
|
"rewards/margins": 0.04284512996673584, |
|
"rewards/rejected": -0.17009973526000977, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.8833327221742356e-05, |
|
"logits/chosen": -2.0727334022521973, |
|
"logits/rejected": -2.134239435195923, |
|
"logps/chosen": -131.3041534423828, |
|
"logps/rejected": -136.77574157714844, |
|
"loss": 0.6715, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.037539076060056686, |
|
"rewards/margins": 0.08883160352706909, |
|
"rewards/rejected": -0.0512925386428833, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.88224848712481e-05, |
|
"logits/chosen": -1.953855276107788, |
|
"logits/rejected": -1.9414265155792236, |
|
"logps/chosen": -187.20455932617188, |
|
"logps/rejected": -181.2246551513672, |
|
"loss": 0.5929, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.1832740604877472, |
|
"rewards/margins": 0.2624707818031311, |
|
"rewards/rejected": -0.07919671386480331, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.881159358732694e-05, |
|
"logits/chosen": -1.8854598999023438, |
|
"logits/rejected": -1.8628448247909546, |
|
"logps/chosen": -156.7054443359375, |
|
"logps/rejected": -158.3913116455078, |
|
"loss": 0.6956, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.11108584702014923, |
|
"rewards/margins": 0.05867990851402283, |
|
"rewards/rejected": -0.16976574063301086, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.8800653392350526e-05, |
|
"logits/chosen": -1.9753170013427734, |
|
"logits/rejected": -2.036269426345825, |
|
"logps/chosen": -162.7784423828125, |
|
"logps/rejected": -176.72535705566406, |
|
"loss": 0.6218, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.21797724068164825, |
|
"rewards/margins": 0.17236095666885376, |
|
"rewards/rejected": -0.3903381824493408, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.8789664308790936e-05, |
|
"logits/chosen": -1.8070260286331177, |
|
"logits/rejected": -1.8477225303649902, |
|
"logps/chosen": -221.89990234375, |
|
"logps/rejected": -199.6925048828125, |
|
"loss": 0.7534, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5042496919631958, |
|
"rewards/margins": 0.0008268915116786957, |
|
"rewards/rejected": -0.5050765872001648, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.8778626359220715e-05, |
|
"logits/chosen": -1.9517831802368164, |
|
"logits/rejected": -1.9943265914916992, |
|
"logps/chosen": -157.50552368164062, |
|
"logps/rejected": -189.5386962890625, |
|
"loss": 0.7944, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.07389702647924423, |
|
"rewards/margins": -0.1338617205619812, |
|
"rewards/rejected": 0.059964705258607864, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.8767539566312734e-05, |
|
"logits/chosen": -2.0427169799804688, |
|
"logits/rejected": -1.9722158908843994, |
|
"logps/chosen": -153.33877563476562, |
|
"logps/rejected": -152.97161865234375, |
|
"loss": 0.9077, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.18528270721435547, |
|
"rewards/margins": -0.3350525200366974, |
|
"rewards/rejected": 0.14976979792118073, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.875640395284023e-05, |
|
"logits/chosen": -2.2405035495758057, |
|
"logits/rejected": -2.180358648300171, |
|
"logps/chosen": -149.2640838623047, |
|
"logps/rejected": -144.21060180664062, |
|
"loss": 0.6886, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.029363110661506653, |
|
"rewards/margins": 0.07899504899978638, |
|
"rewards/rejected": -0.10835815221071243, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.874521954167671e-05, |
|
"logits/chosen": -2.171722888946533, |
|
"logits/rejected": -2.134547710418701, |
|
"logps/chosen": -142.7288818359375, |
|
"logps/rejected": -133.72573852539062, |
|
"loss": 0.7838, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.31318625807762146, |
|
"rewards/margins": -0.14739766716957092, |
|
"rewards/rejected": -0.16578857600688934, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.8733986355795905e-05, |
|
"logits/chosen": -1.8786143064498901, |
|
"logits/rejected": -1.8554753065109253, |
|
"logps/chosen": -202.17709350585938, |
|
"logps/rejected": -205.63951110839844, |
|
"loss": 0.6038, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.08190246671438217, |
|
"rewards/margins": 0.2607104182243347, |
|
"rewards/rejected": -0.17880797386169434, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.8722704418271745e-05, |
|
"logits/chosen": -2.1331138610839844, |
|
"logits/rejected": -2.1795654296875, |
|
"logps/chosen": -140.4710693359375, |
|
"logps/rejected": -143.41455078125, |
|
"loss": 0.6648, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.11869597434997559, |
|
"rewards/margins": 0.0922723338007927, |
|
"rewards/rejected": -0.21096831560134888, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.871137375227829e-05, |
|
"logits/chosen": -1.9935777187347412, |
|
"logits/rejected": -1.9692142009735107, |
|
"logps/chosen": -297.49334716796875, |
|
"logps/rejected": -299.84173583984375, |
|
"loss": 0.7695, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.12770779430866241, |
|
"rewards/margins": -0.043505311012268066, |
|
"rewards/rejected": -0.08420247584581375, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.869999438108971e-05, |
|
"logits/chosen": -2.244154691696167, |
|
"logits/rejected": -2.2609543800354004, |
|
"logps/chosen": -148.76113891601562, |
|
"logps/rejected": -137.73040771484375, |
|
"loss": 0.7324, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.16916662454605103, |
|
"rewards/margins": -0.04308079555630684, |
|
"rewards/rejected": -0.12608584761619568, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.8688566328080215e-05, |
|
"logits/chosen": -2.1407310962677, |
|
"logits/rejected": -2.119994640350342, |
|
"logps/chosen": -167.45089721679688, |
|
"logps/rejected": -159.93060302734375, |
|
"loss": 0.7649, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.3510248363018036, |
|
"rewards/margins": -0.09942013025283813, |
|
"rewards/rejected": -0.25160470604896545, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.867708961672399e-05, |
|
"logits/chosen": -2.185452699661255, |
|
"logits/rejected": -2.1928515434265137, |
|
"logps/chosen": -186.91505432128906, |
|
"logps/rejected": -185.78733825683594, |
|
"loss": 0.7766, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.09309504181146622, |
|
"rewards/margins": -0.11240511387586594, |
|
"rewards/rejected": 0.01931007206439972, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.866556427059519e-05, |
|
"logits/chosen": -2.1376357078552246, |
|
"logits/rejected": -2.2185373306274414, |
|
"logps/chosen": -166.6249542236328, |
|
"logps/rejected": -159.41290283203125, |
|
"loss": 0.8817, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.1689298152923584, |
|
"rewards/margins": -0.18555757403373718, |
|
"rewards/rejected": 0.016627788543701172, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.865399031336787e-05, |
|
"logits/chosen": -1.9547293186187744, |
|
"logits/rejected": -2.0602641105651855, |
|
"logps/chosen": -159.6194610595703, |
|
"logps/rejected": -187.00814819335938, |
|
"loss": 0.5742, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.08318576216697693, |
|
"rewards/margins": 0.3107747733592987, |
|
"rewards/rejected": -0.22758902609348297, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.8642367768815936e-05, |
|
"logits/chosen": -2.290693998336792, |
|
"logits/rejected": -2.2612967491149902, |
|
"logps/chosen": -204.46893310546875, |
|
"logps/rejected": -196.72003173828125, |
|
"loss": 0.6234, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.20541870594024658, |
|
"rewards/margins": 0.22441859543323517, |
|
"rewards/rejected": -0.42983728647232056, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.863069666081307e-05, |
|
"logits/chosen": -2.1638312339782715, |
|
"logits/rejected": -2.109403133392334, |
|
"logps/chosen": -138.51168823242188, |
|
"logps/rejected": -137.55712890625, |
|
"loss": 0.7573, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.020162537693977356, |
|
"rewards/margins": -0.048720985651016235, |
|
"rewards/rejected": 0.028558451682329178, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.861897701333274e-05, |
|
"logits/chosen": -2.2542800903320312, |
|
"logits/rejected": -2.258378744125366, |
|
"logps/chosen": -134.4961700439453, |
|
"logps/rejected": -148.88816833496094, |
|
"loss": 0.7321, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.294142484664917, |
|
"rewards/margins": -0.008809719234704971, |
|
"rewards/rejected": -0.2853327691555023, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.86072088504481e-05, |
|
"logits/chosen": -2.152188539505005, |
|
"logits/rejected": -2.193638324737549, |
|
"logps/chosen": -138.4483184814453, |
|
"logps/rejected": -143.88937377929688, |
|
"loss": 0.7268, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.020661018788814545, |
|
"rewards/margins": 0.026195645332336426, |
|
"rewards/rejected": -0.00553460419178009, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.859539219633199e-05, |
|
"logits/chosen": -2.293026924133301, |
|
"logits/rejected": -2.278402805328369, |
|
"logps/chosen": -163.4123077392578, |
|
"logps/rejected": -180.30471801757812, |
|
"loss": 0.7216, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.1497456282377243, |
|
"rewards/margins": -0.013850819319486618, |
|
"rewards/rejected": -0.1358948051929474, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.8583527075256804e-05, |
|
"logits/chosen": -2.0941126346588135, |
|
"logits/rejected": -2.131237745285034, |
|
"logps/chosen": -153.41043090820312, |
|
"logps/rejected": -145.23020935058594, |
|
"loss": 0.6753, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.00170879065990448, |
|
"rewards/margins": 0.09639380127191544, |
|
"rewards/rejected": -0.09810256958007812, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.857161351159454e-05, |
|
"logits/chosen": -1.8922992944717407, |
|
"logits/rejected": -1.7540662288665771, |
|
"logps/chosen": -142.38389587402344, |
|
"logps/rejected": -148.71514892578125, |
|
"loss": 0.6864, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.02964305877685547, |
|
"rewards/margins": 0.07973094284534454, |
|
"rewards/rejected": -0.10937398672103882, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.8559651529816664e-05, |
|
"logits/chosen": -2.170006513595581, |
|
"logits/rejected": -2.202960252761841, |
|
"logps/chosen": -144.66648864746094, |
|
"logps/rejected": -136.111083984375, |
|
"loss": 0.7541, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.14397282898426056, |
|
"rewards/margins": -0.059857327491045, |
|
"rewards/rejected": -0.08411550521850586, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.854764115449411e-05, |
|
"logits/chosen": -2.098104238510132, |
|
"logits/rejected": -2.05627179145813, |
|
"logps/chosen": -138.97332763671875, |
|
"logps/rejected": -143.80482482910156, |
|
"loss": 0.7632, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.13671807944774628, |
|
"rewards/margins": -0.11267251521348953, |
|
"rewards/rejected": -0.024045560508966446, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.853558241029723e-05, |
|
"logits/chosen": -2.045684337615967, |
|
"logits/rejected": -2.010531425476074, |
|
"logps/chosen": -158.4129180908203, |
|
"logps/rejected": -169.3193817138672, |
|
"loss": 0.7891, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.13485963642597198, |
|
"rewards/margins": -0.10890495777130127, |
|
"rewards/rejected": -0.025954678654670715, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.8523475321995715e-05, |
|
"logits/chosen": -2.199786901473999, |
|
"logits/rejected": -2.0181658267974854, |
|
"logps/chosen": -189.12388610839844, |
|
"logps/rejected": -139.44427490234375, |
|
"loss": 0.7936, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.2502678632736206, |
|
"rewards/margins": -0.15687265992164612, |
|
"rewards/rejected": -0.09339523315429688, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.8511319914458555e-05, |
|
"logits/chosen": -2.1136443614959717, |
|
"logits/rejected": -2.1628942489624023, |
|
"logps/chosen": -153.02317810058594, |
|
"logps/rejected": -147.45071411132812, |
|
"loss": 0.7746, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.15984635055065155, |
|
"rewards/margins": -0.07732124626636505, |
|
"rewards/rejected": -0.0825251117348671, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.849911621265401e-05, |
|
"logits/chosen": -2.0306332111358643, |
|
"logits/rejected": -2.0360593795776367, |
|
"logps/chosen": -153.94784545898438, |
|
"logps/rejected": -162.53622436523438, |
|
"loss": 0.7162, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.1451529562473297, |
|
"rewards/margins": 0.013205336406826973, |
|
"rewards/rejected": -0.15835829079151154, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.848686424164953e-05, |
|
"logits/chosen": -2.1170244216918945, |
|
"logits/rejected": -2.0833611488342285, |
|
"logps/chosen": -140.57786560058594, |
|
"logps/rejected": -133.22264099121094, |
|
"loss": 0.6097, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.15281759202480316, |
|
"rewards/margins": 0.23962977528572083, |
|
"rewards/rejected": -0.08681218326091766, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.84745640266117e-05, |
|
"logits/chosen": -2.1526424884796143, |
|
"logits/rejected": -2.1702723503112793, |
|
"logps/chosen": -152.15927124023438, |
|
"logps/rejected": -158.22642517089844, |
|
"loss": 0.6938, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.09753294289112091, |
|
"rewards/margins": 0.03131475672125816, |
|
"rewards/rejected": -0.12884768843650818, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.846221559280624e-05, |
|
"logits/chosen": -2.025162696838379, |
|
"logits/rejected": -2.069690704345703, |
|
"logps/chosen": -147.05039978027344, |
|
"logps/rejected": -162.94961547851562, |
|
"loss": 0.6505, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.15327642858028412, |
|
"rewards/margins": 0.11415009945631027, |
|
"rewards/rejected": 0.03912632539868355, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.844981896559787e-05, |
|
"logits/chosen": -2.208263635635376, |
|
"logits/rejected": -2.176539659500122, |
|
"logps/chosen": -212.233154296875, |
|
"logps/rejected": -209.1885223388672, |
|
"loss": 0.6771, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.31684011220932007, |
|
"rewards/margins": 0.08533424139022827, |
|
"rewards/rejected": -0.40217435359954834, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.8437374170450344e-05, |
|
"logits/chosen": -2.2429494857788086, |
|
"logits/rejected": -2.2173945903778076, |
|
"logps/chosen": -199.77957153320312, |
|
"logps/rejected": -156.09060668945312, |
|
"loss": 0.6945, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.056611090898513794, |
|
"rewards/margins": 0.11540088802576065, |
|
"rewards/rejected": -0.17201198637485504, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.842488123292632e-05, |
|
"logits/chosen": -2.088970422744751, |
|
"logits/rejected": -2.0711379051208496, |
|
"logps/chosen": -161.3831787109375, |
|
"logps/rejected": -153.05728149414062, |
|
"loss": 0.6521, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.25492504239082336, |
|
"rewards/margins": 0.13332128524780273, |
|
"rewards/rejected": 0.12160372734069824, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.8412340178687374e-05, |
|
"logits/chosen": -2.0223379135131836, |
|
"logits/rejected": -2.0152769088745117, |
|
"logps/chosen": -131.5534210205078, |
|
"logps/rejected": -137.33941650390625, |
|
"loss": 0.6658, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.1717926263809204, |
|
"rewards/margins": 0.12639762461185455, |
|
"rewards/rejected": -0.29819023609161377, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.839975103349391e-05, |
|
"logits/chosen": -2.299764633178711, |
|
"logits/rejected": -2.2954838275909424, |
|
"logps/chosen": -177.5594024658203, |
|
"logps/rejected": -162.41683959960938, |
|
"loss": 0.6371, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.12795095145702362, |
|
"rewards/margins": 0.15119165182113647, |
|
"rewards/rejected": -0.2791425883769989, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.8387113823205096e-05, |
|
"logits/chosen": -2.0409903526306152, |
|
"logits/rejected": -2.0102832317352295, |
|
"logps/chosen": -171.60342407226562, |
|
"logps/rejected": -162.114013671875, |
|
"loss": 0.7369, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.04189925640821457, |
|
"rewards/margins": 0.03296327590942383, |
|
"rewards/rejected": -0.074862539768219, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.8374428573778864e-05, |
|
"logits/chosen": -2.1743125915527344, |
|
"logits/rejected": -2.2494208812713623, |
|
"logps/chosen": -154.51788330078125, |
|
"logps/rejected": -161.42127990722656, |
|
"loss": 0.7557, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.04235090687870979, |
|
"rewards/margins": -0.0858534649014473, |
|
"rewards/rejected": 0.1282043755054474, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.8361695311271795e-05, |
|
"logits/chosen": -1.9694074392318726, |
|
"logits/rejected": -1.9777883291244507, |
|
"logps/chosen": -172.72686767578125, |
|
"logps/rejected": -187.39076232910156, |
|
"loss": 0.6227, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.23133322596549988, |
|
"rewards/margins": 0.19938969612121582, |
|
"rewards/rejected": -0.4307229220867157, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.83489140618391e-05, |
|
"logits/chosen": -2.125094175338745, |
|
"logits/rejected": -2.069443464279175, |
|
"logps/chosen": -177.20260620117188, |
|
"logps/rejected": -166.99986267089844, |
|
"loss": 0.681, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.07136122137308121, |
|
"rewards/margins": 0.09348535537719727, |
|
"rewards/rejected": -0.02212415263056755, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.833608485173457e-05, |
|
"logits/chosen": -2.26041841506958, |
|
"logits/rejected": -2.3254926204681396, |
|
"logps/chosen": -142.5237274169922, |
|
"logps/rejected": -150.29586791992188, |
|
"loss": 0.7873, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.08850278705358505, |
|
"rewards/margins": -0.12702789902687073, |
|
"rewards/rejected": 0.03852510452270508, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.8323207707310496e-05, |
|
"logits/chosen": -2.112971782684326, |
|
"logits/rejected": -2.1613588333129883, |
|
"logps/chosen": -170.9886932373047, |
|
"logps/rejected": -180.46363830566406, |
|
"loss": 0.68, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.04462575912475586, |
|
"rewards/margins": 0.08605735003948212, |
|
"rewards/rejected": -0.13068309426307678, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.831028265501764e-05, |
|
"logits/chosen": -1.8144406080245972, |
|
"logits/rejected": -1.8706724643707275, |
|
"logps/chosen": -172.77764892578125, |
|
"logps/rejected": -187.60598754882812, |
|
"loss": 0.7243, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.18522275984287262, |
|
"rewards/margins": 0.02290777862071991, |
|
"rewards/rejected": -0.20813053846359253, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.829730972140517e-05, |
|
"logits/chosen": -2.0918076038360596, |
|
"logits/rejected": -2.0828185081481934, |
|
"logps/chosen": -133.16778564453125, |
|
"logps/rejected": -139.5255889892578, |
|
"loss": 0.6419, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.05682823061943054, |
|
"rewards/margins": 0.1471494734287262, |
|
"rewards/rejected": -0.09032122790813446, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.8284288933120594e-05, |
|
"logits/chosen": -2.0551576614379883, |
|
"logits/rejected": -2.0017828941345215, |
|
"logps/chosen": -173.2173309326172, |
|
"logps/rejected": -187.19105529785156, |
|
"loss": 0.6964, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.13149763643741608, |
|
"rewards/margins": 0.09333821386098862, |
|
"rewards/rejected": -0.2248358577489853, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.8271220316909735e-05, |
|
"logits/chosen": -2.1928532123565674, |
|
"logits/rejected": -2.184141159057617, |
|
"logps/chosen": -200.8461456298828, |
|
"logps/rejected": -204.56504821777344, |
|
"loss": 0.7748, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.022504782304167747, |
|
"rewards/margins": -0.08254070580005646, |
|
"rewards/rejected": 0.06003589183092117, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.825810389961666e-05, |
|
"logits/chosen": -2.254242181777954, |
|
"logits/rejected": -2.261145830154419, |
|
"logps/chosen": -160.71571350097656, |
|
"logps/rejected": -142.36376953125, |
|
"loss": 0.6519, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.024668443948030472, |
|
"rewards/margins": 0.13744822144508362, |
|
"rewards/rejected": -0.16211667656898499, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.8244939708183596e-05, |
|
"logits/chosen": -2.1350438594818115, |
|
"logits/rejected": -2.1070897579193115, |
|
"logps/chosen": -160.7988739013672, |
|
"logps/rejected": -157.40176391601562, |
|
"loss": 0.7235, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.01498755719512701, |
|
"rewards/margins": -0.015977520495653152, |
|
"rewards/rejected": 0.03096509724855423, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.823172776965094e-05, |
|
"logits/chosen": -2.2463290691375732, |
|
"logits/rejected": -2.2258195877075195, |
|
"logps/chosen": -136.88392639160156, |
|
"logps/rejected": -128.96087646484375, |
|
"loss": 0.6831, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.055554769933223724, |
|
"rewards/margins": 0.06438815593719482, |
|
"rewards/rejected": -0.11994291841983795, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.821846811115713e-05, |
|
"logits/chosen": -1.9229159355163574, |
|
"logits/rejected": -1.839210867881775, |
|
"logps/chosen": -173.51405334472656, |
|
"logps/rejected": -153.61341857910156, |
|
"loss": 0.7675, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.1366410106420517, |
|
"rewards/margins": -0.06360301375389099, |
|
"rewards/rejected": 0.20024403929710388, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.820516075993865e-05, |
|
"logits/chosen": -2.0157222747802734, |
|
"logits/rejected": -2.0540072917938232, |
|
"logps/chosen": -141.3604278564453, |
|
"logps/rejected": -146.56663513183594, |
|
"loss": 0.7431, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.15194594860076904, |
|
"rewards/margins": 0.032469067722558975, |
|
"rewards/rejected": -0.1844150424003601, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.819180574332994e-05, |
|
"logits/chosen": -2.1887617111206055, |
|
"logits/rejected": -2.1101431846618652, |
|
"logps/chosen": -164.18893432617188, |
|
"logps/rejected": -160.06475830078125, |
|
"loss": 0.6856, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.02116868458688259, |
|
"rewards/margins": 0.028199315071105957, |
|
"rewards/rejected": -0.0493679977953434, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.8178403088763355e-05, |
|
"logits/chosen": -2.2744359970092773, |
|
"logits/rejected": -2.291210651397705, |
|
"logps/chosen": -168.8428955078125, |
|
"logps/rejected": -171.43356323242188, |
|
"loss": 0.8289, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.059717368334531784, |
|
"rewards/margins": -0.18386751413345337, |
|
"rewards/rejected": 0.12415014207363129, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.8164952823769085e-05, |
|
"logits/chosen": -1.8740739822387695, |
|
"logits/rejected": -1.8469749689102173, |
|
"logps/chosen": -135.71493530273438, |
|
"logps/rejected": -149.68069458007812, |
|
"loss": 0.6854, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.029844455420970917, |
|
"rewards/margins": 0.11711962521076202, |
|
"rewards/rejected": -0.14696410298347473, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.815145497597514e-05, |
|
"logits/chosen": -2.102431297302246, |
|
"logits/rejected": -2.032944917678833, |
|
"logps/chosen": -173.45106506347656, |
|
"logps/rejected": -159.73878479003906, |
|
"loss": 0.9183, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.022377396002411842, |
|
"rewards/margins": -0.20098015666007996, |
|
"rewards/rejected": 0.22335757315158844, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.8137909573107246e-05, |
|
"logits/chosen": -2.3416833877563477, |
|
"logits/rejected": -2.3538575172424316, |
|
"logps/chosen": -167.69911193847656, |
|
"logps/rejected": -157.8156280517578, |
|
"loss": 0.7045, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.004629455506801605, |
|
"rewards/margins": 0.09061210602521896, |
|
"rewards/rejected": -0.08598263561725616, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.812431664298883e-05, |
|
"logits/chosen": -2.17607045173645, |
|
"logits/rejected": -2.1720361709594727, |
|
"logps/chosen": -166.7891845703125, |
|
"logps/rejected": -166.55804443359375, |
|
"loss": 0.7444, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.178691565990448, |
|
"rewards/margins": -0.029032886028289795, |
|
"rewards/rejected": -0.1496586799621582, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.811067621354094e-05, |
|
"logits/chosen": -2.0844247341156006, |
|
"logits/rejected": -2.150310516357422, |
|
"logps/chosen": -160.33216857910156, |
|
"logps/rejected": -183.65805053710938, |
|
"loss": 0.738, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.019554704427719116, |
|
"rewards/margins": -0.00408715195953846, |
|
"rewards/rejected": -0.015467546880245209, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.8096988312782174e-05, |
|
"logits/chosen": -2.148946762084961, |
|
"logits/rejected": -2.091033458709717, |
|
"logps/chosen": -163.8697052001953, |
|
"logps/rejected": -177.90403747558594, |
|
"loss": 0.7519, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.020494531840085983, |
|
"rewards/margins": -0.09285805374383926, |
|
"rewards/rejected": 0.11335259675979614, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.8083252968828665e-05, |
|
"logits/chosen": -2.034929037094116, |
|
"logits/rejected": -2.0719597339630127, |
|
"logps/chosen": -154.4242401123047, |
|
"logps/rejected": -157.4151611328125, |
|
"loss": 0.7685, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.11203078925609589, |
|
"rewards/margins": -0.1181333065032959, |
|
"rewards/rejected": 0.006102517247200012, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.8069470209893974e-05, |
|
"logits/chosen": -2.1990954875946045, |
|
"logits/rejected": -2.1845507621765137, |
|
"logps/chosen": -167.10202026367188, |
|
"logps/rejected": -173.89694213867188, |
|
"loss": 0.7338, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.048778437077999115, |
|
"rewards/margins": -0.013265417888760567, |
|
"rewards/rejected": -0.0355130136013031, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.8055640064289086e-05, |
|
"logits/chosen": -2.0750880241394043, |
|
"logits/rejected": -2.0607268810272217, |
|
"logps/chosen": -145.49095153808594, |
|
"logps/rejected": -146.54730224609375, |
|
"loss": 0.7064, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.12515582144260406, |
|
"rewards/margins": 0.004622337408363819, |
|
"rewards/rejected": -0.1297781616449356, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.80417625604223e-05, |
|
"logits/chosen": -2.065021276473999, |
|
"logits/rejected": -1.985723853111267, |
|
"logps/chosen": -168.58168029785156, |
|
"logps/rejected": -154.1855010986328, |
|
"loss": 0.7135, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.2825557291507721, |
|
"rewards/margins": -0.0009878575801849365, |
|
"rewards/rejected": 0.28354358673095703, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.8027837726799205e-05, |
|
"logits/chosen": -2.0231850147247314, |
|
"logits/rejected": -2.0219273567199707, |
|
"logps/chosen": -177.43508911132812, |
|
"logps/rejected": -180.77137756347656, |
|
"loss": 0.6861, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.008570343255996704, |
|
"rewards/margins": 0.08320408314466476, |
|
"rewards/rejected": -0.07463373243808746, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.801386559202259e-05, |
|
"logits/chosen": -2.098315477371216, |
|
"logits/rejected": -2.0270769596099854, |
|
"logps/chosen": -174.91799926757812, |
|
"logps/rejected": -150.36114501953125, |
|
"loss": 0.7361, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.12160081416368484, |
|
"rewards/margins": 0.009557720273733139, |
|
"rewards/rejected": 0.112043097615242, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.799984618479242e-05, |
|
"logits/chosen": -1.9548274278640747, |
|
"logits/rejected": -1.9769983291625977, |
|
"logps/chosen": -142.3773193359375, |
|
"logps/rejected": -138.9167938232422, |
|
"loss": 0.8095, |
|
"rewards/accuracies": 0.125, |
|
"rewards/chosen": -0.1921553909778595, |
|
"rewards/margins": -0.17860561609268188, |
|
"rewards/rejected": -0.01354978233575821, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.798577953390577e-05, |
|
"logits/chosen": -2.0205070972442627, |
|
"logits/rejected": -2.027376413345337, |
|
"logps/chosen": -195.26025390625, |
|
"logps/rejected": -219.80625915527344, |
|
"loss": 0.7086, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.27936846017837524, |
|
"rewards/margins": 0.01577301323413849, |
|
"rewards/rejected": -0.29514145851135254, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.797166566825675e-05, |
|
"logits/chosen": -2.071674346923828, |
|
"logits/rejected": -2.0323328971862793, |
|
"logps/chosen": -151.1609344482422, |
|
"logps/rejected": -169.61581420898438, |
|
"loss": 0.6817, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.14004096388816833, |
|
"rewards/margins": 0.09525477886199951, |
|
"rewards/rejected": 0.04478616267442703, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.795750461683644e-05, |
|
"logits/chosen": -2.1325645446777344, |
|
"logits/rejected": -2.059990882873535, |
|
"logps/chosen": -154.6941680908203, |
|
"logps/rejected": -157.33233642578125, |
|
"loss": 0.6819, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.07734552025794983, |
|
"rewards/margins": 0.0484672486782074, |
|
"rewards/rejected": 0.028878264129161835, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.794329640873285e-05, |
|
"logits/chosen": -2.094820737838745, |
|
"logits/rejected": -2.146373987197876, |
|
"logps/chosen": -145.1197509765625, |
|
"logps/rejected": -144.53829956054688, |
|
"loss": 0.5279, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.18119190633296967, |
|
"rewards/margins": 0.4195486307144165, |
|
"rewards/rejected": -0.23835672438144684, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.7929041073130867e-05, |
|
"logits/chosen": -2.0487372875213623, |
|
"logits/rejected": -1.9535651206970215, |
|
"logps/chosen": -146.88800048828125, |
|
"logps/rejected": -146.015380859375, |
|
"loss": 0.5709, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.05707988515496254, |
|
"rewards/margins": 0.39438337087631226, |
|
"rewards/rejected": -0.33730348944664, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.7914738639312165e-05, |
|
"logits/chosen": -1.9662736654281616, |
|
"logits/rejected": -2.0507540702819824, |
|
"logps/chosen": -159.54833984375, |
|
"logps/rejected": -181.677978515625, |
|
"loss": 0.8444, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.1504884660243988, |
|
"rewards/margins": -0.18733000755310059, |
|
"rewards/rejected": 0.036841537803411484, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.790038913665519e-05, |
|
"logits/chosen": -1.8254969120025635, |
|
"logits/rejected": -1.8466860055923462, |
|
"logps/chosen": -121.59519958496094, |
|
"logps/rejected": -131.51698303222656, |
|
"loss": 0.7607, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": 0.16594046354293823, |
|
"rewards/margins": -0.08683046698570251, |
|
"rewards/rejected": 0.25277090072631836, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.788599259463502e-05, |
|
"logits/chosen": -2.0490996837615967, |
|
"logits/rejected": -2.088806390762329, |
|
"logps/chosen": -144.5177459716797, |
|
"logps/rejected": -151.15444946289062, |
|
"loss": 0.7034, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.16910819709300995, |
|
"rewards/margins": 0.07333025336265564, |
|
"rewards/rejected": 0.09577794373035431, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.787154904282341e-05, |
|
"logits/chosen": -2.2665562629699707, |
|
"logits/rejected": -2.2620351314544678, |
|
"logps/chosen": -133.22760009765625, |
|
"logps/rejected": -147.43667602539062, |
|
"loss": 0.7657, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.18194647133350372, |
|
"rewards/margins": -0.09383340179920197, |
|
"rewards/rejected": -0.08811306953430176, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.7857058510888645e-05, |
|
"logits/chosen": -2.2096052169799805, |
|
"logits/rejected": -2.216580867767334, |
|
"logps/chosen": -155.67662048339844, |
|
"logps/rejected": -169.5216064453125, |
|
"loss": 0.7002, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.0033222604542970657, |
|
"rewards/margins": 0.02931857667863369, |
|
"rewards/rejected": -0.02599630132317543, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.7842521028595526e-05, |
|
"logits/chosen": -2.1179494857788086, |
|
"logits/rejected": -2.172715187072754, |
|
"logps/chosen": -139.75497436523438, |
|
"logps/rejected": -149.2156982421875, |
|
"loss": 0.7134, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.20826546847820282, |
|
"rewards/margins": 0.03352481871843338, |
|
"rewards/rejected": -0.2417902648448944, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.7827936625805284e-05, |
|
"logits/chosen": -2.1453120708465576, |
|
"logits/rejected": -2.2276957035064697, |
|
"logps/chosen": -160.8773651123047, |
|
"logps/rejected": -182.9378204345703, |
|
"loss": 0.715, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.20616355538368225, |
|
"rewards/margins": 0.011941194534301758, |
|
"rewards/rejected": -0.21810473501682281, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.7813305332475535e-05, |
|
"logits/chosen": -1.5362883806228638, |
|
"logits/rejected": -1.4986250400543213, |
|
"logps/chosen": -256.8396301269531, |
|
"logps/rejected": -269.427001953125, |
|
"loss": 0.6733, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.024325193837285042, |
|
"rewards/margins": 0.11790871620178223, |
|
"rewards/rejected": -0.14223390817642212, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.77986271786602e-05, |
|
"logits/chosen": -2.1201870441436768, |
|
"logits/rejected": -2.150663137435913, |
|
"logps/chosen": -144.35189819335938, |
|
"logps/rejected": -152.51849365234375, |
|
"loss": 0.605, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.19814497232437134, |
|
"rewards/margins": 0.2016732394695282, |
|
"rewards/rejected": -0.39981821179389954, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.778390219450949e-05, |
|
"logits/chosen": -1.9574625492095947, |
|
"logits/rejected": -1.9414085149765015, |
|
"logps/chosen": -206.0396728515625, |
|
"logps/rejected": -230.67552185058594, |
|
"loss": 0.5939, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.0024340637028217316, |
|
"rewards/margins": 0.2473735809326172, |
|
"rewards/rejected": -0.24493952095508575, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.776913041026976e-05, |
|
"logits/chosen": -2.2131030559539795, |
|
"logits/rejected": -2.1374146938323975, |
|
"logps/chosen": -157.73483276367188, |
|
"logps/rejected": -147.059326171875, |
|
"loss": 0.6997, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.1409614086151123, |
|
"rewards/margins": 0.018417831510305405, |
|
"rewards/rejected": -0.15937921404838562, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.775431185628353e-05, |
|
"logits/chosen": -1.8067915439605713, |
|
"logits/rejected": -1.7414895296096802, |
|
"logps/chosen": -192.5210418701172, |
|
"logps/rejected": -151.17227172851562, |
|
"loss": 0.8156, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.41330477595329285, |
|
"rewards/margins": -0.11488999426364899, |
|
"rewards/rejected": -0.29841476678848267, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.7739446562989384e-05, |
|
"logits/chosen": -2.2105062007904053, |
|
"logits/rejected": -2.2545359134674072, |
|
"logps/chosen": -171.10385131835938, |
|
"logps/rejected": -176.2754364013672, |
|
"loss": 0.6499, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5258097648620605, |
|
"rewards/margins": 0.18083709478378296, |
|
"rewards/rejected": -0.7066469192504883, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.772453456092191e-05, |
|
"logits/chosen": -2.2050392627716064, |
|
"logits/rejected": -2.1882925033569336, |
|
"logps/chosen": -181.8564453125, |
|
"logps/rejected": -180.79759216308594, |
|
"loss": 0.7227, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.48925238847732544, |
|
"rewards/margins": -0.004873424768447876, |
|
"rewards/rejected": -0.4843789339065552, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.7709575880711634e-05, |
|
"logits/chosen": -2.2256252765655518, |
|
"logits/rejected": -2.2812557220458984, |
|
"logps/chosen": -113.94125366210938, |
|
"logps/rejected": -123.2302474975586, |
|
"loss": 0.7755, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.23328812420368195, |
|
"rewards/margins": -0.1003262847661972, |
|
"rewards/rejected": -0.13296185433864594, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.769457055308497e-05, |
|
"logits/chosen": -2.215670108795166, |
|
"logits/rejected": -2.2150087356567383, |
|
"logps/chosen": -166.19100952148438, |
|
"logps/rejected": -174.61366271972656, |
|
"loss": 0.7388, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.3156086504459381, |
|
"rewards/margins": -0.022609539330005646, |
|
"rewards/rejected": -0.29299911856651306, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.767951860886415e-05, |
|
"logits/chosen": -1.9413666725158691, |
|
"logits/rejected": -2.0344936847686768, |
|
"logps/chosen": -140.91476440429688, |
|
"logps/rejected": -164.15542602539062, |
|
"loss": 0.7518, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.37001854181289673, |
|
"rewards/margins": -0.07773025333881378, |
|
"rewards/rejected": -0.29228830337524414, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.766442007896715e-05, |
|
"logits/chosen": -2.185791254043579, |
|
"logits/rejected": -2.158402681350708, |
|
"logps/chosen": -174.4066925048828, |
|
"logps/rejected": -174.22225952148438, |
|
"loss": 0.6677, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.426675409078598, |
|
"rewards/margins": 0.14762084186077118, |
|
"rewards/rejected": -0.5742962956428528, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.764927499440767e-05, |
|
"logits/chosen": -1.982418417930603, |
|
"logits/rejected": -1.9350054264068604, |
|
"logps/chosen": -178.34976196289062, |
|
"logps/rejected": -180.20494079589844, |
|
"loss": 0.6274, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3919566869735718, |
|
"rewards/margins": 0.19768588244915009, |
|
"rewards/rejected": -0.5896425247192383, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.763408338629498e-05, |
|
"logits/chosen": -2.2213985919952393, |
|
"logits/rejected": -2.2709920406341553, |
|
"logps/chosen": -143.23550415039062, |
|
"logps/rejected": -147.50332641601562, |
|
"loss": 0.6232, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.20936697721481323, |
|
"rewards/margins": 0.1896706074476242, |
|
"rewards/rejected": -0.39903756976127625, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.761884528583396e-05, |
|
"logits/chosen": -2.2714898586273193, |
|
"logits/rejected": -2.1939728260040283, |
|
"logps/chosen": -182.6309356689453, |
|
"logps/rejected": -193.048583984375, |
|
"loss": 0.7323, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.4604118764400482, |
|
"rewards/margins": 0.03731643036007881, |
|
"rewards/rejected": -0.49772825837135315, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.760356072432498e-05, |
|
"logits/chosen": -2.160900115966797, |
|
"logits/rejected": -2.0733065605163574, |
|
"logps/chosen": -184.3874053955078, |
|
"logps/rejected": -181.5076904296875, |
|
"loss": 0.6753, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.27709901332855225, |
|
"rewards/margins": 0.06253170967102051, |
|
"rewards/rejected": -0.33963072299957275, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.7588229733163834e-05, |
|
"logits/chosen": -1.7874306440353394, |
|
"logits/rejected": -1.8620976209640503, |
|
"logps/chosen": -207.32083129882812, |
|
"logps/rejected": -205.32540893554688, |
|
"loss": 0.6809, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5504447817802429, |
|
"rewards/margins": 0.07099004089832306, |
|
"rewards/rejected": -0.6214348077774048, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.757285234384169e-05, |
|
"logits/chosen": -2.2311081886291504, |
|
"logits/rejected": -2.1636242866516113, |
|
"logps/chosen": -165.6267852783203, |
|
"logps/rejected": -168.37962341308594, |
|
"loss": 0.7484, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.8211495280265808, |
|
"rewards/margins": -0.0740201398730278, |
|
"rewards/rejected": -0.7471294403076172, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.755742858794503e-05, |
|
"logits/chosen": -2.26678729057312, |
|
"logits/rejected": -2.2344679832458496, |
|
"logps/chosen": -157.74423217773438, |
|
"logps/rejected": -152.1922149658203, |
|
"loss": 0.7029, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.18589326739311218, |
|
"rewards/margins": 0.01804957166314125, |
|
"rewards/rejected": -0.20394286513328552, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.754195849715557e-05, |
|
"logits/chosen": -2.143720865249634, |
|
"logits/rejected": -2.176121711730957, |
|
"logps/chosen": -171.01498413085938, |
|
"logps/rejected": -174.25668334960938, |
|
"loss": 0.7835, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.34361717104911804, |
|
"rewards/margins": -0.003480616956949234, |
|
"rewards/rejected": -0.3401365578174591, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.75264421032502e-05, |
|
"logits/chosen": -2.2774815559387207, |
|
"logits/rejected": -2.2418551445007324, |
|
"logps/chosen": -183.46746826171875, |
|
"logps/rejected": -172.81829833984375, |
|
"loss": 0.6842, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.40849414467811584, |
|
"rewards/margins": 0.09351891279220581, |
|
"rewards/rejected": -0.5020129680633545, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.751087943810093e-05, |
|
"logits/chosen": -1.9202462434768677, |
|
"logits/rejected": -1.9200413227081299, |
|
"logps/chosen": -178.16827392578125, |
|
"logps/rejected": -163.49049377441406, |
|
"loss": 0.7876, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.6583839058876038, |
|
"rewards/margins": -0.0725487470626831, |
|
"rewards/rejected": -0.5858351588249207, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.749527053367481e-05, |
|
"logits/chosen": -2.3440024852752686, |
|
"logits/rejected": -2.306640386581421, |
|
"logps/chosen": -204.37374877929688, |
|
"logps/rejected": -196.37139892578125, |
|
"loss": 0.7817, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.3725470006465912, |
|
"rewards/margins": -0.10262566059827805, |
|
"rewards/rejected": -0.26992136240005493, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.747961542203386e-05, |
|
"logits/chosen": -2.206644296646118, |
|
"logits/rejected": -2.216529607772827, |
|
"logps/chosen": -145.55783081054688, |
|
"logps/rejected": -149.01528930664062, |
|
"loss": 0.651, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.29852521419525146, |
|
"rewards/margins": 0.12249046564102173, |
|
"rewards/rejected": -0.42101573944091797, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.746391413533503e-05, |
|
"logits/chosen": -1.9940528869628906, |
|
"logits/rejected": -1.959697961807251, |
|
"logps/chosen": -162.80947875976562, |
|
"logps/rejected": -181.39344787597656, |
|
"loss": 0.6996, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5685576796531677, |
|
"rewards/margins": 0.06293636560440063, |
|
"rewards/rejected": -0.6314940452575684, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.74481667058301e-05, |
|
"logits/chosen": -2.064054250717163, |
|
"logits/rejected": -2.1215896606445312, |
|
"logps/chosen": -154.61865234375, |
|
"logps/rejected": -155.92892456054688, |
|
"loss": 0.6675, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.4008215069770813, |
|
"rewards/margins": 0.12303955852985382, |
|
"rewards/rejected": -0.5238610506057739, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.743237316586564e-05, |
|
"logits/chosen": -2.2184903621673584, |
|
"logits/rejected": -2.3070037364959717, |
|
"logps/chosen": -149.61441040039062, |
|
"logps/rejected": -151.4566650390625, |
|
"loss": 0.7559, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.3738159239292145, |
|
"rewards/margins": -0.02443253993988037, |
|
"rewards/rejected": -0.3493833541870117, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.741653354788295e-05, |
|
"logits/chosen": -2.168128252029419, |
|
"logits/rejected": -2.1225526332855225, |
|
"logps/chosen": -160.09083557128906, |
|
"logps/rejected": -169.74386596679688, |
|
"loss": 0.7091, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5666425228118896, |
|
"rewards/margins": 0.06661619246006012, |
|
"rewards/rejected": -0.6332587003707886, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.7400647884417956e-05, |
|
"logits/chosen": -1.9577604532241821, |
|
"logits/rejected": -1.9241951704025269, |
|
"logps/chosen": -207.59442138671875, |
|
"logps/rejected": -217.36038208007812, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.37186557054519653, |
|
"rewards/margins": 0.10641665011644363, |
|
"rewards/rejected": -0.47828227281570435, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.7384716208101166e-05, |
|
"logits/chosen": -2.064387559890747, |
|
"logits/rejected": -2.022218942642212, |
|
"logps/chosen": -189.0228271484375, |
|
"logps/rejected": -185.25689697265625, |
|
"loss": 0.6958, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.4377724528312683, |
|
"rewards/margins": 0.09676932543516159, |
|
"rewards/rejected": -0.5345417857170105, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.736873855165762e-05, |
|
"logits/chosen": -2.2576003074645996, |
|
"logits/rejected": -2.309368848800659, |
|
"logps/chosen": -180.17724609375, |
|
"logps/rejected": -186.60797119140625, |
|
"loss": 0.7273, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.19769221544265747, |
|
"rewards/margins": 0.021774642169475555, |
|
"rewards/rejected": -0.21946686506271362, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.735271494790678e-05, |
|
"logits/chosen": -2.0674479007720947, |
|
"logits/rejected": -2.043943166732788, |
|
"logps/chosen": -189.95338439941406, |
|
"logps/rejected": -188.52682495117188, |
|
"loss": 0.8214, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.30729472637176514, |
|
"rewards/margins": -0.16690713167190552, |
|
"rewards/rejected": -0.14038759469985962, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.733664542976253e-05, |
|
"logits/chosen": -2.2818522453308105, |
|
"logits/rejected": -2.243852138519287, |
|
"logps/chosen": -139.22390747070312, |
|
"logps/rejected": -138.7141876220703, |
|
"loss": 0.6405, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.1333819478750229, |
|
"rewards/margins": 0.24985717236995697, |
|
"rewards/rejected": -0.38323909044265747, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.732053003023301e-05, |
|
"logits/chosen": -2.3318722248077393, |
|
"logits/rejected": -2.331906795501709, |
|
"logps/chosen": -170.11148071289062, |
|
"logps/rejected": -154.28684997558594, |
|
"loss": 0.8989, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.4540347456932068, |
|
"rewards/margins": -0.20150336623191833, |
|
"rewards/rejected": -0.25253134965896606, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.730436878242064e-05, |
|
"logits/chosen": -2.0526726245880127, |
|
"logits/rejected": -2.094531536102295, |
|
"logps/chosen": -148.9898681640625, |
|
"logps/rejected": -150.51864624023438, |
|
"loss": 0.6562, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.34212395548820496, |
|
"rewards/margins": 0.1897253841161728, |
|
"rewards/rejected": -0.5318493247032166, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.7288161719522016e-05, |
|
"logits/chosen": -2.161623954772949, |
|
"logits/rejected": -2.1211910247802734, |
|
"logps/chosen": -151.83523559570312, |
|
"logps/rejected": -140.38848876953125, |
|
"loss": 0.7175, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.545448899269104, |
|
"rewards/margins": 0.03246283903717995, |
|
"rewards/rejected": -0.5779117345809937, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.727190887482783e-05, |
|
"logits/chosen": -2.1172046661376953, |
|
"logits/rejected": -2.05790376663208, |
|
"logps/chosen": -141.56634521484375, |
|
"logps/rejected": -152.59849548339844, |
|
"loss": 0.7776, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.29310688376426697, |
|
"rewards/margins": 0.007600661367177963, |
|
"rewards/rejected": -0.30070751905441284, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.725561028172282e-05, |
|
"logits/chosen": -2.2756776809692383, |
|
"logits/rejected": -2.261565685272217, |
|
"logps/chosen": -121.21385955810547, |
|
"logps/rejected": -120.08661651611328, |
|
"loss": 0.6775, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.21489115059375763, |
|
"rewards/margins": 0.12781137228012085, |
|
"rewards/rejected": -0.3427025377750397, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.7239265973685696e-05, |
|
"logits/chosen": -2.1066219806671143, |
|
"logits/rejected": -2.201887845993042, |
|
"logps/chosen": -129.29530334472656, |
|
"logps/rejected": -147.51333618164062, |
|
"loss": 0.7801, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.6134562492370605, |
|
"rewards/margins": -0.030932441353797913, |
|
"rewards/rejected": -0.5825238227844238, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.722287598428907e-05, |
|
"logits/chosen": -2.0897793769836426, |
|
"logits/rejected": -2.091312885284424, |
|
"logps/chosen": -150.09129333496094, |
|
"logps/rejected": -157.86312866210938, |
|
"loss": 0.7574, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5798418521881104, |
|
"rewards/margins": -0.0995514839887619, |
|
"rewards/rejected": -0.48029035329818726, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.720644034719938e-05, |
|
"logits/chosen": -2.1701114177703857, |
|
"logits/rejected": -2.149559259414673, |
|
"logps/chosen": -178.9230499267578, |
|
"logps/rejected": -163.4266815185547, |
|
"loss": 0.7959, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5811534523963928, |
|
"rewards/margins": -0.08480577170848846, |
|
"rewards/rejected": -0.49634772539138794, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.7189959096176825e-05, |
|
"logits/chosen": -2.404374837875366, |
|
"logits/rejected": -2.407968521118164, |
|
"logps/chosen": -149.89866638183594, |
|
"logps/rejected": -149.10494995117188, |
|
"loss": 0.7978, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.4539230465888977, |
|
"rewards/margins": -0.16405440866947174, |
|
"rewards/rejected": -0.2898685932159424, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.7173432265075334e-05, |
|
"logits/chosen": -2.3623311519622803, |
|
"logits/rejected": -2.344980239868164, |
|
"logps/chosen": -233.85638427734375, |
|
"logps/rejected": -233.85716247558594, |
|
"loss": 0.7299, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.4674505591392517, |
|
"rewards/margins": 0.010146500542759895, |
|
"rewards/rejected": -0.47759705781936646, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.7156859887842416e-05, |
|
"logits/chosen": -2.444455623626709, |
|
"logits/rejected": -2.4147419929504395, |
|
"logps/chosen": -157.481201171875, |
|
"logps/rejected": -155.94912719726562, |
|
"loss": 0.8272, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.4809521436691284, |
|
"rewards/margins": -0.1707230508327484, |
|
"rewards/rejected": -0.3102290630340576, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.714024199851915e-05, |
|
"logits/chosen": -2.13613224029541, |
|
"logits/rejected": -2.11737060546875, |
|
"logps/chosen": -196.46791076660156, |
|
"logps/rejected": -206.36705017089844, |
|
"loss": 0.6643, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.41040414571762085, |
|
"rewards/margins": 0.20290464162826538, |
|
"rewards/rejected": -0.613308846950531, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.712357863124013e-05, |
|
"logits/chosen": -2.302917003631592, |
|
"logits/rejected": -2.30071759223938, |
|
"logps/chosen": -153.2189483642578, |
|
"logps/rejected": -148.35671997070312, |
|
"loss": 0.8639, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5300933122634888, |
|
"rewards/margins": -0.23618030548095703, |
|
"rewards/rejected": -0.29391294717788696, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.710686982023332e-05, |
|
"logits/chosen": -2.1037135124206543, |
|
"logits/rejected": -2.084882974624634, |
|
"logps/chosen": -171.69317626953125, |
|
"logps/rejected": -184.76455688476562, |
|
"loss": 0.8317, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.7027316093444824, |
|
"rewards/margins": -0.20845447480678558, |
|
"rewards/rejected": -0.49427708983421326, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.709011559982006e-05, |
|
"logits/chosen": -2.315164566040039, |
|
"logits/rejected": -2.35860538482666, |
|
"logps/chosen": -142.0930938720703, |
|
"logps/rejected": -144.05953979492188, |
|
"loss": 0.538, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.2587888836860657, |
|
"rewards/margins": 0.43140506744384766, |
|
"rewards/rejected": -0.6901938915252686, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.707331600441495e-05, |
|
"logits/chosen": -2.025707483291626, |
|
"logits/rejected": -2.1452269554138184, |
|
"logps/chosen": -135.1129608154297, |
|
"logps/rejected": -180.877685546875, |
|
"loss": 0.7269, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.48439598083496094, |
|
"rewards/margins": 0.12395285069942474, |
|
"rewards/rejected": -0.6083488464355469, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.705647106852581e-05, |
|
"logits/chosen": -2.1537985801696777, |
|
"logits/rejected": -2.1138670444488525, |
|
"logps/chosen": -161.801025390625, |
|
"logps/rejected": -152.15338134765625, |
|
"loss": 0.6527, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5599794983863831, |
|
"rewards/margins": 0.14886192977428436, |
|
"rewards/rejected": -0.7088414430618286, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.7039580826753564e-05, |
|
"logits/chosen": -1.9758260250091553, |
|
"logits/rejected": -2.015881299972534, |
|
"logps/chosen": -184.20703125, |
|
"logps/rejected": -191.9273681640625, |
|
"loss": 0.6208, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.572418749332428, |
|
"rewards/margins": 0.2520812749862671, |
|
"rewards/rejected": -0.8244999647140503, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.7022645313792235e-05, |
|
"logits/chosen": -2.2235710620880127, |
|
"logits/rejected": -2.2551536560058594, |
|
"logps/chosen": -152.44741821289062, |
|
"logps/rejected": -160.27525329589844, |
|
"loss": 0.792, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.53495192527771, |
|
"rewards/margins": -0.10131815075874329, |
|
"rewards/rejected": -0.4336337447166443, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.700566456442882e-05, |
|
"logits/chosen": -2.0654869079589844, |
|
"logits/rejected": -2.1601076126098633, |
|
"logps/chosen": -129.15142822265625, |
|
"logps/rejected": -140.05062866210938, |
|
"loss": 0.7985, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.4286402463912964, |
|
"rewards/margins": -0.08634166419506073, |
|
"rewards/rejected": -0.3422985374927521, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.6988638613543216e-05, |
|
"logits/chosen": -2.106463670730591, |
|
"logits/rejected": -2.1899609565734863, |
|
"logps/chosen": -207.76812744140625, |
|
"logps/rejected": -217.55172729492188, |
|
"loss": 0.7031, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.49319323897361755, |
|
"rewards/margins": 0.05841163173317909, |
|
"rewards/rejected": -0.5516048669815063, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.6971567496108206e-05, |
|
"logits/chosen": -2.287020683288574, |
|
"logits/rejected": -2.302515745162964, |
|
"logps/chosen": -146.24276733398438, |
|
"logps/rejected": -157.62596130371094, |
|
"loss": 0.7239, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.35381338000297546, |
|
"rewards/margins": -0.004093277268111706, |
|
"rewards/rejected": -0.3497200906276703, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.695445124718931e-05, |
|
"logits/chosen": -2.316082000732422, |
|
"logits/rejected": -2.2804858684539795, |
|
"logps/chosen": -172.0008087158203, |
|
"logps/rejected": -177.33901977539062, |
|
"loss": 0.7188, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.4350382685661316, |
|
"rewards/margins": -0.01686153933405876, |
|
"rewards/rejected": -0.41817668080329895, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.693728990194479e-05, |
|
"logits/chosen": -2.356797695159912, |
|
"logits/rejected": -2.373162269592285, |
|
"logps/chosen": -187.43911743164062, |
|
"logps/rejected": -201.7132568359375, |
|
"loss": 0.6999, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.3604944050312042, |
|
"rewards/margins": 0.06350287795066833, |
|
"rewards/rejected": -0.42399728298187256, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.692008349562551e-05, |
|
"logits/chosen": -2.3371644020080566, |
|
"logits/rejected": -2.244069814682007, |
|
"logps/chosen": -157.56373596191406, |
|
"logps/rejected": -158.41690063476562, |
|
"loss": 0.7745, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.7513232827186584, |
|
"rewards/margins": -0.11284235119819641, |
|
"rewards/rejected": -0.6384809613227844, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.690283206357491e-05, |
|
"logits/chosen": -1.9031703472137451, |
|
"logits/rejected": -1.8866480588912964, |
|
"logps/chosen": -137.91941833496094, |
|
"logps/rejected": -149.28627014160156, |
|
"loss": 0.6434, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.44801104068756104, |
|
"rewards/margins": 0.23882272839546204, |
|
"rewards/rejected": -0.6868337392807007, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.6885535641228904e-05, |
|
"logits/chosen": -2.1832618713378906, |
|
"logits/rejected": -2.121121406555176, |
|
"logps/chosen": -158.70449829101562, |
|
"logps/rejected": -139.44195556640625, |
|
"loss": 0.7216, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.542538046836853, |
|
"rewards/margins": -0.03218982741236687, |
|
"rewards/rejected": -0.5103481411933899, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.6868194264115833e-05, |
|
"logits/chosen": -2.2379095554351807, |
|
"logits/rejected": -2.124793291091919, |
|
"logps/chosen": -171.65277099609375, |
|
"logps/rejected": -158.50015258789062, |
|
"loss": 0.7441, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.5649640560150146, |
|
"rewards/margins": -0.034692008048295975, |
|
"rewards/rejected": -0.5302720665931702, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.685080796785637e-05, |
|
"logits/chosen": -2.2778732776641846, |
|
"logits/rejected": -2.2594337463378906, |
|
"logps/chosen": -151.48411560058594, |
|
"logps/rejected": -149.89930725097656, |
|
"loss": 0.6605, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.48167526721954346, |
|
"rewards/margins": 0.17855703830718994, |
|
"rewards/rejected": -0.6602323651313782, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.683337678816345e-05, |
|
"logits/chosen": -2.148597478866577, |
|
"logits/rejected": -2.1484172344207764, |
|
"logps/chosen": -143.36769104003906, |
|
"logps/rejected": -158.61419677734375, |
|
"loss": 0.6751, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5770382881164551, |
|
"rewards/margins": 0.10789009928703308, |
|
"rewards/rejected": -0.6849284172058105, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.6815900760842236e-05, |
|
"logits/chosen": -2.4137473106384277, |
|
"logits/rejected": -2.4021241664886475, |
|
"logps/chosen": -154.04502868652344, |
|
"logps/rejected": -157.95596313476562, |
|
"loss": 0.5879, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4364089369773865, |
|
"rewards/margins": 0.2789525091648102, |
|
"rewards/rejected": -0.715361475944519, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.679837992178996e-05, |
|
"logits/chosen": -2.3135428428649902, |
|
"logits/rejected": -2.2878835201263428, |
|
"logps/chosen": -181.33885192871094, |
|
"logps/rejected": -168.310546875, |
|
"loss": 0.8835, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.7887184619903564, |
|
"rewards/margins": -0.2950814366340637, |
|
"rewards/rejected": -0.4936370253562927, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.678081430699594e-05, |
|
"logits/chosen": -2.263319492340088, |
|
"logits/rejected": -2.260213613510132, |
|
"logps/chosen": -159.97354125976562, |
|
"logps/rejected": -173.87554931640625, |
|
"loss": 0.7764, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.6154264211654663, |
|
"rewards/margins": 0.04768490046262741, |
|
"rewards/rejected": -0.6631112694740295, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.676320395254146e-05, |
|
"logits/chosen": -2.1689982414245605, |
|
"logits/rejected": -2.0917975902557373, |
|
"logps/chosen": -199.61489868164062, |
|
"logps/rejected": -203.17724609375, |
|
"loss": 0.684, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5080626606941223, |
|
"rewards/margins": 0.14702026546001434, |
|
"rewards/rejected": -0.6550828218460083, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.674554889459968e-05, |
|
"logits/chosen": -2.080754518508911, |
|
"logits/rejected": -2.030247449874878, |
|
"logps/chosen": -142.29052734375, |
|
"logps/rejected": -165.71142578125, |
|
"loss": 0.7408, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.3904760479927063, |
|
"rewards/margins": 0.15203596651554108, |
|
"rewards/rejected": -0.542512059211731, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.672784916943562e-05, |
|
"logits/chosen": -1.9848957061767578, |
|
"logits/rejected": -1.9429978132247925, |
|
"logps/chosen": -186.40225219726562, |
|
"logps/rejected": -179.22129821777344, |
|
"loss": 0.7617, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.8649554252624512, |
|
"rewards/margins": -0.06034373492002487, |
|
"rewards/rejected": -0.8046116828918457, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.6710104813406034e-05, |
|
"logits/chosen": -2.0926647186279297, |
|
"logits/rejected": -2.121751070022583, |
|
"logps/chosen": -170.80023193359375, |
|
"logps/rejected": -175.02835083007812, |
|
"loss": 0.7499, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.61098712682724, |
|
"rewards/margins": 0.014241974800825119, |
|
"rewards/rejected": -0.6252290606498718, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.669231586295934e-05, |
|
"logits/chosen": -1.9556676149368286, |
|
"logits/rejected": -1.9728052616119385, |
|
"logps/chosen": -213.68606567382812, |
|
"logps/rejected": -174.6243896484375, |
|
"loss": 0.8219, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.6639366149902344, |
|
"rewards/margins": -0.16740846633911133, |
|
"rewards/rejected": -0.49652814865112305, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.667448235463557e-05, |
|
"logits/chosen": -2.060239553451538, |
|
"logits/rejected": -2.061102867126465, |
|
"logps/chosen": -137.50906372070312, |
|
"logps/rejected": -128.2222137451172, |
|
"loss": 0.818, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.2569347321987152, |
|
"rewards/margins": -0.15142616629600525, |
|
"rewards/rejected": -0.10550854355096817, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.665660432506629e-05, |
|
"logits/chosen": -2.1114845275878906, |
|
"logits/rejected": -2.12737774848938, |
|
"logps/chosen": -169.5026397705078, |
|
"logps/rejected": -141.35501098632812, |
|
"loss": 0.7919, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.30224573612213135, |
|
"rewards/margins": -0.09046731889247894, |
|
"rewards/rejected": -0.2117784172296524, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.6638681810974496e-05, |
|
"logits/chosen": -2.0212507247924805, |
|
"logits/rejected": -2.1288084983825684, |
|
"logps/chosen": -147.18545532226562, |
|
"logps/rejected": -166.57452392578125, |
|
"loss": 0.7137, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.31484949588775635, |
|
"rewards/margins": 0.05684095248579979, |
|
"rewards/rejected": -0.37169045209884644, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.6620714849174576e-05, |
|
"logits/chosen": -2.2373719215393066, |
|
"logits/rejected": -2.2259066104888916, |
|
"logps/chosen": -221.53643798828125, |
|
"logps/rejected": -209.2789306640625, |
|
"loss": 0.6621, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.18473269045352936, |
|
"rewards/margins": 0.11960664391517639, |
|
"rewards/rejected": -0.30433934926986694, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.660270347657219e-05, |
|
"logits/chosen": -2.235313892364502, |
|
"logits/rejected": -2.207859992980957, |
|
"logps/chosen": -145.51107788085938, |
|
"logps/rejected": -132.99411010742188, |
|
"loss": 0.8067, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.4317854642868042, |
|
"rewards/margins": -0.16359470784664154, |
|
"rewards/rejected": -0.26819074153900146, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.658464773016428e-05, |
|
"logits/chosen": -2.1742892265319824, |
|
"logits/rejected": -2.266177177429199, |
|
"logps/chosen": -142.66419982910156, |
|
"logps/rejected": -159.65928649902344, |
|
"loss": 0.6783, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.25551578402519226, |
|
"rewards/margins": 0.11744444072246552, |
|
"rewards/rejected": -0.3729602098464966, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.6566547647038864e-05, |
|
"logits/chosen": -2.1665616035461426, |
|
"logits/rejected": -2.1410114765167236, |
|
"logps/chosen": -172.94570922851562, |
|
"logps/rejected": -167.9188995361328, |
|
"loss": 0.7223, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.34934595227241516, |
|
"rewards/margins": 0.025624670088291168, |
|
"rewards/rejected": -0.37497058510780334, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.6548403264375074e-05, |
|
"logits/chosen": -2.055755615234375, |
|
"logits/rejected": -2.020624876022339, |
|
"logps/chosen": -150.6020965576172, |
|
"logps/rejected": -185.5800323486328, |
|
"loss": 0.6847, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5740702748298645, |
|
"rewards/margins": 0.09851770102977753, |
|
"rewards/rejected": -0.6725879907608032, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.6530214619443037e-05, |
|
"logits/chosen": -2.0328664779663086, |
|
"logits/rejected": -2.0425925254821777, |
|
"logps/chosen": -167.3757781982422, |
|
"logps/rejected": -173.4803466796875, |
|
"loss": 0.715, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.4150983989238739, |
|
"rewards/margins": 0.005986988544464111, |
|
"rewards/rejected": -0.421085387468338, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.6511981749603775e-05, |
|
"logits/chosen": -1.9052200317382812, |
|
"logits/rejected": -1.998968243598938, |
|
"logps/chosen": -123.46515655517578, |
|
"logps/rejected": -141.20614624023438, |
|
"loss": 0.6627, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.01827160082757473, |
|
"rewards/margins": 0.1650063842535019, |
|
"rewards/rejected": -0.14673477411270142, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.6493704692309175e-05, |
|
"logits/chosen": -2.1721608638763428, |
|
"logits/rejected": -2.181016445159912, |
|
"logps/chosen": -169.97547912597656, |
|
"logps/rejected": -174.20501708984375, |
|
"loss": 0.6978, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.34234559535980225, |
|
"rewards/margins": 0.039180606603622437, |
|
"rewards/rejected": -0.3815262019634247, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.647538348510189e-05, |
|
"logits/chosen": -2.042267084121704, |
|
"logits/rejected": -2.0465734004974365, |
|
"logps/chosen": -141.28273010253906, |
|
"logps/rejected": -137.6270294189453, |
|
"loss": 0.7715, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.45353037118911743, |
|
"rewards/margins": -0.10618551820516586, |
|
"rewards/rejected": -0.34734484553337097, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.645701816561523e-05, |
|
"logits/chosen": -2.080418825149536, |
|
"logits/rejected": -2.0591931343078613, |
|
"logps/chosen": -156.5729217529297, |
|
"logps/rejected": -160.69155883789062, |
|
"loss": 0.8914, |
|
"rewards/accuracies": 0.1875, |
|
"rewards/chosen": -0.3119348883628845, |
|
"rewards/margins": -0.33930426836013794, |
|
"rewards/rejected": 0.027369357645511627, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.643860877157314e-05, |
|
"logits/chosen": -1.639522671699524, |
|
"logits/rejected": -1.6502141952514648, |
|
"logps/chosen": -178.24334716796875, |
|
"logps/rejected": -169.12493896484375, |
|
"loss": 0.7049, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.44444751739501953, |
|
"rewards/margins": 0.014572631567716599, |
|
"rewards/rejected": -0.45902013778686523, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.642015534079012e-05, |
|
"logits/chosen": -2.047992467880249, |
|
"logits/rejected": -2.0363173484802246, |
|
"logps/chosen": -134.27391052246094, |
|
"logps/rejected": -123.03015899658203, |
|
"loss": 0.7044, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.21198835968971252, |
|
"rewards/margins": 0.025383614003658295, |
|
"rewards/rejected": -0.23737198114395142, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.640165791117106e-05, |
|
"logits/chosen": -2.0618531703948975, |
|
"logits/rejected": -2.0514118671417236, |
|
"logps/chosen": -152.69686889648438, |
|
"logps/rejected": -135.8447265625, |
|
"loss": 0.6722, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.04251031577587128, |
|
"rewards/margins": 0.09286715090274811, |
|
"rewards/rejected": -0.13537748157978058, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.63831165207113e-05, |
|
"logits/chosen": -2.015552282333374, |
|
"logits/rejected": -1.9658578634262085, |
|
"logps/chosen": -143.47520446777344, |
|
"logps/rejected": -146.58863830566406, |
|
"loss": 0.8025, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.14033322036266327, |
|
"rewards/margins": -0.1578764021396637, |
|
"rewards/rejected": 0.01754317432641983, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.6364531207496426e-05, |
|
"logits/chosen": -2.1612448692321777, |
|
"logits/rejected": -2.099830389022827, |
|
"logps/chosen": -161.23220825195312, |
|
"logps/rejected": -143.65408325195312, |
|
"loss": 0.7213, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.29882240295410156, |
|
"rewards/margins": -0.008122054859995842, |
|
"rewards/rejected": -0.2907003164291382, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.634590200970227e-05, |
|
"logits/chosen": -1.8700077533721924, |
|
"logits/rejected": -1.9340232610702515, |
|
"logps/chosen": -160.71038818359375, |
|
"logps/rejected": -151.01939392089844, |
|
"loss": 0.7197, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.3910747170448303, |
|
"rewards/margins": 0.08369296789169312, |
|
"rewards/rejected": -0.47476768493652344, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.632722896559481e-05, |
|
"logits/chosen": -2.1195974349975586, |
|
"logits/rejected": -2.1336312294006348, |
|
"logps/chosen": -151.23971557617188, |
|
"logps/rejected": -160.65672302246094, |
|
"loss": 0.78, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.08127432316541672, |
|
"rewards/margins": -0.11872512847185135, |
|
"rewards/rejected": 0.19999945163726807, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.630851211353007e-05, |
|
"logits/chosen": -2.0612592697143555, |
|
"logits/rejected": -2.0779013633728027, |
|
"logps/chosen": -131.81069946289062, |
|
"logps/rejected": -129.8345489501953, |
|
"loss": 0.689, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.03394623100757599, |
|
"rewards/margins": 0.08058298379182816, |
|
"rewards/rejected": -0.11452920734882355, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.628975149195407e-05, |
|
"logits/chosen": -1.9617186784744263, |
|
"logits/rejected": -1.9468779563903809, |
|
"logps/chosen": -130.2001495361328, |
|
"logps/rejected": -125.31912994384766, |
|
"loss": 0.8841, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.41198205947875977, |
|
"rewards/margins": -0.24151001870632172, |
|
"rewards/rejected": -0.17047205567359924, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.6270947139402744e-05, |
|
"logits/chosen": -2.0373647212982178, |
|
"logits/rejected": -2.0272598266601562, |
|
"logps/chosen": -161.21817016601562, |
|
"logps/rejected": -160.80905151367188, |
|
"loss": 0.5759, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.23811860382556915, |
|
"rewards/margins": 0.3550880253314972, |
|
"rewards/rejected": -0.5932066440582275, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.6252099094501834e-05, |
|
"logits/chosen": -2.263495922088623, |
|
"logits/rejected": -2.220659017562866, |
|
"logps/chosen": -150.18930053710938, |
|
"logps/rejected": -147.73287963867188, |
|
"loss": 0.6553, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.039855338633060455, |
|
"rewards/margins": 0.12497195601463318, |
|
"rewards/rejected": -0.16482731699943542, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.623320739596685e-05, |
|
"logits/chosen": -2.1021156311035156, |
|
"logits/rejected": -2.120116949081421, |
|
"logps/chosen": -153.40098571777344, |
|
"logps/rejected": -157.4463653564453, |
|
"loss": 0.7669, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.09802110493183136, |
|
"rewards/margins": -0.008432537317276001, |
|
"rewards/rejected": -0.08958857506513596, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.621427208260296e-05, |
|
"logits/chosen": -1.9562160968780518, |
|
"logits/rejected": -2.006755828857422, |
|
"logps/chosen": -165.9380340576172, |
|
"logps/rejected": -173.2657928466797, |
|
"loss": 0.7826, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.6031227111816406, |
|
"rewards/margins": -0.10150633007287979, |
|
"rewards/rejected": -0.501616358757019, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.6195293193304915e-05, |
|
"logits/chosen": -1.9626574516296387, |
|
"logits/rejected": -1.9933884143829346, |
|
"logps/chosen": -187.50564575195312, |
|
"logps/rejected": -184.9365234375, |
|
"loss": 0.7809, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.05860882252454758, |
|
"rewards/margins": -0.11062480509281158, |
|
"rewards/rejected": 0.05201598256826401, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.6176270767056976e-05, |
|
"logits/chosen": -2.0054826736450195, |
|
"logits/rejected": -2.002007484436035, |
|
"logps/chosen": -172.8408966064453, |
|
"logps/rejected": -165.1022491455078, |
|
"loss": 0.7222, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.1988084763288498, |
|
"rewards/margins": -0.013871286064386368, |
|
"rewards/rejected": -0.18493719398975372, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.615720484293286e-05, |
|
"logits/chosen": -2.0947346687316895, |
|
"logits/rejected": -2.138517379760742, |
|
"logps/chosen": -148.93450927734375, |
|
"logps/rejected": -144.9428253173828, |
|
"loss": 0.6561, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.12516680359840393, |
|
"rewards/margins": 0.15067778527736664, |
|
"rewards/rejected": -0.2758445739746094, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.613809546009558e-05, |
|
"logits/chosen": -2.115553140640259, |
|
"logits/rejected": -2.149724006652832, |
|
"logps/chosen": -153.2530059814453, |
|
"logps/rejected": -160.34811401367188, |
|
"loss": 0.6297, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.14684545993804932, |
|
"rewards/margins": 0.21833759546279907, |
|
"rewards/rejected": -0.365183025598526, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.611894265779748e-05, |
|
"logits/chosen": -2.068711280822754, |
|
"logits/rejected": -2.126939058303833, |
|
"logps/chosen": -139.7198486328125, |
|
"logps/rejected": -151.35397338867188, |
|
"loss": 0.6535, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.08141559362411499, |
|
"rewards/margins": 0.1623344123363495, |
|
"rewards/rejected": -0.24375002086162567, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.609974647538003e-05, |
|
"logits/chosen": -2.225661516189575, |
|
"logits/rejected": -2.150395393371582, |
|
"logps/chosen": -132.438232421875, |
|
"logps/rejected": -128.7378692626953, |
|
"loss": 0.7277, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.009231429547071457, |
|
"rewards/margins": -0.0193635243922472, |
|
"rewards/rejected": 0.02859494648873806, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.608050695227385e-05, |
|
"logits/chosen": -2.0139918327331543, |
|
"logits/rejected": -2.0279946327209473, |
|
"logps/chosen": -163.32350158691406, |
|
"logps/rejected": -148.53485107421875, |
|
"loss": 0.7111, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.04781512916088104, |
|
"rewards/margins": 0.12081319838762283, |
|
"rewards/rejected": -0.16862833499908447, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.606122412799857e-05, |
|
"logits/chosen": -2.3113937377929688, |
|
"logits/rejected": -2.31269907951355, |
|
"logps/chosen": -147.7089080810547, |
|
"logps/rejected": -147.32814025878906, |
|
"loss": 0.6941, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.2833206355571747, |
|
"rewards/margins": 0.048162560909986496, |
|
"rewards/rejected": -0.3314831852912903, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.6041898042162764e-05, |
|
"logits/chosen": -2.3534443378448486, |
|
"logits/rejected": -2.3750576972961426, |
|
"logps/chosen": -179.6420440673828, |
|
"logps/rejected": -186.58526611328125, |
|
"loss": 0.8517, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.15785303711891174, |
|
"rewards/margins": -0.20781296491622925, |
|
"rewards/rejected": 0.049959927797317505, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.602252873446386e-05, |
|
"logits/chosen": -1.7936934232711792, |
|
"logits/rejected": -1.7282207012176514, |
|
"logps/chosen": -171.0880584716797, |
|
"logps/rejected": -180.4699249267578, |
|
"loss": 0.7747, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.37305474281311035, |
|
"rewards/margins": 0.08523893356323242, |
|
"rewards/rejected": -0.45829373598098755, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.60031162446881e-05, |
|
"logits/chosen": -2.154116153717041, |
|
"logits/rejected": -2.1634771823883057, |
|
"logps/chosen": -142.67947387695312, |
|
"logps/rejected": -141.00306701660156, |
|
"loss": 0.651, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.04277772828936577, |
|
"rewards/margins": 0.16890141367912292, |
|
"rewards/rejected": -0.12612366676330566, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 2292, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|