{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9982851866508377, |
|
"eval_steps": 400, |
|
"global_step": 473, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00211053950666139, |
|
"grad_norm": 5.643460436957748, |
|
"learning_rate": 1.0416666666666666e-08, |
|
"logits/chosen": -1.5622574090957642, |
|
"logits/rejected": -2.016603946685791, |
|
"logps/chosen": -279.929443359375, |
|
"logps/rejected": -249.6509552001953, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.010552697533306952, |
|
"grad_norm": 4.760670706167096, |
|
"learning_rate": 5.208333333333333e-08, |
|
"logits/chosen": -1.6410560607910156, |
|
"logits/rejected": -1.8854162693023682, |
|
"logps/chosen": -306.70123291015625, |
|
"logps/rejected": -286.2392883300781, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.0017719048773869872, |
|
"rewards/margins": -0.0009851222857832909, |
|
"rewards/rejected": -0.0007867825916036963, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.021105395066613904, |
|
"grad_norm": 4.237628799563217, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -1.4761555194854736, |
|
"logits/rejected": -1.7796385288238525, |
|
"logps/chosen": -290.88739013671875, |
|
"logps/rejected": -265.3614196777344, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.001178879290819168, |
|
"rewards/margins": 0.0011062298435717821, |
|
"rewards/rejected": -0.00228510913439095, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.031658092599920855, |
|
"grad_norm": 5.575235759782868, |
|
"learning_rate": 1.5624999999999999e-07, |
|
"logits/chosen": -1.5923292636871338, |
|
"logits/rejected": -1.9355911016464233, |
|
"logps/chosen": -293.08807373046875, |
|
"logps/rejected": -261.4955139160156, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -8.010577585082501e-05, |
|
"rewards/margins": -0.0003216963086742908, |
|
"rewards/rejected": 0.00024159046006388962, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04221079013322781, |
|
"grad_norm": 5.446047742742675, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -1.5667310953140259, |
|
"logits/rejected": -2.014115810394287, |
|
"logps/chosen": -273.8595275878906, |
|
"logps/rejected": -235.01364135742188, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0028162619564682245, |
|
"rewards/margins": 0.0022979697678238153, |
|
"rewards/rejected": 0.0005182920140214264, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.052763487666534756, |
|
"grad_norm": 5.373521803666822, |
|
"learning_rate": 2.604166666666667e-07, |
|
"logits/chosen": -1.6274404525756836, |
|
"logits/rejected": -1.875451683998108, |
|
"logps/chosen": -279.4980163574219, |
|
"logps/rejected": -255.500244140625, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.011563817039132118, |
|
"rewards/margins": 0.003255133982747793, |
|
"rewards/rejected": 0.008308682590723038, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06331618519984171, |
|
"grad_norm": 6.225478720213553, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -1.3949791193008423, |
|
"logits/rejected": -1.7053276300430298, |
|
"logps/chosen": -295.1358337402344, |
|
"logps/rejected": -266.3870849609375, |
|
"loss": 0.6882, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.02798023819923401, |
|
"rewards/margins": 0.00888301245868206, |
|
"rewards/rejected": 0.0190972238779068, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07386888273314866, |
|
"grad_norm": 5.237638647535282, |
|
"learning_rate": 3.645833333333333e-07, |
|
"logits/chosen": -1.6196448802947998, |
|
"logits/rejected": -1.9479618072509766, |
|
"logps/chosen": -296.2655029296875, |
|
"logps/rejected": -268.84454345703125, |
|
"loss": 0.684, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 0.05069383978843689, |
|
"rewards/margins": 0.019313272088766098, |
|
"rewards/rejected": 0.03138056769967079, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08442158026645562, |
|
"grad_norm": 4.876669460762773, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -1.5888957977294922, |
|
"logits/rejected": -1.862489938735962, |
|
"logps/chosen": -298.8040466308594, |
|
"logps/rejected": -281.5601501464844, |
|
"loss": 0.681, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.07323700189590454, |
|
"rewards/margins": 0.02310621738433838, |
|
"rewards/rejected": 0.05013079196214676, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09497427779976256, |
|
"grad_norm": 4.155679871417378, |
|
"learning_rate": 4.6874999999999996e-07, |
|
"logits/chosen": -1.6918067932128906, |
|
"logits/rejected": -2.00124454498291, |
|
"logps/chosen": -278.1552734375, |
|
"logps/rejected": -257.6329345703125, |
|
"loss": 0.6733, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.06014139577746391, |
|
"rewards/margins": 0.035790883004665375, |
|
"rewards/rejected": 0.024350514635443687, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10552697533306951, |
|
"grad_norm": 4.856245709560566, |
|
"learning_rate": 4.999726797933858e-07, |
|
"logits/chosen": -1.7646106481552124, |
|
"logits/rejected": -1.9858261346817017, |
|
"logps/chosen": -278.0591125488281, |
|
"logps/rejected": -259.8578186035156, |
|
"loss": 0.6681, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.03362422436475754, |
|
"rewards/margins": 0.0627993568778038, |
|
"rewards/rejected": -0.029175132513046265, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11607967286637647, |
|
"grad_norm": 5.091156338266151, |
|
"learning_rate": 4.99665396039775e-07, |
|
"logits/chosen": -1.7584812641143799, |
|
"logits/rejected": -2.0758414268493652, |
|
"logps/chosen": -275.55548095703125, |
|
"logps/rejected": -267.1745300292969, |
|
"loss": 0.6557, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.016241051256656647, |
|
"rewards/margins": 0.09382958710193634, |
|
"rewards/rejected": -0.11007064580917358, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.12663237039968342, |
|
"grad_norm": 7.261447388148616, |
|
"learning_rate": 4.99017099386437e-07, |
|
"logits/chosen": -1.8382816314697266, |
|
"logits/rejected": -2.1405653953552246, |
|
"logps/chosen": -281.54827880859375, |
|
"logps/rejected": -263.27294921875, |
|
"loss": 0.6545, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.11135590076446533, |
|
"rewards/margins": 0.09165789932012558, |
|
"rewards/rejected": -0.20301377773284912, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.13718506793299037, |
|
"grad_norm": 5.4212641641464705, |
|
"learning_rate": 4.980286753286194e-07, |
|
"logits/chosen": -1.8312028646469116, |
|
"logits/rejected": -2.171030282974243, |
|
"logps/chosen": -287.714599609375, |
|
"logps/rejected": -269.2692565917969, |
|
"loss": 0.6518, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.1507767140865326, |
|
"rewards/margins": 0.07915514707565308, |
|
"rewards/rejected": -0.22993186116218567, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.14773776546629733, |
|
"grad_norm": 5.914411454935479, |
|
"learning_rate": 4.967014739346915e-07, |
|
"logits/chosen": -1.7997725009918213, |
|
"logits/rejected": -2.1724910736083984, |
|
"logps/chosen": -314.2709045410156, |
|
"logps/rejected": -288.6246337890625, |
|
"loss": 0.6402, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.12115994840860367, |
|
"rewards/margins": 0.11401550471782684, |
|
"rewards/rejected": -0.23517544567584991, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.15829046299960428, |
|
"grad_norm": 6.712529318578845, |
|
"learning_rate": 4.950373080021136e-07, |
|
"logits/chosen": -1.7687238454818726, |
|
"logits/rejected": -2.1885459423065186, |
|
"logps/chosen": -325.1200256347656, |
|
"logps/rejected": -298.8721008300781, |
|
"loss": 0.6297, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.15646745264530182, |
|
"rewards/margins": 0.1352117955684662, |
|
"rewards/rejected": -0.2916792631149292, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.16884316053291124, |
|
"grad_norm": 11.136062496010506, |
|
"learning_rate": 4.930384505813737e-07, |
|
"logits/chosen": -1.8805389404296875, |
|
"logits/rejected": -2.243736982345581, |
|
"logps/chosen": -311.088134765625, |
|
"logps/rejected": -294.6890563964844, |
|
"loss": 0.6359, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.28947150707244873, |
|
"rewards/margins": 0.14183922111988068, |
|
"rewards/rejected": -0.4313107430934906, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1793958580662182, |
|
"grad_norm": 9.114819712911686, |
|
"learning_rate": 4.907076318712738e-07, |
|
"logits/chosen": -1.823948621749878, |
|
"logits/rejected": -2.289140462875366, |
|
"logps/chosen": -310.9908752441406, |
|
"logps/rejected": -296.40277099609375, |
|
"loss": 0.6303, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.3205372095108032, |
|
"rewards/margins": 0.17761529982089996, |
|
"rewards/rejected": -0.49815255403518677, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.18994855559952512, |
|
"grad_norm": 6.643890919509507, |
|
"learning_rate": 4.88048035489807e-07, |
|
"logits/chosen": -1.9663282632827759, |
|
"logits/rejected": -2.3473124504089355, |
|
"logps/chosen": -305.6136779785156, |
|
"logps/rejected": -289.89471435546875, |
|
"loss": 0.6202, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.31997618079185486, |
|
"rewards/margins": 0.13845226168632507, |
|
"rewards/rejected": -0.4584284722805023, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20050125313283207, |
|
"grad_norm": 8.184472249085363, |
|
"learning_rate": 4.85063294125718e-07, |
|
"logits/chosen": -1.9098714590072632, |
|
"logits/rejected": -2.2084691524505615, |
|
"logps/chosen": -316.2163391113281, |
|
"logps/rejected": -309.3311462402344, |
|
"loss": 0.6176, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.3992615342140198, |
|
"rewards/margins": 0.18614216148853302, |
|
"rewards/rejected": -0.5854036211967468, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.21105395066613902, |
|
"grad_norm": 8.142734346752789, |
|
"learning_rate": 4.817574845766874e-07, |
|
"logits/chosen": -2.0933427810668945, |
|
"logits/rejected": -2.4379730224609375, |
|
"logps/chosen": -331.0671691894531, |
|
"logps/rejected": -329.8666076660156, |
|
"loss": 0.6142, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.5073726773262024, |
|
"rewards/margins": 0.2520057260990143, |
|
"rewards/rejected": -0.7593784332275391, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22160664819944598, |
|
"grad_norm": 7.833427916953237, |
|
"learning_rate": 4.781351221809166e-07, |
|
"logits/chosen": -2.1106457710266113, |
|
"logits/rejected": -2.3915274143218994, |
|
"logps/chosen": -346.46337890625, |
|
"logps/rejected": -337.7541198730469, |
|
"loss": 0.6124, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.5495078563690186, |
|
"rewards/margins": 0.17171132564544678, |
|
"rewards/rejected": -0.7212191820144653, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.23215934573275293, |
|
"grad_norm": 7.733643763374119, |
|
"learning_rate": 4.742011546497182e-07, |
|
"logits/chosen": -1.9331356287002563, |
|
"logits/rejected": -2.2653117179870605, |
|
"logps/chosen": -344.3125, |
|
"logps/rejected": -331.16632080078125, |
|
"loss": 0.6061, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.4401538372039795, |
|
"rewards/margins": 0.23562327027320862, |
|
"rewards/rejected": -0.6757770776748657, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.24271204326605988, |
|
"grad_norm": 9.878124133877995, |
|
"learning_rate": 4.6996095530953875e-07, |
|
"logits/chosen": -2.1782004833221436, |
|
"logits/rejected": -2.4763283729553223, |
|
"logps/chosen": -324.13983154296875, |
|
"logps/rejected": -319.3317565917969, |
|
"loss": 0.5985, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5510643720626831, |
|
"rewards/margins": 0.20593421161174774, |
|
"rewards/rejected": -0.7569986581802368, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.25326474079936684, |
|
"grad_norm": 10.19310620269605, |
|
"learning_rate": 4.654203157626399e-07, |
|
"logits/chosen": -1.9924976825714111, |
|
"logits/rejected": -2.3102524280548096, |
|
"logps/chosen": -377.19183349609375, |
|
"logps/rejected": -375.12823486328125, |
|
"loss": 0.5964, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.7560365796089172, |
|
"rewards/margins": 0.3029988408088684, |
|
"rewards/rejected": -1.0590355396270752, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2638174383326738, |
|
"grad_norm": 11.001033426114137, |
|
"learning_rate": 4.605854379764673e-07, |
|
"logits/chosen": -2.199047565460205, |
|
"logits/rejected": -2.5438296794891357, |
|
"logps/chosen": -374.6500549316406, |
|
"logps/rejected": -363.5611572265625, |
|
"loss": 0.5867, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.9937089681625366, |
|
"rewards/margins": 0.28307586908340454, |
|
"rewards/rejected": -1.2767850160598755, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.27437013586598075, |
|
"grad_norm": 12.37698546731958, |
|
"learning_rate": 4.5546292581250857e-07, |
|
"logits/chosen": -2.1308746337890625, |
|
"logits/rejected": -2.4864110946655273, |
|
"logps/chosen": -400.5669860839844, |
|
"logps/rejected": -397.34454345703125, |
|
"loss": 0.5933, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.2083604335784912, |
|
"rewards/margins": 0.26188138127326965, |
|
"rewards/rejected": -1.4702417850494385, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2849228333992877, |
|
"grad_norm": 11.055505495866356, |
|
"learning_rate": 4.5005977600621275e-07, |
|
"logits/chosen": -2.0843119621276855, |
|
"logits/rejected": -2.539513111114502, |
|
"logps/chosen": -385.6966857910156, |
|
"logps/rejected": -379.7543029785156, |
|
"loss": 0.5772, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.9928609728813171, |
|
"rewards/margins": 0.3268323540687561, |
|
"rewards/rejected": -1.3196933269500732, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.29547553093259465, |
|
"grad_norm": 15.017423788636298, |
|
"learning_rate": 4.443833686102919e-07, |
|
"logits/chosen": -2.218951940536499, |
|
"logits/rejected": -2.4617791175842285, |
|
"logps/chosen": -422.04803466796875, |
|
"logps/rejected": -423.21685791015625, |
|
"loss": 0.5756, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.2504339218139648, |
|
"rewards/margins": 0.3247791528701782, |
|
"rewards/rejected": -1.575213074684143, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3060282284659016, |
|
"grad_norm": 13.104101042453381, |
|
"learning_rate": 4.384414569144561e-07, |
|
"logits/chosen": -2.239192485809326, |
|
"logits/rejected": -2.4994874000549316, |
|
"logps/chosen": -423.623046875, |
|
"logps/rejected": -425.60546875, |
|
"loss": 0.5866, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.5029816627502441, |
|
"rewards/margins": 0.3298465609550476, |
|
"rewards/rejected": -1.832828164100647, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.31658092599920856, |
|
"grad_norm": 9.51654888826691, |
|
"learning_rate": 4.3224215685535287e-07, |
|
"logits/chosen": -2.0407261848449707, |
|
"logits/rejected": -2.337188720703125, |
|
"logps/chosen": -426.2940979003906, |
|
"logps/rejected": -424.5220642089844, |
|
"loss": 0.5817, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.3242921829223633, |
|
"rewards/margins": 0.31421297788619995, |
|
"rewards/rejected": -1.638505220413208, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3271336235325155, |
|
"grad_norm": 11.744189119101899, |
|
"learning_rate": 4.2579393593117364e-07, |
|
"logits/chosen": -2.0881667137145996, |
|
"logits/rejected": -2.4598240852355957, |
|
"logps/chosen": -373.4230041503906, |
|
"logps/rejected": -372.4430847167969, |
|
"loss": 0.5648, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0188493728637695, |
|
"rewards/margins": 0.29292336106300354, |
|
"rewards/rejected": -1.3117727041244507, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.33768632106582247, |
|
"grad_norm": 11.411495536339306, |
|
"learning_rate": 4.191056016360699e-07, |
|
"logits/chosen": -2.1164355278015137, |
|
"logits/rejected": -2.3749523162841797, |
|
"logps/chosen": -452.0877380371094, |
|
"logps/rejected": -475.936767578125, |
|
"loss": 0.5657, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.5110199451446533, |
|
"rewards/margins": 0.45130714774131775, |
|
"rewards/rejected": -1.962327241897583, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3482390185991294, |
|
"grad_norm": 13.015188201271227, |
|
"learning_rate": 4.121862894301754e-07, |
|
"logits/chosen": -2.0862815380096436, |
|
"logits/rejected": -2.4722859859466553, |
|
"logps/chosen": -415.59368896484375, |
|
"logps/rejected": -414.7337951660156, |
|
"loss": 0.5574, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.2593889236450195, |
|
"rewards/margins": 0.3909255266189575, |
|
"rewards/rejected": -1.6503145694732666, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.3587917161324364, |
|
"grad_norm": 15.106508196254897, |
|
"learning_rate": 4.050454502616667e-07, |
|
"logits/chosen": -2.120917797088623, |
|
"logits/rejected": -2.3543829917907715, |
|
"logps/chosen": -464.19622802734375, |
|
"logps/rejected": -488.60675048828125, |
|
"loss": 0.5484, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.7536499500274658, |
|
"rewards/margins": 0.46063175797462463, |
|
"rewards/rejected": -2.2142815589904785, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.36934441366574333, |
|
"grad_norm": 17.660956835556952, |
|
"learning_rate": 3.976928376579047e-07, |
|
"logits/chosen": -2.117267608642578, |
|
"logits/rejected": -2.336695432662964, |
|
"logps/chosen": -491.78216552734375, |
|
"logps/rejected": -518.7801513671875, |
|
"loss": 0.5229, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.9079921245574951, |
|
"rewards/margins": 0.5516217350959778, |
|
"rewards/rejected": -2.459613800048828, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.37989711119905023, |
|
"grad_norm": 14.241710823955074, |
|
"learning_rate": 3.9013849440328945e-07, |
|
"logits/chosen": -2.169321060180664, |
|
"logits/rejected": -2.405425786972046, |
|
"logps/chosen": -436.4549865722656, |
|
"logps/rejected": -458.5728454589844, |
|
"loss": 0.5505, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.5903228521347046, |
|
"rewards/margins": 0.4581179618835449, |
|
"rewards/rejected": -2.048440933227539, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3904498087323572, |
|
"grad_norm": 14.232999562557966, |
|
"learning_rate": 3.8239273882202473e-07, |
|
"logits/chosen": -2.1749088764190674, |
|
"logits/rejected": -2.4840614795684814, |
|
"logps/chosen": -479.7809143066406, |
|
"logps/rejected": -491.2457580566406, |
|
"loss": 0.5578, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.8379102945327759, |
|
"rewards/margins": 0.42885318398475647, |
|
"rewards/rejected": -2.266763210296631, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.40100250626566414, |
|
"grad_norm": 16.05324813627352, |
|
"learning_rate": 3.7446615068452804e-07, |
|
"logits/chosen": -2.2167088985443115, |
|
"logits/rejected": -2.5488333702087402, |
|
"logps/chosen": -488.9418029785156, |
|
"logps/rejected": -518.4141845703125, |
|
"loss": 0.5337, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.9092267751693726, |
|
"rewards/margins": 0.5299785137176514, |
|
"rewards/rejected": -2.4392056465148926, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.4115552037989711, |
|
"grad_norm": 14.147768615324013, |
|
"learning_rate": 3.6636955675673743e-07, |
|
"logits/chosen": -2.1767070293426514, |
|
"logits/rejected": -2.61075496673584, |
|
"logps/chosen": -479.6851501464844, |
|
"logps/rejected": -483.32501220703125, |
|
"loss": 0.5389, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.7908236980438232, |
|
"rewards/margins": 0.46493005752563477, |
|
"rewards/rejected": -2.255753517150879, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.42210790133227805, |
|
"grad_norm": 13.882861374739983, |
|
"learning_rate": 3.5811401601205093e-07, |
|
"logits/chosen": -2.219057321548462, |
|
"logits/rejected": -2.5431442260742188, |
|
"logps/chosen": -500.2259826660156, |
|
"logps/rejected": -521.2952270507812, |
|
"loss": 0.5154, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.941454529762268, |
|
"rewards/margins": 0.5581260919570923, |
|
"rewards/rejected": -2.4995803833007812, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.432660598865585, |
|
"grad_norm": 13.10437188597977, |
|
"learning_rate": 3.497108045260995e-07, |
|
"logits/chosen": -2.3422179222106934, |
|
"logits/rejected": -2.6200077533721924, |
|
"logps/chosen": -444.88433837890625, |
|
"logps/rejected": -463.40582275390625, |
|
"loss": 0.5479, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.721801996231079, |
|
"rewards/margins": 0.46254196763038635, |
|
"rewards/rejected": -2.1843440532684326, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.44321329639889195, |
|
"grad_norm": 17.090042659489537, |
|
"learning_rate": 3.411714000749838e-07, |
|
"logits/chosen": -2.2252583503723145, |
|
"logits/rejected": -2.598954916000366, |
|
"logps/chosen": -467.2124938964844, |
|
"logps/rejected": -482.96136474609375, |
|
"loss": 0.5295, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7041940689086914, |
|
"rewards/margins": 0.48477378487586975, |
|
"rewards/rejected": -2.1889679431915283, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.4537659939321989, |
|
"grad_norm": 27.344254292887783, |
|
"learning_rate": 3.3250746645801287e-07, |
|
"logits/chosen": -2.346909523010254, |
|
"logits/rejected": -2.5004947185516357, |
|
"logps/chosen": -492.0323791503906, |
|
"logps/rejected": -510.87847900390625, |
|
"loss": 0.5491, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.0776052474975586, |
|
"rewards/margins": 0.3693740963935852, |
|
"rewards/rejected": -2.446979284286499, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.46431869146550586, |
|
"grad_norm": 19.808222569935815, |
|
"learning_rate": 3.237308375663571e-07, |
|
"logits/chosen": -2.291229486465454, |
|
"logits/rejected": -2.6437947750091553, |
|
"logps/chosen": -470.88909912109375, |
|
"logps/rejected": -506.59393310546875, |
|
"loss": 0.5365, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9697215557098389, |
|
"rewards/margins": 0.6469660997390747, |
|
"rewards/rejected": -2.616687774658203, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.4748713889988128, |
|
"grad_norm": 18.32709921824934, |
|
"learning_rate": 3.148535012193767e-07, |
|
"logits/chosen": -2.1904757022857666, |
|
"logits/rejected": -2.5518805980682373, |
|
"logps/chosen": -459.55987548828125, |
|
"logps/rejected": -499.54840087890625, |
|
"loss": 0.5194, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -1.6650127172470093, |
|
"rewards/margins": 0.6420146226882935, |
|
"rewards/rejected": -2.3070271015167236, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.48542408653211977, |
|
"grad_norm": 14.548363920921867, |
|
"learning_rate": 3.0588758279070183e-07, |
|
"logits/chosen": -2.1270744800567627, |
|
"logits/rejected": -2.476382255554199, |
|
"logps/chosen": -442.04400634765625, |
|
"logps/rejected": -464.97735595703125, |
|
"loss": 0.5326, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.7559592723846436, |
|
"rewards/margins": 0.4754490852355957, |
|
"rewards/rejected": -2.2314083576202393, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.4959767840654267, |
|
"grad_norm": 16.709600354788574, |
|
"learning_rate": 2.968453286464312e-07, |
|
"logits/chosen": -2.354429244995117, |
|
"logits/rejected": -2.5410735607147217, |
|
"logps/chosen": -514.9906005859375, |
|
"logps/rejected": -569.000732421875, |
|
"loss": 0.5457, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.1175270080566406, |
|
"rewards/margins": 0.6777707934379578, |
|
"rewards/rejected": -2.795297861099243, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.5065294815987337, |
|
"grad_norm": 14.595717161808087, |
|
"learning_rate": 2.8773908941806877e-07, |
|
"logits/chosen": -2.191709280014038, |
|
"logits/rejected": -2.4795994758605957, |
|
"logps/chosen": -513.7542724609375, |
|
"logps/rejected": -539.9058837890625, |
|
"loss": 0.5283, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.063544273376465, |
|
"rewards/margins": 0.5612505674362183, |
|
"rewards/rejected": -2.6247947216033936, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5170821791320406, |
|
"grad_norm": 27.782416609672772, |
|
"learning_rate": 2.785813031330473e-07, |
|
"logits/chosen": -2.316455602645874, |
|
"logits/rejected": -2.5953054428100586, |
|
"logps/chosen": -480.8763122558594, |
|
"logps/rejected": -517.4465942382812, |
|
"loss": 0.5183, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.9154212474822998, |
|
"rewards/margins": 0.5959927439689636, |
|
"rewards/rejected": -2.5114142894744873, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.5276348766653476, |
|
"grad_norm": 19.504383146510033, |
|
"learning_rate": 2.693844782258779e-07, |
|
"logits/chosen": -2.288198947906494, |
|
"logits/rejected": -2.663243293762207, |
|
"logps/chosen": -504.8866271972656, |
|
"logps/rejected": -565.0271606445312, |
|
"loss": 0.4927, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.188603639602661, |
|
"rewards/margins": 0.93329256772995, |
|
"rewards/rejected": -3.121896266937256, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5381875741986545, |
|
"grad_norm": 16.174904226348485, |
|
"learning_rate": 2.601611764531342e-07, |
|
"logits/chosen": -2.3196043968200684, |
|
"logits/rejected": -2.615384578704834, |
|
"logps/chosen": -520.6978759765625, |
|
"logps/rejected": -568.9720458984375, |
|
"loss": 0.5158, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -2.302415370941162, |
|
"rewards/margins": 0.6759995222091675, |
|
"rewards/rejected": -2.978415012359619, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5487402717319615, |
|
"grad_norm": 13.877722437423808, |
|
"learning_rate": 2.5092399573560323e-07, |
|
"logits/chosen": -2.2904419898986816, |
|
"logits/rejected": -2.6239161491394043, |
|
"logps/chosen": -469.6702575683594, |
|
"logps/rejected": -495.83917236328125, |
|
"loss": 0.5271, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.8616193532943726, |
|
"rewards/margins": 0.5212319493293762, |
|
"rewards/rejected": -2.3828511238098145, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5592929692652684, |
|
"grad_norm": 25.387018559215, |
|
"learning_rate": 2.4168555295104124e-07, |
|
"logits/chosen": -2.3710436820983887, |
|
"logits/rejected": -2.6667098999023438, |
|
"logps/chosen": -551.4783325195312, |
|
"logps/rejected": -584.6627197265625, |
|
"loss": 0.5207, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.6731348037719727, |
|
"rewards/margins": 0.6126972436904907, |
|
"rewards/rejected": -3.285832166671753, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5698456667985754, |
|
"grad_norm": 21.194954670641433, |
|
"learning_rate": 2.3245846670103626e-07, |
|
"logits/chosen": -2.339695692062378, |
|
"logits/rejected": -2.728651285171509, |
|
"logps/chosen": -566.4747314453125, |
|
"logps/rejected": -614.0745849609375, |
|
"loss": 0.4911, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.8559041023254395, |
|
"rewards/margins": 0.8008524179458618, |
|
"rewards/rejected": -3.656756639480591, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5803983643318823, |
|
"grad_norm": 17.02245510623681, |
|
"learning_rate": 2.232553400755159e-07, |
|
"logits/chosen": -2.462646007537842, |
|
"logits/rejected": -2.7295315265655518, |
|
"logps/chosen": -520.3306884765625, |
|
"logps/rejected": -550.9982299804688, |
|
"loss": 0.515, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.492687940597534, |
|
"rewards/margins": 0.6085286140441895, |
|
"rewards/rejected": -3.1012163162231445, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5909510618651893, |
|
"grad_norm": 18.57311563682423, |
|
"learning_rate": 2.1408874343844294e-07, |
|
"logits/chosen": -2.46991229057312, |
|
"logits/rejected": -2.839108943939209, |
|
"logps/chosen": -564.0499877929688, |
|
"logps/rejected": -614.0209350585938, |
|
"loss": 0.5184, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.8387348651885986, |
|
"rewards/margins": 0.748325765132904, |
|
"rewards/rejected": -3.5870604515075684, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6015037593984962, |
|
"grad_norm": 18.112658192267443, |
|
"learning_rate": 2.049711972582101e-07, |
|
"logits/chosen": -2.4610495567321777, |
|
"logits/rejected": -2.7717814445495605, |
|
"logps/chosen": -595.5016479492188, |
|
"logps/rejected": -650.4605712890625, |
|
"loss": 0.4974, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -3.076230764389038, |
|
"rewards/margins": 0.7762446999549866, |
|
"rewards/rejected": -3.852475643157959, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.6120564569318032, |
|
"grad_norm": 25.977212495196564, |
|
"learning_rate": 1.9591515500618588e-07, |
|
"logits/chosen": -2.5490634441375732, |
|
"logits/rejected": -2.773324728012085, |
|
"logps/chosen": -515.6527099609375, |
|
"logps/rejected": -566.9156494140625, |
|
"loss": 0.5189, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.3954033851623535, |
|
"rewards/margins": 0.6492033004760742, |
|
"rewards/rejected": -3.0446066856384277, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6226091544651101, |
|
"grad_norm": 16.798443756074835, |
|
"learning_rate": 1.8693298614677112e-07, |
|
"logits/chosen": -2.400968074798584, |
|
"logits/rejected": -2.6868553161621094, |
|
"logps/chosen": -556.1025390625, |
|
"logps/rejected": -596.8348388671875, |
|
"loss": 0.4854, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.474238872528076, |
|
"rewards/margins": 0.6959460377693176, |
|
"rewards/rejected": -3.17018461227417, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.6331618519984171, |
|
"grad_norm": 23.81678431486218, |
|
"learning_rate": 1.7803695924219814e-07, |
|
"logits/chosen": -2.479430675506592, |
|
"logits/rejected": -2.7782349586486816, |
|
"logps/chosen": -578.2171020507812, |
|
"logps/rejected": -651.0504150390625, |
|
"loss": 0.4662, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.9857161045074463, |
|
"rewards/margins": 0.9989351034164429, |
|
"rewards/rejected": -3.9846510887145996, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.643714549531724, |
|
"grad_norm": 17.09054147088968, |
|
"learning_rate": 1.6923922519515067e-07, |
|
"logits/chosen": -2.4572558403015137, |
|
"logits/rejected": -2.866284132003784, |
|
"logps/chosen": -598.88037109375, |
|
"logps/rejected": -641.3062744140625, |
|
"loss": 0.5055, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -3.1678709983825684, |
|
"rewards/margins": 0.8001095056533813, |
|
"rewards/rejected": -3.9679806232452393, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.654267247065031, |
|
"grad_norm": 14.492251564068122, |
|
"learning_rate": 1.605518006520924e-07, |
|
"logits/chosen": -2.3932666778564453, |
|
"logits/rejected": -2.6719508171081543, |
|
"logps/chosen": -501.3484802246094, |
|
"logps/rejected": -544.0150146484375, |
|
"loss": 0.5221, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.2441365718841553, |
|
"rewards/margins": 0.6640299558639526, |
|
"rewards/rejected": -2.9081664085388184, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6648199445983379, |
|
"grad_norm": 19.243991902749503, |
|
"learning_rate": 1.519865515899731e-07, |
|
"logits/chosen": -2.444279432296753, |
|
"logits/rejected": -2.6949431896209717, |
|
"logps/chosen": -506.01519775390625, |
|
"logps/rejected": -542.7237548828125, |
|
"loss": 0.5115, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.281829357147217, |
|
"rewards/margins": 0.6259506940841675, |
|
"rewards/rejected": -2.907780170440674, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6753726421316449, |
|
"grad_norm": 27.99539224141589, |
|
"learning_rate": 1.4355517710873182e-07, |
|
"logits/chosen": -2.5953707695007324, |
|
"logits/rejected": -2.877714157104492, |
|
"logps/chosen": -571.291015625, |
|
"logps/rejected": -615.3301391601562, |
|
"loss": 0.5011, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.8195748329162598, |
|
"rewards/margins": 0.6808874607086182, |
|
"rewards/rejected": -3.500462293624878, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6859253396649518, |
|
"grad_norm": 23.88018530349238, |
|
"learning_rate": 1.3526919345173318e-07, |
|
"logits/chosen": -2.5718350410461426, |
|
"logits/rejected": -2.88576078414917, |
|
"logps/chosen": -595.6961669921875, |
|
"logps/rejected": -665.6595458984375, |
|
"loss": 0.4992, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -3.05544376373291, |
|
"rewards/margins": 0.9698305130004883, |
|
"rewards/rejected": -4.025274753570557, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.6964780371982588, |
|
"grad_norm": 23.60330153062859, |
|
"learning_rate": 1.2713991827596443e-07, |
|
"logits/chosen": -2.614315986633301, |
|
"logits/rejected": -2.894726276397705, |
|
"logps/chosen": -562.1041259765625, |
|
"logps/rejected": -629.8328857421875, |
|
"loss": 0.4933, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.73535418510437, |
|
"rewards/margins": 0.9043378829956055, |
|
"rewards/rejected": -3.6396923065185547, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7070307347315657, |
|
"grad_norm": 18.43994758901315, |
|
"learning_rate": 1.191784551934773e-07, |
|
"logits/chosen": -2.494032144546509, |
|
"logits/rejected": -2.8370561599731445, |
|
"logps/chosen": -512.5650024414062, |
|
"logps/rejected": -558.193115234375, |
|
"loss": 0.4919, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.298001766204834, |
|
"rewards/margins": 0.7504197955131531, |
|
"rewards/rejected": -3.048421859741211, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.7175834322648728, |
|
"grad_norm": 19.769138328586244, |
|
"learning_rate": 1.1139567860518953e-07, |
|
"logits/chosen": -2.399077892303467, |
|
"logits/rejected": -2.8016879558563232, |
|
"logps/chosen": -532.8413696289062, |
|
"logps/rejected": -595.0988159179688, |
|
"loss": 0.4698, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -2.4740264415740967, |
|
"rewards/margins": 0.9756819009780884, |
|
"rewards/rejected": -3.4497084617614746, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7281361297981797, |
|
"grad_norm": 29.121484778204135, |
|
"learning_rate": 1.0380221884776128e-07, |
|
"logits/chosen": -2.504153251647949, |
|
"logits/rejected": -2.826664447784424, |
|
"logps/chosen": -588.2379150390625, |
|
"logps/rejected": -649.8221435546875, |
|
"loss": 0.4541, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.9968106746673584, |
|
"rewards/margins": 0.9327837824821472, |
|
"rewards/rejected": -3.9295945167541504, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.7386888273314867, |
|
"grad_norm": 20.917558525767543, |
|
"learning_rate": 9.640844767383405e-08, |
|
"logits/chosen": -2.4767587184906006, |
|
"logits/rejected": -2.815369129180908, |
|
"logps/chosen": -636.3276977539062, |
|
"logps/rejected": -681.0283203125, |
|
"loss": 0.5234, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -3.6004860401153564, |
|
"rewards/margins": 0.719234824180603, |
|
"rewards/rejected": -4.31972074508667, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7492415248647936, |
|
"grad_norm": 20.761748981239933, |
|
"learning_rate": 8.922446408546378e-08, |
|
"logits/chosen": -2.4393577575683594, |
|
"logits/rejected": -2.7462494373321533, |
|
"logps/chosen": -593.6701049804688, |
|
"logps/rejected": -662.8970947265625, |
|
"loss": 0.4559, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -3.010227918624878, |
|
"rewards/margins": 0.9989708662033081, |
|
"rewards/rejected": -4.0091986656188965, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7597942223981005, |
|
"grad_norm": 18.501565997520643, |
|
"learning_rate": 8.22600805400994e-08, |
|
"logits/chosen": -2.382094144821167, |
|
"logits/rejected": -2.714757204055786, |
|
"logps/chosen": -528.34033203125, |
|
"logps/rejected": -588.7109985351562, |
|
"loss": 0.4723, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.4561707973480225, |
|
"rewards/margins": 0.8588768243789673, |
|
"rewards/rejected": -3.3150477409362793, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7703469199314075, |
|
"grad_norm": 22.596420829881406, |
|
"learning_rate": 7.552480954794558e-08, |
|
"logits/chosen": -2.496333599090576, |
|
"logits/rejected": -2.8438127040863037, |
|
"logps/chosen": -587.7208862304688, |
|
"logps/rejected": -652.3656005859375, |
|
"loss": 0.4838, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -3.0826942920684814, |
|
"rewards/margins": 0.9047689437866211, |
|
"rewards/rejected": -3.9874634742736816, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.7808996174647144, |
|
"grad_norm": 17.83641444640056, |
|
"learning_rate": 6.902785067901854e-08, |
|
"logits/chosen": -2.5392613410949707, |
|
"logits/rejected": -2.8968329429626465, |
|
"logps/chosen": -596.1561889648438, |
|
"logps/rejected": -664.4248046875, |
|
"loss": 0.4774, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -3.162071943283081, |
|
"rewards/margins": 0.8737271428108215, |
|
"rewards/rejected": -4.035799026489258, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7914523149980214, |
|
"grad_norm": 22.105262436574318, |
|
"learning_rate": 6.277807799763973e-08, |
|
"logits/chosen": -2.464101552963257, |
|
"logits/rejected": -2.823216199874878, |
|
"logps/chosen": -605.5325317382812, |
|
"logps/rejected": -688.7382202148438, |
|
"loss": 0.4821, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -3.242568254470825, |
|
"rewards/margins": 1.0074737071990967, |
|
"rewards/rejected": -4.250041961669922, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.8020050125313283, |
|
"grad_norm": 25.73803952489229, |
|
"learning_rate": 5.678402794153145e-08, |
|
"logits/chosen": -2.5645461082458496, |
|
"logits/rejected": -2.8685081005096436, |
|
"logps/chosen": -624.9561767578125, |
|
"logps/rejected": -682.5247802734375, |
|
"loss": 0.4853, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.3231148719787598, |
|
"rewards/margins": 0.8197879791259766, |
|
"rewards/rejected": -4.142902374267578, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8125577100646353, |
|
"grad_norm": 22.783957458664876, |
|
"learning_rate": 5.105388766206969e-08, |
|
"logits/chosen": -2.611253261566162, |
|
"logits/rejected": -2.8708913326263428, |
|
"logps/chosen": -601.1683349609375, |
|
"logps/rejected": -657.6758422851562, |
|
"loss": 0.4961, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.23264741897583, |
|
"rewards/margins": 0.7812051773071289, |
|
"rewards/rejected": -4.013852119445801, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.8231104075979422, |
|
"grad_norm": 17.937740756320142, |
|
"learning_rate": 4.5595483841620484e-08, |
|
"logits/chosen": -2.585615396499634, |
|
"logits/rejected": -2.860517978668213, |
|
"logps/chosen": -610.9042358398438, |
|
"logps/rejected": -673.6950073242188, |
|
"loss": 0.5005, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.3800010681152344, |
|
"rewards/margins": 0.7753348350524902, |
|
"rewards/rejected": -4.155335426330566, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8336631051312492, |
|
"grad_norm": 25.048225079070804, |
|
"learning_rate": 4.0416272003232526e-08, |
|
"logits/chosen": -2.5495500564575195, |
|
"logits/rejected": -2.783395290374756, |
|
"logps/chosen": -589.7579956054688, |
|
"logps/rejected": -651.603515625, |
|
"loss": 0.4634, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -3.1502811908721924, |
|
"rewards/margins": 0.8453443646430969, |
|
"rewards/rejected": -3.9956252574920654, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8442158026645561, |
|
"grad_norm": 27.936816284901383, |
|
"learning_rate": 3.552332632729041e-08, |
|
"logits/chosen": -2.5146939754486084, |
|
"logits/rejected": -2.804884195327759, |
|
"logps/chosen": -594.3411865234375, |
|
"logps/rejected": -653.3115844726562, |
|
"loss": 0.4978, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -3.1166281700134277, |
|
"rewards/margins": 0.8442124128341675, |
|
"rewards/rejected": -3.9608407020568848, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8442158026645561, |
|
"eval_logits/chosen": -3.206465244293213, |
|
"eval_logits/rejected": -3.0895018577575684, |
|
"eval_logps/chosen": -606.6761474609375, |
|
"eval_logps/rejected": -664.0686645507812, |
|
"eval_loss": 0.6230235695838928, |
|
"eval_rewards/accuracies": 0.6370967626571655, |
|
"eval_rewards/chosen": -3.440429449081421, |
|
"eval_rewards/margins": 0.46332982182502747, |
|
"eval_rewards/rejected": -3.903759717941284, |
|
"eval_runtime": 145.9837, |
|
"eval_samples_per_second": 13.536, |
|
"eval_steps_per_second": 0.849, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8547685001978631, |
|
"grad_norm": 19.235680562568444, |
|
"learning_rate": 3.092332998903416e-08, |
|
"logits/chosen": -2.4855546951293945, |
|
"logits/rejected": -2.8127689361572266, |
|
"logps/chosen": -608.1304931640625, |
|
"logps/rejected": -668.380859375, |
|
"loss": 0.44, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -3.3122177124023438, |
|
"rewards/margins": 0.8978776931762695, |
|
"rewards/rejected": -4.210095405578613, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.86532119773117, |
|
"grad_norm": 20.802575196574956, |
|
"learning_rate": 2.6622566030146455e-08, |
|
"logits/chosen": -2.571362018585205, |
|
"logits/rejected": -2.8206756114959717, |
|
"logps/chosen": -649.1343994140625, |
|
"logps/rejected": -712.3201293945312, |
|
"loss": 0.4668, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.6075432300567627, |
|
"rewards/margins": 0.9334003329277039, |
|
"rewards/rejected": -4.5409440994262695, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.875873895264477, |
|
"grad_norm": 20.49229366313954, |
|
"learning_rate": 2.26269087768734e-08, |
|
"logits/chosen": -2.5383522510528564, |
|
"logits/rejected": -2.862185478210449, |
|
"logps/chosen": -622.9601440429688, |
|
"logps/rejected": -697.26904296875, |
|
"loss": 0.4639, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -3.579232692718506, |
|
"rewards/margins": 0.9947258234024048, |
|
"rewards/rejected": -4.573958396911621, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.8864265927977839, |
|
"grad_norm": 18.435121377291708, |
|
"learning_rate": 1.894181581640106e-08, |
|
"logits/chosen": -2.4851062297821045, |
|
"logits/rejected": -2.818612813949585, |
|
"logps/chosen": -691.8073120117188, |
|
"logps/rejected": -776.2567138671875, |
|
"loss": 0.428, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -3.87129545211792, |
|
"rewards/margins": 1.1842930316925049, |
|
"rewards/rejected": -5.055588722229004, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.8969792903310909, |
|
"grad_norm": 29.475224627532654, |
|
"learning_rate": 1.5572320542448143e-08, |
|
"logits/chosen": -2.510409355163574, |
|
"logits/rejected": -2.80336594581604, |
|
"logps/chosen": -651.34326171875, |
|
"logps/rejected": -712.4142456054688, |
|
"loss": 0.494, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -3.6103858947753906, |
|
"rewards/margins": 0.8933914303779602, |
|
"rewards/rejected": -4.503777027130127, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.9075319878643978, |
|
"grad_norm": 25.052350185477973, |
|
"learning_rate": 1.2523025280255729e-08, |
|
"logits/chosen": -2.5651907920837402, |
|
"logits/rejected": -2.870457172393799, |
|
"logps/chosen": -678.6126708984375, |
|
"logps/rejected": -742.5474853515625, |
|
"loss": 0.4623, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -3.7764217853546143, |
|
"rewards/margins": 1.0084255933761597, |
|
"rewards/rejected": -4.784847259521484, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9180846853977048, |
|
"grad_norm": 20.51875255327418, |
|
"learning_rate": 9.798095000364214e-09, |
|
"logits/chosen": -2.5898213386535645, |
|
"logits/rejected": -2.949827194213867, |
|
"logps/chosen": -640.5285034179688, |
|
"logps/rejected": -698.768798828125, |
|
"loss": 0.5011, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -3.686187744140625, |
|
"rewards/margins": 0.8774013519287109, |
|
"rewards/rejected": -4.563588619232178, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.9286373829310117, |
|
"grad_norm": 20.399922495391955, |
|
"learning_rate": 7.401251629764876e-09, |
|
"logits/chosen": -2.594036817550659, |
|
"logits/rejected": -2.882014274597168, |
|
"logps/chosen": -671.9681396484375, |
|
"logps/rejected": -730.8321533203125, |
|
"loss": 0.4861, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -3.8397529125213623, |
|
"rewards/margins": 0.9077512621879578, |
|
"rewards/rejected": -4.747504234313965, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9391900804643187, |
|
"grad_norm": 22.999851244619595, |
|
"learning_rate": 5.335768968195098e-09, |
|
"logits/chosen": -2.593620538711548, |
|
"logits/rejected": -2.895954132080078, |
|
"logps/chosen": -661.6905517578125, |
|
"logps/rejected": -731.1881713867188, |
|
"loss": 0.4489, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -3.763947010040283, |
|
"rewards/margins": 0.9344717264175415, |
|
"rewards/rejected": -4.698418617248535, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.9497427779976256, |
|
"grad_norm": 25.929731545060648, |
|
"learning_rate": 3.604468216521883e-09, |
|
"logits/chosen": -2.6423192024230957, |
|
"logits/rejected": -2.9191346168518066, |
|
"logps/chosen": -610.2369995117188, |
|
"logps/rejected": -673.9677124023438, |
|
"loss": 0.4538, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -3.2649269104003906, |
|
"rewards/margins": 0.8473002314567566, |
|
"rewards/rejected": -4.112226963043213, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9602954755309326, |
|
"grad_norm": 30.30149224906545, |
|
"learning_rate": 2.2097141233206884e-09, |
|
"logits/chosen": -2.479203462600708, |
|
"logits/rejected": -2.7860312461853027, |
|
"logps/chosen": -680.89453125, |
|
"logps/rejected": -742.8502807617188, |
|
"loss": 0.4953, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -3.7775790691375732, |
|
"rewards/margins": 0.8955792188644409, |
|
"rewards/rejected": -4.673158645629883, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.9708481730642395, |
|
"grad_norm": 17.27399919597619, |
|
"learning_rate": 1.1534117549133472e-09, |
|
"logits/chosen": -2.5051302909851074, |
|
"logits/rejected": -2.7565226554870605, |
|
"logps/chosen": -642.7161254882812, |
|
"logps/rejected": -727.14599609375, |
|
"loss": 0.4686, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -3.4852137565612793, |
|
"rewards/margins": 1.0831372737884521, |
|
"rewards/rejected": -4.5683512687683105, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9814008705975465, |
|
"grad_norm": 22.26013302983421, |
|
"learning_rate": 4.3700389327672173e-10, |
|
"logits/chosen": -2.3914403915405273, |
|
"logits/rejected": -2.711667537689209, |
|
"logps/chosen": -663.2670288085938, |
|
"logps/rejected": -728.1150512695312, |
|
"loss": 0.4779, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -3.666525363922119, |
|
"rewards/margins": 0.9401981234550476, |
|
"rewards/rejected": -4.606723308563232, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.9919535681308534, |
|
"grad_norm": 39.07814619267544, |
|
"learning_rate": 6.146906537587982e-11, |
|
"logits/chosen": -2.5620739459991455, |
|
"logits/rejected": -2.8582262992858887, |
|
"logps/chosen": -628.55322265625, |
|
"logps/rejected": -687.5040283203125, |
|
"loss": 0.4882, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -3.4171409606933594, |
|
"rewards/margins": 0.8897517919540405, |
|
"rewards/rejected": -4.3068928718566895, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.9982851866508377, |
|
"step": 473, |
|
"total_flos": 0.0, |
|
"train_loss": 0.545083115015171, |
|
"train_runtime": 9073.2474, |
|
"train_samples_per_second": 6.684, |
|
"train_steps_per_second": 0.052 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 473, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |