|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 64, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"debug/policy_chosen_logits": -1.0596340894699097, |
|
"debug/policy_chosen_logps": -179.04273986816406, |
|
"debug/policy_rejected_logits": -1.1748394966125488, |
|
"debug/policy_rejected_logps": -295.01690673828125, |
|
"debug/reference_chosen_logps": -179.04273986816406, |
|
"debug/reference_rejected_logps": -295.01690673828125, |
|
"epoch": 0.015625, |
|
"grad_norm": 52.30319105460711, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.0596340894699097, |
|
"logits/rejected": -1.1748394966125488, |
|
"logps/chosen": -179.04273986816406, |
|
"logps/rejected": -295.01690673828125, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.1150486469268799, |
|
"debug/policy_chosen_logps": -124.63790893554688, |
|
"debug/policy_rejected_logits": -1.0623761415481567, |
|
"debug/policy_rejected_logps": -270.75244140625, |
|
"debug/reference_chosen_logps": -125.14633178710938, |
|
"debug/reference_rejected_logps": -271.20208740234375, |
|
"epoch": 0.03125, |
|
"grad_norm": 29.130704023833047, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.1150486469268799, |
|
"logits/rejected": -1.0623761415481567, |
|
"logps/chosen": -124.63790893554688, |
|
"logps/rejected": -270.75244140625, |
|
"loss": 0.4989, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.005084190517663956, |
|
"rewards/margins": 0.0005879019154235721, |
|
"rewards/rejected": 0.0044962880201637745, |
|
"step": 2 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.1071562767028809, |
|
"debug/policy_chosen_logps": -136.3170166015625, |
|
"debug/policy_rejected_logits": -1.1613606214523315, |
|
"debug/policy_rejected_logps": -268.709228515625, |
|
"debug/reference_chosen_logps": -137.68783569335938, |
|
"debug/reference_rejected_logps": -268.8507995605469, |
|
"epoch": 0.046875, |
|
"grad_norm": 24.965184935253273, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.1071562767028809, |
|
"logits/rejected": -1.1613606214523315, |
|
"logps/chosen": -136.3170166015625, |
|
"logps/rejected": -268.709228515625, |
|
"loss": 0.4914, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.013708190061151981, |
|
"rewards/margins": 0.012292098253965378, |
|
"rewards/rejected": 0.0014160918071866035, |
|
"step": 3 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.066061019897461, |
|
"debug/policy_chosen_logps": -153.8428192138672, |
|
"debug/policy_rejected_logits": -1.1866570711135864, |
|
"debug/policy_rejected_logps": -274.9277648925781, |
|
"debug/reference_chosen_logps": -155.69000244140625, |
|
"debug/reference_rejected_logps": -275.12884521484375, |
|
"epoch": 0.0625, |
|
"grad_norm": 24.49810670915077, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.066061019897461, |
|
"logits/rejected": -1.1866570711135864, |
|
"logps/chosen": -153.8428192138672, |
|
"logps/rejected": -274.9277648925781, |
|
"loss": 0.4777, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.018471689894795418, |
|
"rewards/margins": 0.016461096704006195, |
|
"rewards/rejected": 0.0020105931907892227, |
|
"step": 4 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.0783909559249878, |
|
"debug/policy_chosen_logps": -161.8551483154297, |
|
"debug/policy_rejected_logits": -1.1809625625610352, |
|
"debug/policy_rejected_logps": -291.5763244628906, |
|
"debug/reference_chosen_logps": -165.77706909179688, |
|
"debug/reference_rejected_logps": -290.215087890625, |
|
"epoch": 0.078125, |
|
"grad_norm": 22.066344534464825, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.0783909559249878, |
|
"logits/rejected": -1.1809625625610352, |
|
"logps/chosen": -161.8551483154297, |
|
"logps/rejected": -291.5763244628906, |
|
"loss": 0.4425, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.039219196885824203, |
|
"rewards/margins": 0.05283135548233986, |
|
"rewards/rejected": -0.013612156733870506, |
|
"step": 5 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.0005463361740112, |
|
"debug/policy_chosen_logps": -177.85003662109375, |
|
"debug/policy_rejected_logits": -1.0288403034210205, |
|
"debug/policy_rejected_logps": -263.21014404296875, |
|
"debug/reference_chosen_logps": -178.246337890625, |
|
"debug/reference_rejected_logps": -263.5099182128906, |
|
"epoch": 0.09375, |
|
"grad_norm": 41.16778948079108, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.0005463361740112, |
|
"logits/rejected": -1.0288403034210205, |
|
"logps/chosen": -177.85003662109375, |
|
"logps/rejected": -263.21014404296875, |
|
"loss": 0.4659, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.003962935879826546, |
|
"rewards/margins": 0.0009648129343986511, |
|
"rewards/rejected": 0.0029981210827827454, |
|
"step": 6 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -0.9317433834075928, |
|
"debug/policy_chosen_logps": -155.7017822265625, |
|
"debug/policy_rejected_logits": -1.3209773302078247, |
|
"debug/policy_rejected_logps": -308.2155456542969, |
|
"debug/reference_chosen_logps": -165.14569091796875, |
|
"debug/reference_rejected_logps": -295.0081481933594, |
|
"epoch": 0.109375, |
|
"grad_norm": 15.904262612549944, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -0.9317433834075928, |
|
"logits/rejected": -1.3209773302078247, |
|
"logps/chosen": -155.7017822265625, |
|
"logps/rejected": -308.2155456542969, |
|
"loss": 0.4323, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0944390594959259, |
|
"rewards/margins": 0.22651299834251404, |
|
"rewards/rejected": -0.13207395374774933, |
|
"step": 7 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.0539729595184326, |
|
"debug/policy_chosen_logps": -173.66781616210938, |
|
"debug/policy_rejected_logits": -1.0206472873687744, |
|
"debug/policy_rejected_logps": -271.9588317871094, |
|
"debug/reference_chosen_logps": -177.30899047851562, |
|
"debug/reference_rejected_logps": -263.9579162597656, |
|
"epoch": 0.125, |
|
"grad_norm": 15.028091497342194, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.0539729595184326, |
|
"logits/rejected": -1.0206472873687744, |
|
"logps/chosen": -173.66781616210938, |
|
"logps/rejected": -271.9588317871094, |
|
"loss": 0.4255, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.036411646753549576, |
|
"rewards/margins": 0.1164209246635437, |
|
"rewards/rejected": -0.08000928163528442, |
|
"step": 8 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -0.9866081476211548, |
|
"debug/policy_chosen_logps": -176.56866455078125, |
|
"debug/policy_rejected_logits": -0.9740838408470154, |
|
"debug/policy_rejected_logps": -272.35650634765625, |
|
"debug/reference_chosen_logps": -177.0741729736328, |
|
"debug/reference_rejected_logps": -260.4818420410156, |
|
"epoch": 0.140625, |
|
"grad_norm": 32.8906220838234, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -0.9866081476211548, |
|
"logits/rejected": -0.9740838408470154, |
|
"logps/chosen": -176.56866455078125, |
|
"logps/rejected": -272.35650634765625, |
|
"loss": 0.4107, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.005055226851254702, |
|
"rewards/margins": 0.12380212545394897, |
|
"rewards/rejected": -0.11874689161777496, |
|
"step": 9 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -0.9441277980804443, |
|
"debug/policy_chosen_logps": -164.24789428710938, |
|
"debug/policy_rejected_logits": -1.1364271640777588, |
|
"debug/policy_rejected_logps": -292.0938720703125, |
|
"debug/reference_chosen_logps": -160.7564697265625, |
|
"debug/reference_rejected_logps": -257.1752014160156, |
|
"epoch": 0.15625, |
|
"grad_norm": 28.811843166780264, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -0.9441277980804443, |
|
"logits/rejected": -1.1364271640777588, |
|
"logps/chosen": -164.24789428710938, |
|
"logps/rejected": -292.0938720703125, |
|
"loss": 0.4237, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.03491419926285744, |
|
"rewards/margins": 0.3142724931240082, |
|
"rewards/rejected": -0.3491867184638977, |
|
"step": 10 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -0.9175143837928772, |
|
"debug/policy_chosen_logps": -214.65664672851562, |
|
"debug/policy_rejected_logits": -1.1515822410583496, |
|
"debug/policy_rejected_logps": -244.6530303955078, |
|
"debug/reference_chosen_logps": -207.79930114746094, |
|
"debug/reference_rejected_logps": -230.90333557128906, |
|
"epoch": 0.171875, |
|
"grad_norm": 38.46428758925275, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -0.9175143837928772, |
|
"logits/rejected": -1.1515822410583496, |
|
"logps/chosen": -214.65664672851562, |
|
"logps/rejected": -244.6530303955078, |
|
"loss": 0.4949, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.06857340782880783, |
|
"rewards/margins": 0.06892354786396027, |
|
"rewards/rejected": -0.1374969631433487, |
|
"step": 11 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -0.8965519070625305, |
|
"debug/policy_chosen_logps": -153.26284790039062, |
|
"debug/policy_rejected_logits": -1.1321805715560913, |
|
"debug/policy_rejected_logps": -318.78076171875, |
|
"debug/reference_chosen_logps": -154.14707946777344, |
|
"debug/reference_rejected_logps": -289.067138671875, |
|
"epoch": 0.1875, |
|
"grad_norm": 41.433140559474445, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -0.8965519070625305, |
|
"logits/rejected": -1.1321805715560913, |
|
"logps/chosen": -153.26284790039062, |
|
"logps/rejected": -318.78076171875, |
|
"loss": 0.4907, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.008842326700687408, |
|
"rewards/margins": 0.30597835779190063, |
|
"rewards/rejected": -0.2971360683441162, |
|
"step": 12 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -0.945601761341095, |
|
"debug/policy_chosen_logps": -122.90229797363281, |
|
"debug/policy_rejected_logits": -1.0716924667358398, |
|
"debug/policy_rejected_logps": -274.2931823730469, |
|
"debug/reference_chosen_logps": -120.32145690917969, |
|
"debug/reference_rejected_logps": -250.55557250976562, |
|
"epoch": 0.203125, |
|
"grad_norm": 26.79881614435138, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -0.945601761341095, |
|
"logits/rejected": -1.0716924667358398, |
|
"logps/chosen": -122.90229797363281, |
|
"logps/rejected": -274.2931823730469, |
|
"loss": 0.4694, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.02580837532877922, |
|
"rewards/margins": 0.2115677297115326, |
|
"rewards/rejected": -0.2373761087656021, |
|
"step": 13 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.0047388076782227, |
|
"debug/policy_chosen_logps": -200.4830780029297, |
|
"debug/policy_rejected_logits": -1.1980981826782227, |
|
"debug/policy_rejected_logps": -315.792236328125, |
|
"debug/reference_chosen_logps": -190.80075073242188, |
|
"debug/reference_rejected_logps": -281.5347595214844, |
|
"epoch": 0.21875, |
|
"grad_norm": 27.316365360407435, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.0047388076782227, |
|
"logits/rejected": -1.1980981826782227, |
|
"logps/chosen": -200.4830780029297, |
|
"logps/rejected": -315.792236328125, |
|
"loss": 0.4324, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.09682333469390869, |
|
"rewards/margins": 0.24575121700763702, |
|
"rewards/rejected": -0.3425745368003845, |
|
"step": 14 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.1348706483840942, |
|
"debug/policy_chosen_logps": -208.73074340820312, |
|
"debug/policy_rejected_logits": -1.121549129486084, |
|
"debug/policy_rejected_logps": -310.7353210449219, |
|
"debug/reference_chosen_logps": -204.0843048095703, |
|
"debug/reference_rejected_logps": -281.996337890625, |
|
"epoch": 0.234375, |
|
"grad_norm": 58.18504208169894, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.1348706483840942, |
|
"logits/rejected": -1.121549129486084, |
|
"logps/chosen": -208.73074340820312, |
|
"logps/rejected": -310.7353210449219, |
|
"loss": 0.4662, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.04646441712975502, |
|
"rewards/margins": 0.24092541635036469, |
|
"rewards/rejected": -0.2873898148536682, |
|
"step": 15 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -0.9974825978279114, |
|
"debug/policy_chosen_logps": -154.0273895263672, |
|
"debug/policy_rejected_logits": -1.1503194570541382, |
|
"debug/policy_rejected_logps": -307.7276611328125, |
|
"debug/reference_chosen_logps": -154.69586181640625, |
|
"debug/reference_rejected_logps": -273.1531677246094, |
|
"epoch": 0.25, |
|
"grad_norm": 56.48600158612175, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -0.9974825978279114, |
|
"logits/rejected": -1.1503194570541382, |
|
"logps/chosen": -154.0273895263672, |
|
"logps/rejected": -307.7276611328125, |
|
"loss": 0.4093, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.006684892810881138, |
|
"rewards/margins": 0.3524298071861267, |
|
"rewards/rejected": -0.34574490785598755, |
|
"step": 16 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.0567247867584229, |
|
"debug/policy_chosen_logps": -137.61720275878906, |
|
"debug/policy_rejected_logits": -1.0961592197418213, |
|
"debug/policy_rejected_logps": -313.12060546875, |
|
"debug/reference_chosen_logps": -135.6652069091797, |
|
"debug/reference_rejected_logps": -297.18695068359375, |
|
"epoch": 0.265625, |
|
"grad_norm": 31.458528575785774, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.0567247867584229, |
|
"logits/rejected": -1.0961592197418213, |
|
"logps/chosen": -137.61720275878906, |
|
"logps/rejected": -313.12060546875, |
|
"loss": 0.4473, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.01951989158987999, |
|
"rewards/margins": 0.13981682062149048, |
|
"rewards/rejected": -0.15933671593666077, |
|
"step": 17 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.0550764799118042, |
|
"debug/policy_chosen_logps": -143.5434112548828, |
|
"debug/policy_rejected_logits": -1.3183400630950928, |
|
"debug/policy_rejected_logps": -359.35418701171875, |
|
"debug/reference_chosen_logps": -157.90188598632812, |
|
"debug/reference_rejected_logps": -317.474853515625, |
|
"epoch": 0.28125, |
|
"grad_norm": 24.003283570475016, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.0550764799118042, |
|
"logits/rejected": -1.3183400630950928, |
|
"logps/chosen": -143.5434112548828, |
|
"logps/rejected": -359.35418701171875, |
|
"loss": 0.4381, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.14358465373516083, |
|
"rewards/margins": 0.5623779296875, |
|
"rewards/rejected": -0.418793261051178, |
|
"step": 18 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.1249719858169556, |
|
"debug/policy_chosen_logps": -163.00375366210938, |
|
"debug/policy_rejected_logits": -1.1258165836334229, |
|
"debug/policy_rejected_logps": -283.2430725097656, |
|
"debug/reference_chosen_logps": -166.72418212890625, |
|
"debug/reference_rejected_logps": -264.2232360839844, |
|
"epoch": 0.296875, |
|
"grad_norm": 41.98096605753313, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.1249719858169556, |
|
"logits/rejected": -1.1258165836334229, |
|
"logps/chosen": -163.00375366210938, |
|
"logps/rejected": -283.2430725097656, |
|
"loss": 0.4597, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.03720443695783615, |
|
"rewards/margins": 0.22740286588668823, |
|
"rewards/rejected": -0.19019843637943268, |
|
"step": 19 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.000652551651001, |
|
"debug/policy_chosen_logps": -174.8540802001953, |
|
"debug/policy_rejected_logits": -1.075732946395874, |
|
"debug/policy_rejected_logps": -248.3970947265625, |
|
"debug/reference_chosen_logps": -179.03424072265625, |
|
"debug/reference_rejected_logps": -235.50778198242188, |
|
"epoch": 0.3125, |
|
"grad_norm": 26.892461198324778, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.000652551651001, |
|
"logits/rejected": -1.075732946395874, |
|
"logps/chosen": -174.8540802001953, |
|
"logps/rejected": -248.3970947265625, |
|
"loss": 0.4325, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.041801512241363525, |
|
"rewards/margins": 0.1706947386264801, |
|
"rewards/rejected": -0.12889322638511658, |
|
"step": 20 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.0587340593338013, |
|
"debug/policy_chosen_logps": -148.18423461914062, |
|
"debug/policy_rejected_logits": -1.435739278793335, |
|
"debug/policy_rejected_logps": -331.1427001953125, |
|
"debug/reference_chosen_logps": -151.2082061767578, |
|
"debug/reference_rejected_logps": -314.77117919921875, |
|
"epoch": 0.328125, |
|
"grad_norm": 15.800648562809261, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.0587340593338013, |
|
"logits/rejected": -1.435739278793335, |
|
"logps/chosen": -148.18423461914062, |
|
"logps/rejected": -331.1427001953125, |
|
"loss": 0.3982, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.030239801853895187, |
|
"rewards/margins": 0.1939551830291748, |
|
"rewards/rejected": -0.16371536254882812, |
|
"step": 21 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.0205200910568237, |
|
"debug/policy_chosen_logps": -157.31350708007812, |
|
"debug/policy_rejected_logits": -1.0888888835906982, |
|
"debug/policy_rejected_logps": -346.0768127441406, |
|
"debug/reference_chosen_logps": -161.5574493408203, |
|
"debug/reference_rejected_logps": -338.91650390625, |
|
"epoch": 0.34375, |
|
"grad_norm": 21.49065797596958, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.0205200910568237, |
|
"logits/rejected": -1.0888888835906982, |
|
"logps/chosen": -157.31350708007812, |
|
"logps/rejected": -346.0768127441406, |
|
"loss": 0.4361, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.042439430952072144, |
|
"rewards/margins": 0.11404269933700562, |
|
"rewards/rejected": -0.07160326838493347, |
|
"step": 22 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.1462302207946777, |
|
"debug/policy_chosen_logps": -195.76788330078125, |
|
"debug/policy_rejected_logits": -1.2484185695648193, |
|
"debug/policy_rejected_logps": -277.576904296875, |
|
"debug/reference_chosen_logps": -198.74685668945312, |
|
"debug/reference_rejected_logps": -265.5393981933594, |
|
"epoch": 0.359375, |
|
"grad_norm": 17.749863549342045, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.1462302207946777, |
|
"logits/rejected": -1.2484185695648193, |
|
"logps/chosen": -195.76788330078125, |
|
"logps/rejected": -277.576904296875, |
|
"loss": 0.4165, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.029789581894874573, |
|
"rewards/margins": 0.1501646637916565, |
|
"rewards/rejected": -0.12037509679794312, |
|
"step": 23 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -0.952358067035675, |
|
"debug/policy_chosen_logps": -115.6708984375, |
|
"debug/policy_rejected_logits": -1.036898136138916, |
|
"debug/policy_rejected_logps": -245.47000122070312, |
|
"debug/reference_chosen_logps": -131.1976776123047, |
|
"debug/reference_rejected_logps": -238.638427734375, |
|
"epoch": 0.375, |
|
"grad_norm": 16.031924320507283, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -0.952358067035675, |
|
"logits/rejected": -1.036898136138916, |
|
"logps/chosen": -115.6708984375, |
|
"logps/rejected": -245.47000122070312, |
|
"loss": 0.3771, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.1552678644657135, |
|
"rewards/margins": 0.22358371317386627, |
|
"rewards/rejected": -0.06831584870815277, |
|
"step": 24 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.070897102355957, |
|
"debug/policy_chosen_logps": -178.87374877929688, |
|
"debug/policy_rejected_logits": -1.1623822450637817, |
|
"debug/policy_rejected_logps": -243.98184204101562, |
|
"debug/reference_chosen_logps": -179.05862426757812, |
|
"debug/reference_rejected_logps": -244.07818603515625, |
|
"epoch": 0.390625, |
|
"grad_norm": 38.66586744942012, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.070897102355957, |
|
"logits/rejected": -1.1623822450637817, |
|
"logps/chosen": -178.87374877929688, |
|
"logps/rejected": -243.98184204101562, |
|
"loss": 0.4396, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.001848660409450531, |
|
"rewards/margins": 0.0008851997554302216, |
|
"rewards/rejected": 0.000963456928730011, |
|
"step": 25 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.1025017499923706, |
|
"debug/policy_chosen_logps": -173.5986328125, |
|
"debug/policy_rejected_logits": -1.1473654508590698, |
|
"debug/policy_rejected_logps": -245.47994995117188, |
|
"debug/reference_chosen_logps": -186.88778686523438, |
|
"debug/reference_rejected_logps": -241.27210998535156, |
|
"epoch": 0.40625, |
|
"grad_norm": 50.21384448251296, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.1025017499923706, |
|
"logits/rejected": -1.1473654508590698, |
|
"logps/chosen": -173.5986328125, |
|
"logps/rejected": -245.47994995117188, |
|
"loss": 0.4023, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.13289162516593933, |
|
"rewards/margins": 0.1749698668718338, |
|
"rewards/rejected": -0.04207824170589447, |
|
"step": 26 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.0175386667251587, |
|
"debug/policy_chosen_logps": -94.42786407470703, |
|
"debug/policy_rejected_logits": -1.1346431970596313, |
|
"debug/policy_rejected_logps": -240.36541748046875, |
|
"debug/reference_chosen_logps": -106.56871032714844, |
|
"debug/reference_rejected_logps": -240.29310607910156, |
|
"epoch": 0.421875, |
|
"grad_norm": 17.96088818186707, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.0175386667251587, |
|
"logits/rejected": -1.1346431970596313, |
|
"logps/chosen": -94.42786407470703, |
|
"logps/rejected": -240.36541748046875, |
|
"loss": 0.39, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.12140841782093048, |
|
"rewards/margins": 0.12213139981031418, |
|
"rewards/rejected": -0.0007229708135128021, |
|
"step": 27 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.1408073902130127, |
|
"debug/policy_chosen_logps": -126.60142517089844, |
|
"debug/policy_rejected_logits": -1.20956289768219, |
|
"debug/policy_rejected_logps": -313.8656311035156, |
|
"debug/reference_chosen_logps": -130.33799743652344, |
|
"debug/reference_rejected_logps": -291.3277893066406, |
|
"epoch": 0.4375, |
|
"grad_norm": 29.993745130410183, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.1408073902130127, |
|
"logits/rejected": -1.20956289768219, |
|
"logps/chosen": -126.60142517089844, |
|
"logps/rejected": -313.8656311035156, |
|
"loss": 0.3947, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.03736574202775955, |
|
"rewards/margins": 0.2627440094947815, |
|
"rewards/rejected": -0.22537828981876373, |
|
"step": 28 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.0291798114776611, |
|
"debug/policy_chosen_logps": -192.92529296875, |
|
"debug/policy_rejected_logits": -1.2137432098388672, |
|
"debug/policy_rejected_logps": -315.1015930175781, |
|
"debug/reference_chosen_logps": -184.9921875, |
|
"debug/reference_rejected_logps": -301.6517639160156, |
|
"epoch": 0.453125, |
|
"grad_norm": 51.5929899248971, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.0291798114776611, |
|
"logits/rejected": -1.2137432098388672, |
|
"logps/chosen": -192.92529296875, |
|
"logps/rejected": -315.1015930175781, |
|
"loss": 0.4253, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0793309286236763, |
|
"rewards/margins": 0.05516732484102249, |
|
"rewards/rejected": -0.1344982385635376, |
|
"step": 29 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.033249020576477, |
|
"debug/policy_chosen_logps": -129.13734436035156, |
|
"debug/policy_rejected_logits": -1.1481682062149048, |
|
"debug/policy_rejected_logps": -319.0918884277344, |
|
"debug/reference_chosen_logps": -134.66598510742188, |
|
"debug/reference_rejected_logps": -297.1129150390625, |
|
"epoch": 0.46875, |
|
"grad_norm": 41.13041833853564, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.033249020576477, |
|
"logits/rejected": -1.1481682062149048, |
|
"logps/chosen": -129.13734436035156, |
|
"logps/rejected": -319.0918884277344, |
|
"loss": 0.4069, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.05528645217418671, |
|
"rewards/margins": 0.2750762701034546, |
|
"rewards/rejected": -0.2197897881269455, |
|
"step": 30 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.1428550481796265, |
|
"debug/policy_chosen_logps": -174.7340087890625, |
|
"debug/policy_rejected_logits": -1.017913818359375, |
|
"debug/policy_rejected_logps": -238.23471069335938, |
|
"debug/reference_chosen_logps": -180.0450897216797, |
|
"debug/reference_rejected_logps": -228.79031372070312, |
|
"epoch": 0.484375, |
|
"grad_norm": 54.8216481339695, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.1428550481796265, |
|
"logits/rejected": -1.017913818359375, |
|
"logps/chosen": -174.7340087890625, |
|
"logps/rejected": -238.23471069335938, |
|
"loss": 0.4467, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.053110986948013306, |
|
"rewards/margins": 0.1475549191236496, |
|
"rewards/rejected": -0.0944439247250557, |
|
"step": 31 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.0067996978759766, |
|
"debug/policy_chosen_logps": -145.49851989746094, |
|
"debug/policy_rejected_logits": -1.210583209991455, |
|
"debug/policy_rejected_logps": -274.90240478515625, |
|
"debug/reference_chosen_logps": -151.12542724609375, |
|
"debug/reference_rejected_logps": -264.36016845703125, |
|
"epoch": 0.5, |
|
"grad_norm": 31.943528016300796, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.0067996978759766, |
|
"logits/rejected": -1.210583209991455, |
|
"logps/chosen": -145.49851989746094, |
|
"logps/rejected": -274.90240478515625, |
|
"loss": 0.3966, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.05626893788576126, |
|
"rewards/margins": 0.16169115900993347, |
|
"rewards/rejected": -0.10542222112417221, |
|
"step": 32 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.1181310415267944, |
|
"debug/policy_chosen_logps": -154.81201171875, |
|
"debug/policy_rejected_logits": -1.2310353517532349, |
|
"debug/policy_rejected_logps": -287.8173828125, |
|
"debug/reference_chosen_logps": -170.07876586914062, |
|
"debug/reference_rejected_logps": -274.1385498046875, |
|
"epoch": 0.515625, |
|
"grad_norm": 18.618810659036946, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.1181310415267944, |
|
"logits/rejected": -1.2310353517532349, |
|
"logps/chosen": -154.81201171875, |
|
"logps/rejected": -287.8173828125, |
|
"loss": 0.3581, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.1526675671339035, |
|
"rewards/margins": 0.2894558906555176, |
|
"rewards/rejected": -0.13678830862045288, |
|
"step": 33 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.0529826879501343, |
|
"debug/policy_chosen_logps": -128.18128967285156, |
|
"debug/policy_rejected_logits": -1.2277421951293945, |
|
"debug/policy_rejected_logps": -326.91705322265625, |
|
"debug/reference_chosen_logps": -147.74295043945312, |
|
"debug/reference_rejected_logps": -300.6445617675781, |
|
"epoch": 0.53125, |
|
"grad_norm": 19.76877319971208, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.0529826879501343, |
|
"logits/rejected": -1.2277421951293945, |
|
"logps/chosen": -128.18128967285156, |
|
"logps/rejected": -326.91705322265625, |
|
"loss": 0.3702, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.19561666250228882, |
|
"rewards/margins": 0.4583418369293213, |
|
"rewards/rejected": -0.26272517442703247, |
|
"step": 34 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.0484968423843384, |
|
"debug/policy_chosen_logps": -177.14181518554688, |
|
"debug/policy_rejected_logits": -1.0831434726715088, |
|
"debug/policy_rejected_logps": -277.63067626953125, |
|
"debug/reference_chosen_logps": -184.79954528808594, |
|
"debug/reference_rejected_logps": -262.337646484375, |
|
"epoch": 0.546875, |
|
"grad_norm": 13.405977545151604, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.0484968423843384, |
|
"logits/rejected": -1.0831434726715088, |
|
"logps/chosen": -177.14181518554688, |
|
"logps/rejected": -277.63067626953125, |
|
"loss": 0.3774, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.07657738029956818, |
|
"rewards/margins": 0.22950761020183563, |
|
"rewards/rejected": -0.15293022990226746, |
|
"step": 35 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.070804476737976, |
|
"debug/policy_chosen_logps": -119.32257080078125, |
|
"debug/policy_rejected_logits": -1.1960089206695557, |
|
"debug/policy_rejected_logps": -257.6097412109375, |
|
"debug/reference_chosen_logps": -134.1144561767578, |
|
"debug/reference_rejected_logps": -249.19239807128906, |
|
"epoch": 0.5625, |
|
"grad_norm": 54.78669264655883, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.070804476737976, |
|
"logits/rejected": -1.1960089206695557, |
|
"logps/chosen": -119.32257080078125, |
|
"logps/rejected": -257.6097412109375, |
|
"loss": 0.4202, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.1479189097881317, |
|
"rewards/margins": 0.23209232091903687, |
|
"rewards/rejected": -0.08417341113090515, |
|
"step": 36 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.0936942100524902, |
|
"debug/policy_chosen_logps": -198.59994506835938, |
|
"debug/policy_rejected_logits": -1.1287853717803955, |
|
"debug/policy_rejected_logps": -267.81048583984375, |
|
"debug/reference_chosen_logps": -206.68980407714844, |
|
"debug/reference_rejected_logps": -260.12896728515625, |
|
"epoch": 0.578125, |
|
"grad_norm": 18.618309162410206, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.0936942100524902, |
|
"logits/rejected": -1.1287853717803955, |
|
"logps/chosen": -198.59994506835938, |
|
"logps/rejected": -267.81048583984375, |
|
"loss": 0.3922, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.08089858293533325, |
|
"rewards/margins": 0.15771383047103882, |
|
"rewards/rejected": -0.07681524008512497, |
|
"step": 37 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.0987818241119385, |
|
"debug/policy_chosen_logps": -156.1143798828125, |
|
"debug/policy_rejected_logits": -1.016094446182251, |
|
"debug/policy_rejected_logps": -280.1226806640625, |
|
"debug/reference_chosen_logps": -174.13986206054688, |
|
"debug/reference_rejected_logps": -272.59063720703125, |
|
"epoch": 0.59375, |
|
"grad_norm": 48.92722394829403, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.0987818241119385, |
|
"logits/rejected": -1.016094446182251, |
|
"logps/chosen": -156.1143798828125, |
|
"logps/rejected": -280.1226806640625, |
|
"loss": 0.4235, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.18025492131710052, |
|
"rewards/margins": 0.2555754780769348, |
|
"rewards/rejected": -0.07532056421041489, |
|
"step": 38 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.1431177854537964, |
|
"debug/policy_chosen_logps": -121.45298767089844, |
|
"debug/policy_rejected_logits": -1.2573899030685425, |
|
"debug/policy_rejected_logps": -243.77618408203125, |
|
"debug/reference_chosen_logps": -132.9182891845703, |
|
"debug/reference_rejected_logps": -236.6573486328125, |
|
"epoch": 0.609375, |
|
"grad_norm": 28.262320173832173, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.1431177854537964, |
|
"logits/rejected": -1.2573899030685425, |
|
"logps/chosen": -121.45298767089844, |
|
"logps/rejected": -243.77618408203125, |
|
"loss": 0.3976, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.11465291678905487, |
|
"rewards/margins": 0.18584111332893372, |
|
"rewards/rejected": -0.07118818163871765, |
|
"step": 39 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.1291528940200806, |
|
"debug/policy_chosen_logps": -124.88560485839844, |
|
"debug/policy_rejected_logits": -1.1997623443603516, |
|
"debug/policy_rejected_logps": -341.6507568359375, |
|
"debug/reference_chosen_logps": -145.1587677001953, |
|
"debug/reference_rejected_logps": -316.4557189941406, |
|
"epoch": 0.625, |
|
"grad_norm": 17.205504877297493, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.1291528940200806, |
|
"logits/rejected": -1.1997623443603516, |
|
"logps/chosen": -124.88560485839844, |
|
"logps/rejected": -341.6507568359375, |
|
"loss": 0.3983, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.202731654047966, |
|
"rewards/margins": 0.4546818137168884, |
|
"rewards/rejected": -0.25195014476776123, |
|
"step": 40 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.119407057762146, |
|
"debug/policy_chosen_logps": -155.58392333984375, |
|
"debug/policy_rejected_logits": -1.165313720703125, |
|
"debug/policy_rejected_logps": -216.57156372070312, |
|
"debug/reference_chosen_logps": -161.89459228515625, |
|
"debug/reference_rejected_logps": -214.2755126953125, |
|
"epoch": 0.640625, |
|
"grad_norm": 20.732094832807366, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.119407057762146, |
|
"logits/rejected": -1.165313720703125, |
|
"logps/chosen": -155.58392333984375, |
|
"logps/rejected": -216.57156372070312, |
|
"loss": 0.3763, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.06310684233903885, |
|
"rewards/margins": 0.0860673040151596, |
|
"rewards/rejected": -0.02296045981347561, |
|
"step": 41 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.2078365087509155, |
|
"debug/policy_chosen_logps": -137.1336212158203, |
|
"debug/policy_rejected_logits": -1.2154945135116577, |
|
"debug/policy_rejected_logps": -227.4922637939453, |
|
"debug/reference_chosen_logps": -139.9180145263672, |
|
"debug/reference_rejected_logps": -215.813232421875, |
|
"epoch": 0.65625, |
|
"grad_norm": 34.027873181354636, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.2078365087509155, |
|
"logits/rejected": -1.2154945135116577, |
|
"logps/chosen": -137.1336212158203, |
|
"logps/rejected": -227.4922637939453, |
|
"loss": 0.4182, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.02784401923418045, |
|
"rewards/margins": 0.14463430643081665, |
|
"rewards/rejected": -0.1167902946472168, |
|
"step": 42 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.0180912017822266, |
|
"debug/policy_chosen_logps": -173.8270263671875, |
|
"debug/policy_rejected_logits": -1.1830826997756958, |
|
"debug/policy_rejected_logps": -286.73638916015625, |
|
"debug/reference_chosen_logps": -174.58895874023438, |
|
"debug/reference_rejected_logps": -263.51458740234375, |
|
"epoch": 0.671875, |
|
"grad_norm": 26.885686366047068, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.0180912017822266, |
|
"logits/rejected": -1.1830826997756958, |
|
"logps/chosen": -173.8270263671875, |
|
"logps/rejected": -286.73638916015625, |
|
"loss": 0.3939, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.007619347423315048, |
|
"rewards/margins": 0.2398374080657959, |
|
"rewards/rejected": -0.23221805691719055, |
|
"step": 43 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -0.8629423379898071, |
|
"debug/policy_chosen_logps": -186.4468994140625, |
|
"debug/policy_rejected_logits": -1.196955680847168, |
|
"debug/policy_rejected_logps": -291.8290710449219, |
|
"debug/reference_chosen_logps": -192.09939575195312, |
|
"debug/reference_rejected_logps": -283.04547119140625, |
|
"epoch": 0.6875, |
|
"grad_norm": 15.341359477798175, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -0.8629423379898071, |
|
"logits/rejected": -1.196955680847168, |
|
"logps/chosen": -186.4468994140625, |
|
"logps/rejected": -291.8290710449219, |
|
"loss": 0.3941, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.05652495473623276, |
|
"rewards/margins": 0.14436087012290955, |
|
"rewards/rejected": -0.08783592283725739, |
|
"step": 44 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.1474281549453735, |
|
"debug/policy_chosen_logps": -185.1705322265625, |
|
"debug/policy_rejected_logits": -1.2113550901412964, |
|
"debug/policy_rejected_logps": -299.13165283203125, |
|
"debug/reference_chosen_logps": -184.02684020996094, |
|
"debug/reference_rejected_logps": -283.3847961425781, |
|
"epoch": 0.703125, |
|
"grad_norm": 27.424785120293386, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.1474281549453735, |
|
"logits/rejected": -1.2113550901412964, |
|
"logps/chosen": -185.1705322265625, |
|
"logps/rejected": -299.13165283203125, |
|
"loss": 0.4015, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.011436812579631805, |
|
"rewards/margins": 0.1460317075252533, |
|
"rewards/rejected": -0.1574685126543045, |
|
"step": 45 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.0573773384094238, |
|
"debug/policy_chosen_logps": -127.71075439453125, |
|
"debug/policy_rejected_logits": -1.0924162864685059, |
|
"debug/policy_rejected_logps": -323.93768310546875, |
|
"debug/reference_chosen_logps": -139.21630859375, |
|
"debug/reference_rejected_logps": -311.1994323730469, |
|
"epoch": 0.71875, |
|
"grad_norm": 17.144934905131425, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.0573773384094238, |
|
"logits/rejected": -1.0924162864685059, |
|
"logps/chosen": -127.71075439453125, |
|
"logps/rejected": -323.93768310546875, |
|
"loss": 0.3624, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.11505550146102905, |
|
"rewards/margins": 0.24243810772895813, |
|
"rewards/rejected": -0.12738259136676788, |
|
"step": 46 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.0909616947174072, |
|
"debug/policy_chosen_logps": -137.27731323242188, |
|
"debug/policy_rejected_logits": -1.2138352394104004, |
|
"debug/policy_rejected_logps": -241.8701171875, |
|
"debug/reference_chosen_logps": -147.23553466796875, |
|
"debug/reference_rejected_logps": -222.49639892578125, |
|
"epoch": 0.734375, |
|
"grad_norm": 12.93169650628382, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.0909616947174072, |
|
"logits/rejected": -1.2138352394104004, |
|
"logps/chosen": -137.27731323242188, |
|
"logps/rejected": -241.8701171875, |
|
"loss": 0.3217, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.0995820164680481, |
|
"rewards/margins": 0.2933192849159241, |
|
"rewards/rejected": -0.19373726844787598, |
|
"step": 47 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.096240520477295, |
|
"debug/policy_chosen_logps": -232.75778198242188, |
|
"debug/policy_rejected_logits": -1.1766290664672852, |
|
"debug/policy_rejected_logps": -306.53369140625, |
|
"debug/reference_chosen_logps": -230.5318145751953, |
|
"debug/reference_rejected_logps": -294.82598876953125, |
|
"epoch": 0.75, |
|
"grad_norm": 26.099751982850893, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.096240520477295, |
|
"logits/rejected": -1.1766290664672852, |
|
"logps/chosen": -232.75778198242188, |
|
"logps/rejected": -306.53369140625, |
|
"loss": 0.4361, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.022259674966335297, |
|
"rewards/margins": 0.09481699019670486, |
|
"rewards/rejected": -0.11707665771245956, |
|
"step": 48 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.1644705533981323, |
|
"debug/policy_chosen_logps": -166.67062377929688, |
|
"debug/policy_rejected_logits": -1.293932557106018, |
|
"debug/policy_rejected_logps": -293.45050048828125, |
|
"debug/reference_chosen_logps": -175.53598022460938, |
|
"debug/reference_rejected_logps": -276.24322509765625, |
|
"epoch": 0.765625, |
|
"grad_norm": 18.426480334845714, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.1644705533981323, |
|
"logits/rejected": -1.293932557106018, |
|
"logps/chosen": -166.67062377929688, |
|
"logps/rejected": -293.45050048828125, |
|
"loss": 0.4144, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.08865345269441605, |
|
"rewards/margins": 0.2607261538505554, |
|
"rewards/rejected": -0.17207267880439758, |
|
"step": 49 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.2152189016342163, |
|
"debug/policy_chosen_logps": -170.15440368652344, |
|
"debug/policy_rejected_logits": -1.2675527334213257, |
|
"debug/policy_rejected_logps": -284.37353515625, |
|
"debug/reference_chosen_logps": -173.90533447265625, |
|
"debug/reference_rejected_logps": -265.96417236328125, |
|
"epoch": 0.78125, |
|
"grad_norm": 19.567832925259168, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.2152189016342163, |
|
"logits/rejected": -1.2675527334213257, |
|
"logps/chosen": -170.15440368652344, |
|
"logps/rejected": -284.37353515625, |
|
"loss": 0.3895, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.03750941902399063, |
|
"rewards/margins": 0.22160324454307556, |
|
"rewards/rejected": -0.18409383296966553, |
|
"step": 50 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.1334317922592163, |
|
"debug/policy_chosen_logps": -127.97447204589844, |
|
"debug/policy_rejected_logits": -1.07590651512146, |
|
"debug/policy_rejected_logps": -220.5333251953125, |
|
"debug/reference_chosen_logps": -136.77487182617188, |
|
"debug/reference_rejected_logps": -219.20693969726562, |
|
"epoch": 0.796875, |
|
"grad_norm": 37.00007516828202, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.1334317922592163, |
|
"logits/rejected": -1.07590651512146, |
|
"logps/chosen": -127.97447204589844, |
|
"logps/rejected": -220.5333251953125, |
|
"loss": 0.3521, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0880039781332016, |
|
"rewards/margins": 0.10126776248216629, |
|
"rewards/rejected": -0.013263778761029243, |
|
"step": 51 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.207089900970459, |
|
"debug/policy_chosen_logps": -149.59579467773438, |
|
"debug/policy_rejected_logits": -1.3598229885101318, |
|
"debug/policy_rejected_logps": -312.65423583984375, |
|
"debug/reference_chosen_logps": -160.83349609375, |
|
"debug/reference_rejected_logps": -290.1050109863281, |
|
"epoch": 0.8125, |
|
"grad_norm": 34.43193601355931, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.207089900970459, |
|
"logits/rejected": -1.3598229885101318, |
|
"logps/chosen": -149.59579467773438, |
|
"logps/rejected": -312.65423583984375, |
|
"loss": 0.3701, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.11237694323062897, |
|
"rewards/margins": 0.3378693461418152, |
|
"rewards/rejected": -0.22549240291118622, |
|
"step": 52 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.0188125371932983, |
|
"debug/policy_chosen_logps": -185.8585205078125, |
|
"debug/policy_rejected_logits": -1.0791672468185425, |
|
"debug/policy_rejected_logps": -251.5456085205078, |
|
"debug/reference_chosen_logps": -191.01089477539062, |
|
"debug/reference_rejected_logps": -245.11524963378906, |
|
"epoch": 0.828125, |
|
"grad_norm": 14.59754124103045, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.0188125371932983, |
|
"logits/rejected": -1.0791672468185425, |
|
"logps/chosen": -185.8585205078125, |
|
"logps/rejected": -251.5456085205078, |
|
"loss": 0.3737, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.051523588597774506, |
|
"rewards/margins": 0.1158272996544838, |
|
"rewards/rejected": -0.0643036961555481, |
|
"step": 53 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.0893926620483398, |
|
"debug/policy_chosen_logps": -149.98660278320312, |
|
"debug/policy_rejected_logits": -1.0650213956832886, |
|
"debug/policy_rejected_logps": -274.9080810546875, |
|
"debug/reference_chosen_logps": -158.46145629882812, |
|
"debug/reference_rejected_logps": -258.4507141113281, |
|
"epoch": 0.84375, |
|
"grad_norm": 14.810580428901549, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.0893926620483398, |
|
"logits/rejected": -1.0650213956832886, |
|
"logps/chosen": -149.98660278320312, |
|
"logps/rejected": -274.9080810546875, |
|
"loss": 0.3213, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.0847485214471817, |
|
"rewards/margins": 0.24932223558425903, |
|
"rewards/rejected": -0.16457369923591614, |
|
"step": 54 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.2362074851989746, |
|
"debug/policy_chosen_logps": -102.0992202758789, |
|
"debug/policy_rejected_logits": -1.3010079860687256, |
|
"debug/policy_rejected_logps": -289.4234313964844, |
|
"debug/reference_chosen_logps": -120.96076965332031, |
|
"debug/reference_rejected_logps": -275.486083984375, |
|
"epoch": 0.859375, |
|
"grad_norm": 13.99372317117744, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.2362074851989746, |
|
"logits/rejected": -1.3010079860687256, |
|
"logps/chosen": -102.0992202758789, |
|
"logps/rejected": -289.4234313964844, |
|
"loss": 0.4006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18861553072929382, |
|
"rewards/margins": 0.32798925042152405, |
|
"rewards/rejected": -0.13937373459339142, |
|
"step": 55 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.0959794521331787, |
|
"debug/policy_chosen_logps": -176.76089477539062, |
|
"debug/policy_rejected_logits": -1.291311264038086, |
|
"debug/policy_rejected_logps": -313.87506103515625, |
|
"debug/reference_chosen_logps": -185.58998107910156, |
|
"debug/reference_rejected_logps": -290.28045654296875, |
|
"epoch": 0.875, |
|
"grad_norm": 37.669129247782706, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.0959794521331787, |
|
"logits/rejected": -1.291311264038086, |
|
"logps/chosen": -176.76089477539062, |
|
"logps/rejected": -313.87506103515625, |
|
"loss": 0.3289, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.08829064667224884, |
|
"rewards/margins": 0.3242364823818207, |
|
"rewards/rejected": -0.23594582080841064, |
|
"step": 56 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.0723934173583984, |
|
"debug/policy_chosen_logps": -127.6189193725586, |
|
"debug/policy_rejected_logits": -1.1941779851913452, |
|
"debug/policy_rejected_logps": -263.9356689453125, |
|
"debug/reference_chosen_logps": -139.6109161376953, |
|
"debug/reference_rejected_logps": -251.62448120117188, |
|
"epoch": 0.890625, |
|
"grad_norm": 15.916622092420505, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.0723934173583984, |
|
"logits/rejected": -1.1941779851913452, |
|
"logps/chosen": -127.6189193725586, |
|
"logps/rejected": -263.9356689453125, |
|
"loss": 0.3641, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.11991991102695465, |
|
"rewards/margins": 0.24303147196769714, |
|
"rewards/rejected": -0.12311156839132309, |
|
"step": 57 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.083500862121582, |
|
"debug/policy_chosen_logps": -212.94515991210938, |
|
"debug/policy_rejected_logits": -1.196679711341858, |
|
"debug/policy_rejected_logps": -263.7575378417969, |
|
"debug/reference_chosen_logps": -221.95928955078125, |
|
"debug/reference_rejected_logps": -267.07586669921875, |
|
"epoch": 0.90625, |
|
"grad_norm": 26.520012974267605, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.083500862121582, |
|
"logits/rejected": -1.196679711341858, |
|
"logps/chosen": -212.94515991210938, |
|
"logps/rejected": -263.7575378417969, |
|
"loss": 0.4082, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.09014149010181427, |
|
"rewards/margins": 0.05695834010839462, |
|
"rewards/rejected": 0.03318314626812935, |
|
"step": 58 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.2750979661941528, |
|
"debug/policy_chosen_logps": -120.48554229736328, |
|
"debug/policy_rejected_logits": -1.2684656381607056, |
|
"debug/policy_rejected_logps": -331.54986572265625, |
|
"debug/reference_chosen_logps": -130.17742919921875, |
|
"debug/reference_rejected_logps": -307.9356689453125, |
|
"epoch": 0.921875, |
|
"grad_norm": 19.676716039926774, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.2750979661941528, |
|
"logits/rejected": -1.2684656381607056, |
|
"logps/chosen": -120.48554229736328, |
|
"logps/rejected": -331.54986572265625, |
|
"loss": 0.3486, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.09691886603832245, |
|
"rewards/margins": 0.3330605924129486, |
|
"rewards/rejected": -0.23614171147346497, |
|
"step": 59 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.1700026988983154, |
|
"debug/policy_chosen_logps": -164.0662078857422, |
|
"debug/policy_rejected_logits": -1.0647200345993042, |
|
"debug/policy_rejected_logps": -289.3599548339844, |
|
"debug/reference_chosen_logps": -172.45896911621094, |
|
"debug/reference_rejected_logps": -276.63592529296875, |
|
"epoch": 0.9375, |
|
"grad_norm": 16.5920657057711, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.1700026988983154, |
|
"logits/rejected": -1.0647200345993042, |
|
"logps/chosen": -164.0662078857422, |
|
"logps/rejected": -289.3599548339844, |
|
"loss": 0.3551, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.08392763137817383, |
|
"rewards/margins": 0.21116778254508972, |
|
"rewards/rejected": -0.1272401511669159, |
|
"step": 60 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.2429842948913574, |
|
"debug/policy_chosen_logps": -164.30996704101562, |
|
"debug/policy_rejected_logits": -1.2771668434143066, |
|
"debug/policy_rejected_logps": -291.66436767578125, |
|
"debug/reference_chosen_logps": -178.3618927001953, |
|
"debug/reference_rejected_logps": -263.1362609863281, |
|
"epoch": 0.953125, |
|
"grad_norm": 17.917957649513887, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.2429842948913574, |
|
"logits/rejected": -1.2771668434143066, |
|
"logps/chosen": -164.30996704101562, |
|
"logps/rejected": -291.66436767578125, |
|
"loss": 0.333, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.14051929116249084, |
|
"rewards/margins": 0.42580026388168335, |
|
"rewards/rejected": -0.2852809429168701, |
|
"step": 61 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.1629077196121216, |
|
"debug/policy_chosen_logps": -171.4347381591797, |
|
"debug/policy_rejected_logits": -1.2383259534835815, |
|
"debug/policy_rejected_logps": -257.24322509765625, |
|
"debug/reference_chosen_logps": -176.8075408935547, |
|
"debug/reference_rejected_logps": -236.5648193359375, |
|
"epoch": 0.96875, |
|
"grad_norm": 27.22416658714319, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.1629077196121216, |
|
"logits/rejected": -1.2383259534835815, |
|
"logps/chosen": -171.4347381591797, |
|
"logps/rejected": -257.24322509765625, |
|
"loss": 0.3685, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.053728047758340836, |
|
"rewards/margins": 0.2605122923851013, |
|
"rewards/rejected": -0.20678424835205078, |
|
"step": 62 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.167179822921753, |
|
"debug/policy_chosen_logps": -241.232666015625, |
|
"debug/policy_rejected_logits": -1.1904563903808594, |
|
"debug/policy_rejected_logps": -349.2745361328125, |
|
"debug/reference_chosen_logps": -237.09837341308594, |
|
"debug/reference_rejected_logps": -312.7959289550781, |
|
"epoch": 0.984375, |
|
"grad_norm": 35.440096057306455, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.167179822921753, |
|
"logits/rejected": -1.1904563903808594, |
|
"logps/chosen": -241.232666015625, |
|
"logps/rejected": -349.2745361328125, |
|
"loss": 0.3671, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.04134296253323555, |
|
"rewards/margins": 0.32344281673431396, |
|
"rewards/rejected": -0.3647857904434204, |
|
"step": 63 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.1739040613174438, |
|
"debug/policy_chosen_logps": -139.26182556152344, |
|
"debug/policy_rejected_logits": -1.2884361743927002, |
|
"debug/policy_rejected_logps": -280.71124267578125, |
|
"debug/reference_chosen_logps": -148.495361328125, |
|
"debug/reference_rejected_logps": -259.9752197265625, |
|
"epoch": 1.0, |
|
"grad_norm": 43.313159052812985, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.1739040613174438, |
|
"logits/rejected": -1.2884361743927002, |
|
"logps/chosen": -139.26182556152344, |
|
"logps/rejected": -280.71124267578125, |
|
"loss": 0.3453, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.09233523905277252, |
|
"rewards/margins": 0.2996952533721924, |
|
"rewards/rejected": -0.20735999941825867, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 64, |
|
"total_flos": 0.0, |
|
"train_loss": 0.40815131505951285, |
|
"train_runtime": 194.2921, |
|
"train_samples_per_second": 20.979, |
|
"train_steps_per_second": 0.329 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 64, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|