{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.985781990521327,
  "eval_steps": 50,
  "global_step": 315,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0947867298578199,
      "grad_norm": 56.951628924108704,
      "learning_rate": 1.5624999999999999e-07,
      "logits/chosen": -2.8022689819335938,
      "logits/rejected": -2.699367046356201,
      "logps/chosen": -354.14007568359375,
      "logps/rejected": -648.7852783203125,
      "loss": 0.6846,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": 0.0030409712344408035,
      "rewards/margins": 0.015484926290810108,
      "rewards/rejected": -0.01244395412504673,
      "step": 10
    },
    {
      "epoch": 0.1895734597156398,
      "grad_norm": 16.911922497415656,
      "learning_rate": 3.1249999999999997e-07,
      "logits/chosen": -2.8449482917785645,
      "logits/rejected": -2.7297720909118652,
      "logps/chosen": -361.7726135253906,
      "logps/rejected": -731.9713134765625,
      "loss": 0.4488,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.11448182910680771,
      "rewards/margins": 0.7460837364196777,
      "rewards/rejected": -0.6316019892692566,
      "step": 20
    },
    {
      "epoch": 0.2843601895734597,
      "grad_norm": 2.8879981399804886,
      "learning_rate": 4.6874999999999996e-07,
      "logits/chosen": -2.924880027770996,
      "logits/rejected": -2.7608063220977783,
      "logps/chosen": -344.0640869140625,
      "logps/rejected": -1062.529541015625,
      "loss": 0.1128,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.4154191017150879,
      "rewards/margins": 4.946678638458252,
      "rewards/rejected": -4.531259536743164,
      "step": 30
    },
    {
      "epoch": 0.3791469194312796,
      "grad_norm": 0.7967945507055681,
      "learning_rate": 4.990147841143461e-07,
      "logits/chosen": -2.9928297996520996,
      "logits/rejected": -2.858860969543457,
      "logps/chosen": -369.7523193359375,
      "logps/rejected": -2523.788818359375,
      "loss": 0.0145,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.17358417809009552,
      "rewards/margins": 18.882659912109375,
      "rewards/rejected": -19.056243896484375,
      "step": 40
    },
    {
      "epoch": 0.47393364928909953,
      "grad_norm": 0.08242657747458541,
      "learning_rate": 4.950256493879794e-07,
      "logits/chosen": -3.1458115577697754,
      "logits/rejected": -3.068504810333252,
      "logps/chosen": -445.88641357421875,
      "logps/rejected": -3839.385498046875,
      "loss": 0.0025,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.8680551648139954,
      "rewards/margins": 31.354045867919922,
      "rewards/rejected": -32.22209930419922,
      "step": 50
    },
    {
      "epoch": 0.47393364928909953,
      "eval_logits/chosen": -3.214230537414551,
      "eval_logits/rejected": -3.0434162616729736,
      "eval_logps/chosen": -511.5262451171875,
      "eval_logps/rejected": -4356.53564453125,
      "eval_loss": 0.006651720497757196,
      "eval_rewards/accuracies": 0.9939516186714172,
      "eval_rewards/chosen": -1.4454454183578491,
      "eval_rewards/margins": 35.71202850341797,
      "eval_rewards/rejected": -37.157470703125,
      "eval_runtime": 194.5294,
      "eval_samples_per_second": 20.074,
      "eval_steps_per_second": 0.319,
      "step": 50
    },
    {
      "epoch": 0.5687203791469194,
      "grad_norm": 0.08904936739654302,
      "learning_rate": 4.88020090697132e-07,
      "logits/chosen": -3.2791202068328857,
      "logits/rejected": -3.141754150390625,
      "logps/chosen": -564.9468383789062,
      "logps/rejected": -4684.3271484375,
      "loss": 0.004,
      "rewards/accuracies": 0.9937499761581421,
      "rewards/chosen": -1.9347045421600342,
      "rewards/margins": 38.56499099731445,
      "rewards/rejected": -40.49969482421875,
      "step": 60
    },
    {
      "epoch": 0.6635071090047393,
      "grad_norm": 1.511268095124282,
      "learning_rate": 4.780843509929904e-07,
      "logits/chosen": -3.2914862632751465,
      "logits/rejected": -3.0883309841156006,
      "logps/chosen": -603.4210205078125,
      "logps/rejected": -4877.28662109375,
      "loss": 0.0021,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -2.6363024711608887,
      "rewards/margins": 39.97002410888672,
      "rewards/rejected": -42.606327056884766,
      "step": 70
    },
    {
      "epoch": 0.7582938388625592,
      "grad_norm": 0.22202350824430725,
      "learning_rate": 4.6534074564712217e-07,
      "logits/chosen": -3.417383909225464,
      "logits/rejected": -3.290362596511841,
      "logps/chosen": -600.4118041992188,
      "logps/rejected": -5436.11376953125,
      "loss": 0.0005,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -2.5593833923339844,
      "rewards/margins": 45.55999755859375,
      "rewards/rejected": -48.11937713623047,
      "step": 80
    },
    {
      "epoch": 0.8530805687203792,
      "grad_norm": 2.0861019684034874,
      "learning_rate": 4.4994615667026846e-07,
      "logits/chosen": -3.4805240631103516,
      "logits/rejected": -3.3906772136688232,
      "logps/chosen": -624.0176391601562,
      "logps/rejected": -5296.82275390625,
      "loss": 0.0018,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -2.676025867462158,
      "rewards/margins": 44.0660285949707,
      "rewards/rejected": -46.7420539855957,
      "step": 90
    },
    {
      "epoch": 0.9478672985781991,
      "grad_norm": 2.8965011668216905,
      "learning_rate": 4.320901013934887e-07,
      "logits/chosen": -3.4210407733917236,
      "logits/rejected": -3.3643829822540283,
      "logps/chosen": -556.0076904296875,
      "logps/rejected": -4813.1806640625,
      "loss": 0.0036,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -2.0869507789611816,
      "rewards/margins": 39.87181854248047,
      "rewards/rejected": -41.95877456665039,
      "step": 100
    },
    {
      "epoch": 0.9478672985781991,
      "eval_logits/chosen": -3.4104061126708984,
      "eval_logits/rejected": -3.2429261207580566,
      "eval_logps/chosen": -570.0164184570312,
      "eval_logps/rejected": -4765.2841796875,
      "eval_loss": 0.0052900416776537895,
      "eval_rewards/accuracies": 0.9939516186714172,
      "eval_rewards/chosen": -2.0303473472595215,
      "eval_rewards/margins": 39.21460723876953,
      "eval_rewards/rejected": -41.24495315551758,
      "eval_runtime": 192.2337,
      "eval_samples_per_second": 20.314,
      "eval_steps_per_second": 0.323,
      "step": 100
    },
    {
      "epoch": 1.042654028436019,
      "grad_norm": 1.2489542878599509,
      "learning_rate": 4.119923993874379e-07,
      "logits/chosen": -3.4639148712158203,
      "logits/rejected": -3.4126315116882324,
      "logps/chosen": -549.92138671875,
      "logps/rejected": -5150.29638671875,
      "loss": 0.0006,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -1.9557552337646484,
      "rewards/margins": 43.08815002441406,
      "rewards/rejected": -45.04390335083008,
      "step": 110
    },
    {
      "epoch": 1.1374407582938388,
      "grad_norm": 0.919711694376481,
      "learning_rate": 3.899004663415083e-07,
      "logits/chosen": -3.455725908279419,
      "logits/rejected": -3.3397490978240967,
      "logps/chosen": -534.6444702148438,
      "logps/rejected": -5193.822265625,
      "loss": 0.0005,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -1.8104517459869385,
      "rewards/margins": 43.72606658935547,
      "rewards/rejected": -45.53651809692383,
      "step": 120
    },
    {
      "epoch": 1.2322274881516588,
      "grad_norm": 0.03772744312797018,
      "learning_rate": 3.6608626821692824e-07,
      "logits/chosen": -3.503054141998291,
      "logits/rejected": -3.4913394451141357,
      "logps/chosen": -509.2953186035156,
      "logps/rejected": -5831.84228515625,
      "loss": 0.0012,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -1.6301825046539307,
      "rewards/margins": 49.84960174560547,
      "rewards/rejected": -51.47977828979492,
      "step": 130
    },
    {
      "epoch": 1.3270142180094786,
      "grad_norm": 0.00011722006953608906,
      "learning_rate": 3.408429731701635e-07,
      "logits/chosen": -3.636444091796875,
      "logits/rejected": -3.614245891571045,
      "logps/chosen": -664.00341796875,
      "logps/rejected": -5503.0537109375,
      "loss": 0.0001,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -3.150538682937622,
      "rewards/margins": 45.41934585571289,
      "rewards/rejected": -48.56988525390625,
      "step": 140
    },
    {
      "epoch": 1.4218009478672986,
      "grad_norm": 0.0013414969188062405,
      "learning_rate": 3.144813424636031e-07,
      "logits/chosen": -3.788306713104248,
      "logits/rejected": -3.686079740524292,
      "logps/chosen": -791.1682739257812,
      "logps/rejected": -5721.5634765625,
      "loss": 0.0001,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -4.1505842208862305,
      "rewards/margins": 46.614662170410156,
      "rewards/rejected": -50.7652473449707,
      "step": 150
    },
    {
      "epoch": 1.4218009478672986,
      "eval_logits/chosen": -3.5867350101470947,
      "eval_logits/rejected": -3.5067942142486572,
      "eval_logps/chosen": -561.57568359375,
      "eval_logps/rejected": -5161.087890625,
      "eval_loss": 0.006992733106017113,
      "eval_rewards/accuracies": 0.9939516186714172,
      "eval_rewards/chosen": -1.9459394216537476,
      "eval_rewards/margins": 43.25704574584961,
      "eval_rewards/rejected": -45.2029914855957,
      "eval_runtime": 191.7726,
      "eval_samples_per_second": 20.363,
      "eval_steps_per_second": 0.323,
      "step": 150
    },
    {
      "epoch": 1.5165876777251186,
      "grad_norm": 0.0004138099071654368,
      "learning_rate": 2.8732590479375165e-07,
      "logits/chosen": -3.556847333908081,
      "logits/rejected": -3.5835862159729004,
      "logps/chosen": -528.8604736328125,
      "logps/rejected": -5157.8740234375,
      "loss": 0.0003,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -1.7568155527114868,
      "rewards/margins": 43.957759857177734,
      "rewards/rejected": -45.714576721191406,
      "step": 160
    },
    {
      "epoch": 1.6113744075829384,
      "grad_norm": 0.0016286137021698196,
      "learning_rate": 2.597109611334169e-07,
      "logits/chosen": -3.579390287399292,
      "logits/rejected": -3.6478075981140137,
      "logps/chosen": -520.5675048828125,
      "logps/rejected": -5432.5673828125,
      "loss": 0.0001,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -1.6220014095306396,
      "rewards/margins": 46.55379867553711,
      "rewards/rejected": -48.17579650878906,
      "step": 170
    },
    {
      "epoch": 1.7061611374407581,
      "grad_norm": 0.00799320909391895,
      "learning_rate": 2.3197646927086694e-07,
      "logits/chosen": -3.5350117683410645,
      "logits/rejected": -3.6110050678253174,
      "logps/chosen": -534.5997314453125,
      "logps/rejected": -5420.73583984375,
      "loss": 0.0075,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -1.6480720043182373,
      "rewards/margins": 46.55036163330078,
      "rewards/rejected": -48.19843292236328,
      "step": 180
    },
    {
      "epoch": 1.8009478672985781,
      "grad_norm": 0.0014081828819370304,
      "learning_rate": 2.0446385870993467e-07,
      "logits/chosen": -3.5267558097839355,
      "logits/rejected": -3.5355076789855957,
      "logps/chosen": -524.6720581054688,
      "logps/rejected": -5069.0888671875,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -1.4387648105621338,
      "rewards/margins": 43.29344177246094,
      "rewards/rejected": -44.73220443725586,
      "step": 190
    },
    {
      "epoch": 1.8957345971563981,
      "grad_norm": 0.018000801767423476,
      "learning_rate": 1.775118274523545e-07,
      "logits/chosen": -3.5183377265930176,
      "logits/rejected": -3.5119102001190186,
      "logps/chosen": -486.629150390625,
      "logps/rejected": -5021.52490234375,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -1.3421844244003296,
      "rewards/margins": 42.632965087890625,
      "rewards/rejected": -43.97514724731445,
      "step": 200
    },
    {
      "epoch": 1.8957345971563981,
      "eval_logits/chosen": -3.502014398574829,
      "eval_logits/rejected": -3.422856092453003,
      "eval_logps/chosen": -512.3704223632812,
      "eval_logps/rejected": -5067.64501953125,
      "eval_loss": 0.004733214620500803,
      "eval_rewards/accuracies": 0.9959677457809448,
      "eval_rewards/chosen": -1.4538869857788086,
      "eval_rewards/margins": 42.814674377441406,
      "eval_rewards/rejected": -44.26856231689453,
      "eval_runtime": 194.1121,
      "eval_samples_per_second": 20.117,
      "eval_steps_per_second": 0.319,
      "step": 200
    },
    {
      "epoch": 1.9905213270142181,
      "grad_norm": 0.9312964869423628,
      "learning_rate": 1.514521724066537e-07,
      "logits/chosen": -3.540240526199341,
      "logits/rejected": -3.5632777214050293,
      "logps/chosen": -531.4307861328125,
      "logps/rejected": -5061.63818359375,
      "loss": 0.0002,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -1.486299753189087,
      "rewards/margins": 42.955726623535156,
      "rewards/rejected": -44.44202423095703,
      "step": 210
    },
    {
      "epoch": 2.085308056872038,
      "grad_norm": 0.029566978048640967,
      "learning_rate": 1.266057047539568e-07,
      "logits/chosen": -3.5052971839904785,
      "logits/rejected": -3.5332977771759033,
      "logps/chosen": -477.3848571777344,
      "logps/rejected": -5269.00390625,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -1.4086942672729492,
      "rewards/margins": 45.027523040771484,
      "rewards/rejected": -46.43621826171875,
      "step": 220
    },
    {
      "epoch": 2.1800947867298577,
      "grad_norm": 0.0005556188331340245,
      "learning_rate": 1.032783005551884e-07,
      "logits/chosen": -3.5509438514709473,
      "logits/rejected": -3.5611331462860107,
      "logps/chosen": -473.364501953125,
      "logps/rejected": -4865.369140625,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -1.312354326248169,
      "rewards/margins": 41.259403228759766,
      "rewards/rejected": -42.57175827026367,
      "step": 230
    },
    {
      "epoch": 2.2748815165876777,
      "grad_norm": 0.005629678669869344,
      "learning_rate": 8.175713521924976e-08,
      "logits/chosen": -3.5678086280822754,
      "logits/rejected": -3.5121123790740967,
      "logps/chosen": -496.83258056640625,
      "logps/rejected": -5081.9599609375,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -1.4451147317886353,
      "rewards/margins": 43.37391662597656,
      "rewards/rejected": -44.81903839111328,
      "step": 240
    },
    {
      "epoch": 2.3696682464454977,
      "grad_norm": 0.0012113886351427462,
      "learning_rate": 6.230714818829733e-08,
      "logits/chosen": -3.530911922454834,
      "logits/rejected": -3.5102057456970215,
      "logps/chosen": -484.5502014160156,
      "logps/rejected": -5412.3271484375,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -1.327695608139038,
      "rewards/margins": 46.98969268798828,
      "rewards/rejected": -48.31739044189453,
      "step": 250
    },
    {
      "epoch": 2.3696682464454977,
      "eval_logits/chosen": -3.5510308742523193,
      "eval_logits/rejected": -3.444518566131592,
      "eval_logps/chosen": -512.2269287109375,
      "eval_logps/rejected": -5116.15771484375,
      "eval_loss": 0.005008448380976915,
      "eval_rewards/accuracies": 0.9959677457809448,
      "eval_rewards/chosen": -1.4524519443511963,
      "eval_rewards/margins": 43.301239013671875,
      "eval_rewards/rejected": -44.753692626953125,
      "eval_runtime": 192.1218,
      "eval_samples_per_second": 20.326,
      "eval_steps_per_second": 0.323,
      "step": 250
    },
    {
      "epoch": 2.4644549763033177,
      "grad_norm": 0.005272804838769864,
      "learning_rate": 4.516778136213037e-08,
      "logits/chosen": -3.5464816093444824,
      "logits/rejected": -3.532754898071289,
      "logps/chosen": -474.98077392578125,
      "logps/rejected": -5214.1748046875,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -1.3641650676727295,
      "rewards/margins": 44.85725021362305,
      "rewards/rejected": -46.22141647338867,
      "step": 260
    },
    {
      "epoch": 2.5592417061611377,
      "grad_norm": 0.01760309981671165,
      "learning_rate": 3.055003141378948e-08,
      "logits/chosen": -3.5305237770080566,
      "logits/rejected": -3.543522357940674,
      "logps/chosen": -502.1796875,
      "logps/rejected": -5842.8251953125,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -1.2657973766326904,
      "rewards/margins": 50.87003707885742,
      "rewards/rejected": -52.135841369628906,
      "step": 270
    },
    {
      "epoch": 2.654028436018957,
      "grad_norm": 0.01345213655983596,
      "learning_rate": 1.8633852284264508e-08,
      "logits/chosen": -3.5437607765197754,
      "logits/rejected": -3.537663221359253,
      "logps/chosen": -519.03759765625,
      "logps/rejected": -5507.5615234375,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -1.3319001197814941,
      "rewards/margins": 47.102291107177734,
      "rewards/rejected": -48.4341926574707,
      "step": 280
    },
    {
      "epoch": 2.748815165876777,
      "grad_norm": 5.9654408780918595e-05,
      "learning_rate": 9.56593983327919e-09,
      "logits/chosen": -3.5722999572753906,
      "logits/rejected": -3.5434532165527344,
      "logps/chosen": -525.2794189453125,
      "logps/rejected": -5359.7451171875,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -1.4624111652374268,
      "rewards/margins": 46.21337890625,
      "rewards/rejected": -47.675785064697266,
      "step": 290
    },
    {
      "epoch": 2.843601895734597,
      "grad_norm": 0.0012624104591569302,
      "learning_rate": 3.4579259185321398e-09,
      "logits/chosen": -3.5550761222839355,
      "logits/rejected": -3.541923999786377,
      "logps/chosen": -513.0765380859375,
      "logps/rejected": -5235.28759765625,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -1.3970950841903687,
      "rewards/margins": 45.181175231933594,
      "rewards/rejected": -46.578269958496094,
      "step": 300
    },
    {
      "epoch": 2.843601895734597,
      "eval_logits/chosen": -3.5504369735717773,
      "eval_logits/rejected": -3.444122552871704,
      "eval_logps/chosen": -512.8049926757812,
      "eval_logps/rejected": -5128.248046875,
      "eval_loss": 0.004975645802915096,
      "eval_rewards/accuracies": 0.9959677457809448,
      "eval_rewards/chosen": -1.4582326412200928,
      "eval_rewards/margins": 43.41635513305664,
      "eval_rewards/rejected": -44.87459182739258,
      "eval_runtime": 192.6295,
      "eval_samples_per_second": 20.272,
      "eval_steps_per_second": 0.322,
      "step": 300
    },
    {
      "epoch": 2.938388625592417,
      "grad_norm": 0.0005953504074610172,
      "learning_rate": 3.850041354441502e-10,
      "logits/chosen": -3.5716750621795654,
      "logits/rejected": -3.5102698802948,
      "logps/chosen": -509.0469665527344,
      "logps/rejected": -4801.1611328125,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -1.4760140180587769,
      "rewards/margins": 40.71800994873047,
      "rewards/rejected": -42.19402313232422,
      "step": 310
    },
    {
      "epoch": 2.985781990521327,
      "step": 315,
      "total_flos": 0.0,
      "train_loss": 0.04083177362173292,
      "train_runtime": 9033.5209,
      "train_samples_per_second": 4.483,
      "train_steps_per_second": 0.035
    }
  ],
  "logging_steps": 10,
  "max_steps": 315,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}