|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9994767137624281, |
|
"eval_steps": 500, |
|
"global_step": 955, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.010465724751439037, |
|
"grad_norm": 26.42055892944336, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": 0.7578125, |
|
"logits/rejected": 0.97265625, |
|
"logps/chosen": -284.0, |
|
"logps/rejected": -294.0, |
|
"loss": 0.6958, |
|
"rewards/accuracies": 0.23125000298023224, |
|
"rewards/chosen": 0.00531005859375, |
|
"rewards/margins": 0.0027618408203125, |
|
"rewards/rejected": 0.0025177001953125, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.020931449502878074, |
|
"grad_norm": 25.812650680541992, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": 0.63671875, |
|
"logits/rejected": 0.65234375, |
|
"logps/chosen": -382.0, |
|
"logps/rejected": -308.0, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": 0.0172119140625, |
|
"rewards/margins": 0.01953125, |
|
"rewards/rejected": -0.002410888671875, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03139717425431711, |
|
"grad_norm": 28.49739646911621, |
|
"learning_rate": 6.249999999999999e-07, |
|
"logits/chosen": 0.8203125, |
|
"logits/rejected": 0.7421875, |
|
"logps/chosen": -304.0, |
|
"logps/rejected": -260.0, |
|
"loss": 0.6961, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": 0.00148773193359375, |
|
"rewards/margins": -0.005279541015625, |
|
"rewards/rejected": 0.00677490234375, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04186289900575615, |
|
"grad_norm": 25.176631927490234, |
|
"learning_rate": 8.333333333333333e-07, |
|
"logits/chosen": 0.80078125, |
|
"logits/rejected": 0.98046875, |
|
"logps/chosen": -340.0, |
|
"logps/rejected": -320.0, |
|
"loss": 0.6748, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.0247802734375, |
|
"rewards/margins": 0.0751953125, |
|
"rewards/rejected": -0.05029296875, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.052328623757195186, |
|
"grad_norm": 23.213665008544922, |
|
"learning_rate": 9.999880027023293e-07, |
|
"logits/chosen": 0.8046875, |
|
"logits/rejected": 0.87109375, |
|
"logps/chosen": -322.0, |
|
"logps/rejected": -280.0, |
|
"loss": 0.6482, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.08544921875, |
|
"rewards/margins": 0.1982421875, |
|
"rewards/rejected": -0.11328125, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06279434850863422, |
|
"grad_norm": 22.0967960357666, |
|
"learning_rate": 9.995681577335256e-07, |
|
"logits/chosen": 0.79296875, |
|
"logits/rejected": 0.984375, |
|
"logps/chosen": -320.0, |
|
"logps/rejected": -300.0, |
|
"loss": 0.6378, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.029541015625, |
|
"rewards/margins": 0.1767578125, |
|
"rewards/rejected": -0.1474609375, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07326007326007326, |
|
"grad_norm": 22.011857986450195, |
|
"learning_rate": 9.985490234976131e-07, |
|
"logits/chosen": 0.75390625, |
|
"logits/rejected": 0.671875, |
|
"logps/chosen": -342.0, |
|
"logps/rejected": -264.0, |
|
"loss": 0.6117, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.09375, |
|
"rewards/margins": 0.443359375, |
|
"rewards/rejected": -0.349609375, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0837257980115123, |
|
"grad_norm": 27.431547164916992, |
|
"learning_rate": 9.969318225629239e-07, |
|
"logits/chosen": 0.56640625, |
|
"logits/rejected": 0.6875, |
|
"logps/chosen": -388.0, |
|
"logps/rejected": -342.0, |
|
"loss": 0.6463, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.0264892578125, |
|
"rewards/margins": 0.451171875, |
|
"rewards/rejected": -0.423828125, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09419152276295134, |
|
"grad_norm": 24.862520217895508, |
|
"learning_rate": 9.947184949473476e-07, |
|
"logits/chosen": 0.796875, |
|
"logits/rejected": 0.8359375, |
|
"logps/chosen": -356.0, |
|
"logps/rejected": -298.0, |
|
"loss": 0.6146, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.055419921875, |
|
"rewards/margins": 0.44921875, |
|
"rewards/rejected": -0.392578125, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.10465724751439037, |
|
"grad_norm": 23.47401237487793, |
|
"learning_rate": 9.919116957910565e-07, |
|
"logits/chosen": 0.4921875, |
|
"logits/rejected": 0.51953125, |
|
"logps/chosen": -358.0, |
|
"logps/rejected": -298.0, |
|
"loss": 0.6125, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.0224609375, |
|
"rewards/margins": 0.49609375, |
|
"rewards/rejected": -0.474609375, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1151229722658294, |
|
"grad_norm": 24.995296478271484, |
|
"learning_rate": 9.88514792171362e-07, |
|
"logits/chosen": 0.671875, |
|
"logits/rejected": 0.6796875, |
|
"logps/chosen": -330.0, |
|
"logps/rejected": -308.0, |
|
"loss": 0.5918, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.06494140625, |
|
"rewards/margins": 0.341796875, |
|
"rewards/rejected": -0.408203125, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12558869701726844, |
|
"grad_norm": 22.512697219848633, |
|
"learning_rate": 9.845318590635185e-07, |
|
"logits/chosen": 0.5703125, |
|
"logits/rejected": 0.62109375, |
|
"logps/chosen": -340.0, |
|
"logps/rejected": -270.0, |
|
"loss": 0.5845, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.0174560546875, |
|
"rewards/margins": 0.5390625, |
|
"rewards/rejected": -0.55859375, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.1360544217687075, |
|
"grad_norm": 26.06734848022461, |
|
"learning_rate": 9.799676744523238e-07, |
|
"logits/chosen": 0.54296875, |
|
"logits/rejected": 0.62890625, |
|
"logps/chosen": -348.0, |
|
"logps/rejected": -290.0, |
|
"loss": 0.5964, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.0166015625, |
|
"rewards/margins": 0.55078125, |
|
"rewards/rejected": -0.56640625, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.14652014652014653, |
|
"grad_norm": 21.467668533325195, |
|
"learning_rate": 9.748277136003789e-07, |
|
"logits/chosen": 0.62890625, |
|
"logits/rejected": 0.8828125, |
|
"logps/chosen": -324.0, |
|
"logps/rejected": -308.0, |
|
"loss": 0.6083, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.041015625, |
|
"rewards/margins": 0.369140625, |
|
"rewards/rejected": -0.41015625, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15698587127158556, |
|
"grad_norm": 24.102495193481445, |
|
"learning_rate": 9.691181424798824e-07, |
|
"logits/chosen": 0.62109375, |
|
"logits/rejected": 0.66796875, |
|
"logps/chosen": -298.0, |
|
"logps/rejected": -276.0, |
|
"loss": 0.6033, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.05615234375, |
|
"rewards/margins": 0.423828125, |
|
"rewards/rejected": -0.3671875, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1674515960230246, |
|
"grad_norm": 22.33710479736328, |
|
"learning_rate": 9.628458103758402e-07, |
|
"logits/chosen": 0.3828125, |
|
"logits/rejected": 0.5234375, |
|
"logps/chosen": -348.0, |
|
"logps/rejected": -320.0, |
|
"loss": 0.5875, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.00665283203125, |
|
"rewards/margins": 0.44921875, |
|
"rewards/rejected": -0.455078125, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.17791732077446362, |
|
"grad_norm": 21.965810775756836, |
|
"learning_rate": 9.560182416695637e-07, |
|
"logits/chosen": 0.490234375, |
|
"logits/rejected": 0.5, |
|
"logps/chosen": -332.0, |
|
"logps/rejected": -306.0, |
|
"loss": 0.6099, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.0615234375, |
|
"rewards/margins": 0.28515625, |
|
"rewards/rejected": -0.34765625, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.18838304552590268, |
|
"grad_norm": 23.698848724365234, |
|
"learning_rate": 9.486436268123111e-07, |
|
"logits/chosen": 0.5390625, |
|
"logits/rejected": 0.6015625, |
|
"logps/chosen": -344.0, |
|
"logps/rejected": -314.0, |
|
"loss": 0.6223, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.0267333984375, |
|
"rewards/margins": 0.390625, |
|
"rewards/rejected": -0.41796875, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1988487702773417, |
|
"grad_norm": 20.521411895751953, |
|
"learning_rate": 9.40730812499903e-07, |
|
"logits/chosen": 0.6953125, |
|
"logits/rejected": 0.60546875, |
|
"logps/chosen": -322.0, |
|
"logps/rejected": -282.0, |
|
"loss": 0.6127, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.005859375, |
|
"rewards/margins": 0.482421875, |
|
"rewards/rejected": -0.48828125, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.20931449502878074, |
|
"grad_norm": 25.220121383666992, |
|
"learning_rate": 9.322892910600958e-07, |
|
"logits/chosen": 0.73046875, |
|
"logits/rejected": 0.9765625, |
|
"logps/chosen": -316.0, |
|
"logps/rejected": -292.0, |
|
"loss": 0.5889, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.0478515625, |
|
"rewards/margins": 0.52734375, |
|
"rewards/rejected": -0.478515625, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.21978021978021978, |
|
"grad_norm": 26.859111785888672, |
|
"learning_rate": 9.233291890654476e-07, |
|
"logits/chosen": 0.458984375, |
|
"logits/rejected": 0.498046875, |
|
"logps/chosen": -338.0, |
|
"logps/rejected": -296.0, |
|
"loss": 0.5686, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.04443359375, |
|
"rewards/margins": 0.62890625, |
|
"rewards/rejected": -0.671875, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.2302459445316588, |
|
"grad_norm": 19.048847198486328, |
|
"learning_rate": 9.138612551853332e-07, |
|
"logits/chosen": 0.427734375, |
|
"logits/rejected": 0.51953125, |
|
"logps/chosen": -306.0, |
|
"logps/rejected": -314.0, |
|
"loss": 0.6049, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.1962890625, |
|
"rewards/margins": 0.458984375, |
|
"rewards/rejected": -0.65625, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.24071166928309787, |
|
"grad_norm": 29.931427001953125, |
|
"learning_rate": 9.03896847291683e-07, |
|
"logits/chosen": 0.498046875, |
|
"logits/rejected": 0.490234375, |
|
"logps/chosen": -368.0, |
|
"logps/rejected": -320.0, |
|
"loss": 0.5824, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.06884765625, |
|
"rewards/margins": 0.54296875, |
|
"rewards/rejected": -0.61328125, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.25117739403453687, |
|
"grad_norm": 24.653114318847656, |
|
"learning_rate": 8.934479188339137e-07, |
|
"logits/chosen": 0.515625, |
|
"logits/rejected": 0.384765625, |
|
"logps/chosen": -358.0, |
|
"logps/rejected": -300.0, |
|
"loss": 0.5824, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.11669921875, |
|
"rewards/margins": 0.53125, |
|
"rewards/rejected": -0.6484375, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.2616431187859759, |
|
"grad_norm": 27.315216064453125, |
|
"learning_rate": 8.825270044993962e-07, |
|
"logits/chosen": 0.447265625, |
|
"logits/rejected": 0.71875, |
|
"logps/chosen": -394.0, |
|
"logps/rejected": -350.0, |
|
"loss": 0.5854, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.056640625, |
|
"rewards/margins": 0.76953125, |
|
"rewards/rejected": -0.828125, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.272108843537415, |
|
"grad_norm": 26.475204467773438, |
|
"learning_rate": 8.711472051766605e-07, |
|
"logits/chosen": 0.390625, |
|
"logits/rejected": 0.466796875, |
|
"logps/chosen": -322.0, |
|
"logps/rejected": -324.0, |
|
"loss": 0.5773, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.162109375, |
|
"rewards/margins": 0.59375, |
|
"rewards/rejected": -0.7578125, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.282574568288854, |
|
"grad_norm": 24.804954528808594, |
|
"learning_rate": 8.593221722393789e-07, |
|
"logits/chosen": 0.419921875, |
|
"logits/rejected": 0.443359375, |
|
"logps/chosen": -324.0, |
|
"logps/rejected": -290.0, |
|
"loss": 0.5661, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.16796875, |
|
"rewards/margins": 0.63671875, |
|
"rewards/rejected": -0.8046875, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.29304029304029305, |
|
"grad_norm": 26.523075103759766, |
|
"learning_rate": 8.470660911699782e-07, |
|
"logits/chosen": 0.498046875, |
|
"logits/rejected": 0.640625, |
|
"logps/chosen": -328.0, |
|
"logps/rejected": -286.0, |
|
"loss": 0.5697, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.34375, |
|
"rewards/margins": 0.6171875, |
|
"rewards/rejected": -0.96484375, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.3035060177917321, |
|
"grad_norm": 18.05160140991211, |
|
"learning_rate": 8.343936645425276e-07, |
|
"logits/chosen": 0.60546875, |
|
"logits/rejected": 0.75390625, |
|
"logps/chosen": -344.0, |
|
"logps/rejected": -312.0, |
|
"loss": 0.5687, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.63671875, |
|
"rewards/margins": 0.59375, |
|
"rewards/rejected": -1.2265625, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.3139717425431711, |
|
"grad_norm": 30.55604362487793, |
|
"learning_rate": 8.213200943853158e-07, |
|
"logits/chosen": 0.275390625, |
|
"logits/rejected": 0.6015625, |
|
"logps/chosen": -312.0, |
|
"logps/rejected": -310.0, |
|
"loss": 0.6047, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.453125, |
|
"rewards/margins": 0.341796875, |
|
"rewards/rejected": -0.796875, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.32443746729461015, |
|
"grad_norm": 25.680500030517578, |
|
"learning_rate": 8.07861063944276e-07, |
|
"logits/chosen": 0.421875, |
|
"logits/rejected": 0.423828125, |
|
"logps/chosen": -394.0, |
|
"logps/rejected": -288.0, |
|
"loss": 0.5542, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.013427734375, |
|
"rewards/margins": 0.859375, |
|
"rewards/rejected": -0.84375, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.3349031920460492, |
|
"grad_norm": 27.477874755859375, |
|
"learning_rate": 7.940327188691341e-07, |
|
"logits/chosen": 0.283203125, |
|
"logits/rejected": 0.326171875, |
|
"logps/chosen": -332.0, |
|
"logps/rejected": -294.0, |
|
"loss": 0.5681, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.2177734375, |
|
"rewards/margins": 0.59765625, |
|
"rewards/rejected": -0.81640625, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.3453689167974882, |
|
"grad_norm": 23.42730712890625, |
|
"learning_rate": 7.798516478448514e-07, |
|
"logits/chosen": 0.2490234375, |
|
"logits/rejected": 0.25390625, |
|
"logps/chosen": -356.0, |
|
"logps/rejected": -302.0, |
|
"loss": 0.5724, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.05810546875, |
|
"rewards/margins": 0.44921875, |
|
"rewards/rejected": -0.5078125, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.35583464154892724, |
|
"grad_norm": 24.55620574951172, |
|
"learning_rate": 7.653348626915957e-07, |
|
"logits/chosen": 0.302734375, |
|
"logits/rejected": 0.3125, |
|
"logps/chosen": -320.0, |
|
"logps/rejected": -298.0, |
|
"loss": 0.5743, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.1396484375, |
|
"rewards/margins": 0.51953125, |
|
"rewards/rejected": -0.66015625, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.3663003663003663, |
|
"grad_norm": 23.369258880615234, |
|
"learning_rate": 7.504997779571132e-07, |
|
"logits/chosen": 0.53515625, |
|
"logits/rejected": 0.5546875, |
|
"logps/chosen": -350.0, |
|
"logps/rejected": -302.0, |
|
"loss": 0.5759, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.291015625, |
|
"rewards/margins": 0.443359375, |
|
"rewards/rejected": -0.734375, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.37676609105180536, |
|
"grad_norm": 26.217023849487305, |
|
"learning_rate": 7.353641900259823e-07, |
|
"logits/chosen": 0.3203125, |
|
"logits/rejected": 0.2138671875, |
|
"logps/chosen": -368.0, |
|
"logps/rejected": -302.0, |
|
"loss": 0.5576, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.345703125, |
|
"rewards/margins": 0.63671875, |
|
"rewards/rejected": -0.984375, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.3872318158032444, |
|
"grad_norm": 26.125743865966797, |
|
"learning_rate": 7.199462557708097e-07, |
|
"logits/chosen": 0.265625, |
|
"logits/rejected": 0.455078125, |
|
"logps/chosen": -332.0, |
|
"logps/rejected": -336.0, |
|
"loss": 0.5876, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.23828125, |
|
"rewards/margins": 0.6015625, |
|
"rewards/rejected": -0.83984375, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.3976975405546834, |
|
"grad_norm": 25.499277114868164, |
|
"learning_rate": 7.042644707709815e-07, |
|
"logits/chosen": 0.287109375, |
|
"logits/rejected": 0.423828125, |
|
"logps/chosen": -312.0, |
|
"logps/rejected": -251.0, |
|
"loss": 0.5691, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.275390625, |
|
"rewards/margins": 0.56640625, |
|
"rewards/rejected": -0.83984375, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.40816326530612246, |
|
"grad_norm": 24.560373306274414, |
|
"learning_rate": 6.883376471250955e-07, |
|
"logits/chosen": 0.388671875, |
|
"logits/rejected": 0.263671875, |
|
"logps/chosen": -326.0, |
|
"logps/rejected": -296.0, |
|
"loss": 0.59, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.244140625, |
|
"rewards/margins": 0.50390625, |
|
"rewards/rejected": -0.75, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.4186289900575615, |
|
"grad_norm": 27.33049774169922, |
|
"learning_rate": 6.72184890883692e-07, |
|
"logits/chosen": 0.349609375, |
|
"logits/rejected": 0.4375, |
|
"logps/chosen": -306.0, |
|
"logps/rejected": -262.0, |
|
"loss": 0.5551, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.0361328125, |
|
"rewards/margins": 0.546875, |
|
"rewards/rejected": -0.58203125, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.4290947148090005, |
|
"grad_norm": 24.623563766479492, |
|
"learning_rate": 6.558255791293571e-07, |
|
"logits/chosen": 0.453125, |
|
"logits/rejected": 0.44921875, |
|
"logps/chosen": -320.0, |
|
"logps/rejected": -282.0, |
|
"loss": 0.5582, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.07958984375, |
|
"rewards/margins": 0.72265625, |
|
"rewards/rejected": -0.8046875, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.43956043956043955, |
|
"grad_norm": 31.462345123291016, |
|
"learning_rate": 6.392793367316904e-07, |
|
"logits/chosen": 0.3984375, |
|
"logits/rejected": 0.3203125, |
|
"logps/chosen": -360.0, |
|
"logps/rejected": -316.0, |
|
"loss": 0.549, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.0059814453125, |
|
"rewards/margins": 0.70703125, |
|
"rewards/rejected": -0.7109375, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.4500261643118786, |
|
"grad_norm": 29.43328285217285, |
|
"learning_rate": 6.225660128050247e-07, |
|
"logits/chosen": 0.373046875, |
|
"logits/rejected": 0.515625, |
|
"logps/chosen": -308.0, |
|
"logps/rejected": -282.0, |
|
"loss": 0.5875, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.376953125, |
|
"rewards/margins": 0.48828125, |
|
"rewards/rejected": -0.8671875, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.4604918890633176, |
|
"grad_norm": 25.328622817993164, |
|
"learning_rate": 6.057056568971383e-07, |
|
"logits/chosen": 0.4140625, |
|
"logits/rejected": 0.46484375, |
|
"logps/chosen": -408.0, |
|
"logps/rejected": -318.0, |
|
"loss": 0.5225, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.474609375, |
|
"rewards/margins": 0.89453125, |
|
"rewards/rejected": -1.3671875, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.47095761381475665, |
|
"grad_norm": 20.754179000854492, |
|
"learning_rate": 5.887184949375242e-07, |
|
"logits/chosen": 0.48828125, |
|
"logits/rejected": 0.388671875, |
|
"logps/chosen": -350.0, |
|
"logps/rejected": -310.0, |
|
"loss": 0.5658, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.50390625, |
|
"rewards/margins": 0.7578125, |
|
"rewards/rejected": -1.265625, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.48142333856619574, |
|
"grad_norm": 25.111303329467773, |
|
"learning_rate": 5.716249049740689e-07, |
|
"logits/chosen": 0.41796875, |
|
"logits/rejected": 0.578125, |
|
"logps/chosen": -368.0, |
|
"logps/rejected": -336.0, |
|
"loss": 0.5623, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.1806640625, |
|
"rewards/margins": 0.79296875, |
|
"rewards/rejected": -0.97265625, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.49188906331763477, |
|
"grad_norm": 20.85157585144043, |
|
"learning_rate": 5.544453927272492e-07, |
|
"logits/chosen": 0.44140625, |
|
"logits/rejected": 0.466796875, |
|
"logps/chosen": -348.0, |
|
"logps/rejected": -330.0, |
|
"loss": 0.545, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.103515625, |
|
"rewards/margins": 0.7578125, |
|
"rewards/rejected": -0.86328125, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5023547880690737, |
|
"grad_norm": 27.79718017578125, |
|
"learning_rate": 5.372005669911693e-07, |
|
"logits/chosen": 0.50390625, |
|
"logits/rejected": 0.5546875, |
|
"logps/chosen": -346.0, |
|
"logps/rejected": -280.0, |
|
"loss": 0.6289, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.07666015625, |
|
"rewards/margins": 0.61328125, |
|
"rewards/rejected": -0.69140625, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.5128205128205128, |
|
"grad_norm": 25.19495964050293, |
|
"learning_rate": 5.199111149109497e-07, |
|
"logits/chosen": 0.302734375, |
|
"logits/rejected": 0.49609375, |
|
"logps/chosen": -330.0, |
|
"logps/rejected": -292.0, |
|
"loss": 0.5888, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.03759765625, |
|
"rewards/margins": 0.89453125, |
|
"rewards/rejected": -0.93359375, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.5232862375719518, |
|
"grad_norm": 23.607749938964844, |
|
"learning_rate": 5.025977771661266e-07, |
|
"logits/chosen": 0.494140625, |
|
"logits/rejected": 0.50390625, |
|
"logps/chosen": -306.0, |
|
"logps/rejected": -312.0, |
|
"loss": 0.592, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.330078125, |
|
"rewards/margins": 0.4296875, |
|
"rewards/rejected": -0.7578125, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.533751962323391, |
|
"grad_norm": 28.496578216552734, |
|
"learning_rate": 4.852813230898279e-07, |
|
"logits/chosen": 0.462890625, |
|
"logits/rejected": 0.498046875, |
|
"logps/chosen": -374.0, |
|
"logps/rejected": -310.0, |
|
"loss": 0.599, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.050048828125, |
|
"rewards/margins": 0.6953125, |
|
"rewards/rejected": -0.64453125, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.54421768707483, |
|
"grad_norm": 23.15301513671875, |
|
"learning_rate": 4.679825257535794e-07, |
|
"logits/chosen": 0.439453125, |
|
"logits/rejected": 0.43359375, |
|
"logps/chosen": -366.0, |
|
"logps/rejected": -334.0, |
|
"loss": 0.5337, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.0294189453125, |
|
"rewards/margins": 0.71484375, |
|
"rewards/rejected": -0.68359375, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.554683411826269, |
|
"grad_norm": 22.724777221679688, |
|
"learning_rate": 4.507221370476223e-07, |
|
"logits/chosen": 0.4609375, |
|
"logits/rejected": 0.59765625, |
|
"logps/chosen": -336.0, |
|
"logps/rejected": -328.0, |
|
"loss": 0.5767, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.02880859375, |
|
"rewards/margins": 0.671875, |
|
"rewards/rejected": -0.640625, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.565149136577708, |
|
"grad_norm": 20.862764358520508, |
|
"learning_rate": 4.3352086278664377e-07, |
|
"logits/chosen": 0.4921875, |
|
"logits/rejected": 0.515625, |
|
"logps/chosen": -318.0, |
|
"logps/rejected": -270.0, |
|
"loss": 0.5311, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.051025390625, |
|
"rewards/margins": 0.7109375, |
|
"rewards/rejected": -0.66015625, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.5756148613291471, |
|
"grad_norm": 23.62275505065918, |
|
"learning_rate": 4.1639933787077854e-07, |
|
"logits/chosen": 0.46484375, |
|
"logits/rejected": 0.609375, |
|
"logps/chosen": -332.0, |
|
"logps/rejected": -304.0, |
|
"loss": 0.5508, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.01165771484375, |
|
"rewards/margins": 0.6640625, |
|
"rewards/rejected": -0.6796875, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.5860805860805861, |
|
"grad_norm": 26.077327728271484, |
|
"learning_rate": 3.9937810153168016e-07, |
|
"logits/chosen": 0.5078125, |
|
"logits/rejected": 0.427734375, |
|
"logps/chosen": -374.0, |
|
"logps/rejected": -334.0, |
|
"loss": 0.5848, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.2197265625, |
|
"rewards/margins": 0.609375, |
|
"rewards/rejected": -0.828125, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.5965463108320251, |
|
"grad_norm": 25.223384857177734, |
|
"learning_rate": 3.8247757269335957e-07, |
|
"logits/chosen": 0.37890625, |
|
"logits/rejected": 0.474609375, |
|
"logps/chosen": -334.0, |
|
"logps/rejected": -302.0, |
|
"loss": 0.5283, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.1943359375, |
|
"rewards/margins": 0.72265625, |
|
"rewards/rejected": -0.91796875, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.6070120355834642, |
|
"grad_norm": 23.150033950805664, |
|
"learning_rate": 3.657180254773445e-07, |
|
"logits/chosen": 0.421875, |
|
"logits/rejected": 0.546875, |
|
"logps/chosen": -334.0, |
|
"logps/rejected": -274.0, |
|
"loss": 0.5584, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.359375, |
|
"rewards/margins": 0.828125, |
|
"rewards/rejected": -1.1875, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.6174777603349032, |
|
"grad_norm": 26.779611587524414, |
|
"learning_rate": 3.4911956488154694e-07, |
|
"logits/chosen": 0.48828125, |
|
"logits/rejected": 0.625, |
|
"logps/chosen": -360.0, |
|
"logps/rejected": -310.0, |
|
"loss": 0.5908, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.4296875, |
|
"rewards/margins": 0.73046875, |
|
"rewards/rejected": -1.15625, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.6279434850863422, |
|
"grad_norm": 30.289024353027344, |
|
"learning_rate": 3.327021026620137e-07, |
|
"logits/chosen": 0.53125, |
|
"logits/rejected": 0.5234375, |
|
"logps/chosen": -356.0, |
|
"logps/rejected": -328.0, |
|
"loss": 0.5481, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.4921875, |
|
"rewards/margins": 0.734375, |
|
"rewards/rejected": -1.2265625, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.6384092098377813, |
|
"grad_norm": 26.568483352661133, |
|
"learning_rate": 3.16485333446493e-07, |
|
"logits/chosen": 0.396484375, |
|
"logits/rejected": 0.81640625, |
|
"logps/chosen": -328.0, |
|
"logps/rejected": -368.0, |
|
"loss": 0.5517, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.58984375, |
|
"rewards/margins": 0.76953125, |
|
"rewards/rejected": -1.359375, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.6488749345892203, |
|
"grad_norm": 23.833829879760742, |
|
"learning_rate": 3.004887111084704e-07, |
|
"logits/chosen": 0.314453125, |
|
"logits/rejected": 0.37890625, |
|
"logps/chosen": -358.0, |
|
"logps/rejected": -312.0, |
|
"loss": 0.5447, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.373046875, |
|
"rewards/margins": 0.8125, |
|
"rewards/rejected": -1.1875, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.6593406593406593, |
|
"grad_norm": 24.47187042236328, |
|
"learning_rate": 2.8473142543001816e-07, |
|
"logits/chosen": 0.45703125, |
|
"logits/rejected": 0.455078125, |
|
"logps/chosen": -324.0, |
|
"logps/rejected": -310.0, |
|
"loss": 0.552, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.55859375, |
|
"rewards/margins": 0.70703125, |
|
"rewards/rejected": -1.265625, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.6698063840920984, |
|
"grad_norm": 29.288145065307617, |
|
"learning_rate": 2.6923237908145226e-07, |
|
"logits/chosen": 0.458984375, |
|
"logits/rejected": 0.494140625, |
|
"logps/chosen": -344.0, |
|
"logps/rejected": -324.0, |
|
"loss": 0.5347, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.396484375, |
|
"rewards/margins": 0.76171875, |
|
"rewards/rejected": -1.15625, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.6802721088435374, |
|
"grad_norm": 25.19725799560547, |
|
"learning_rate": 2.540101649454119e-07, |
|
"logits/chosen": 0.6171875, |
|
"logits/rejected": 0.5859375, |
|
"logps/chosen": -346.0, |
|
"logps/rejected": -316.0, |
|
"loss": 0.5428, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.302734375, |
|
"rewards/margins": 0.8984375, |
|
"rewards/rejected": -1.1953125, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.6907378335949764, |
|
"grad_norm": 23.4490909576416, |
|
"learning_rate": 2.3908304381256603e-07, |
|
"logits/chosen": 0.439453125, |
|
"logits/rejected": 0.69921875, |
|
"logps/chosen": -326.0, |
|
"logps/rejected": -298.0, |
|
"loss": 0.5694, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.515625, |
|
"rewards/margins": 0.625, |
|
"rewards/rejected": -1.140625, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.7012035583464155, |
|
"grad_norm": 25.44082260131836, |
|
"learning_rate": 2.2446892247570255e-07, |
|
"logits/chosen": 0.458984375, |
|
"logits/rejected": 0.55859375, |
|
"logps/chosen": -334.0, |
|
"logps/rejected": -314.0, |
|
"loss": 0.5794, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.216796875, |
|
"rewards/margins": 0.859375, |
|
"rewards/rejected": -1.078125, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.7116692830978545, |
|
"grad_norm": 27.55499839782715, |
|
"learning_rate": 2.1018533224847633e-07, |
|
"logits/chosen": 0.51171875, |
|
"logits/rejected": 0.5, |
|
"logps/chosen": -328.0, |
|
"logps/rejected": -286.0, |
|
"loss": 0.5717, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.171875, |
|
"rewards/margins": 0.7890625, |
|
"rewards/rejected": -0.9609375, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.7221350078492935, |
|
"grad_norm": 24.051036834716797, |
|
"learning_rate": 1.9624940793459055e-07, |
|
"logits/chosen": 0.3125, |
|
"logits/rejected": 0.486328125, |
|
"logps/chosen": -358.0, |
|
"logps/rejected": -318.0, |
|
"loss": 0.5578, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.03173828125, |
|
"rewards/margins": 0.69921875, |
|
"rewards/rejected": -0.734375, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.7326007326007326, |
|
"grad_norm": 28.70475196838379, |
|
"learning_rate": 1.8267786727263424e-07, |
|
"logits/chosen": 0.58203125, |
|
"logits/rejected": 0.8046875, |
|
"logps/chosen": -292.0, |
|
"logps/rejected": -288.0, |
|
"loss": 0.5475, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.1865234375, |
|
"rewards/margins": 0.484375, |
|
"rewards/rejected": -0.671875, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.7430664573521716, |
|
"grad_norm": 21.762775421142578, |
|
"learning_rate": 1.694869908812399e-07, |
|
"logits/chosen": 0.458984375, |
|
"logits/rejected": 0.419921875, |
|
"logps/chosen": -320.0, |
|
"logps/rejected": -284.0, |
|
"loss": 0.5696, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.2578125, |
|
"rewards/margins": 0.53515625, |
|
"rewards/rejected": -0.7890625, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.7535321821036107, |
|
"grad_norm": 24.395294189453125, |
|
"learning_rate": 1.5669260272861422e-07, |
|
"logits/chosen": 0.4140625, |
|
"logits/rejected": 0.33984375, |
|
"logps/chosen": -302.0, |
|
"logps/rejected": -290.0, |
|
"loss": 0.5357, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.018310546875, |
|
"rewards/margins": 0.75, |
|
"rewards/rejected": -0.73046875, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.7639979068550498, |
|
"grad_norm": 25.604717254638672, |
|
"learning_rate": 1.4431005114987483e-07, |
|
"logits/chosen": 0.56640625, |
|
"logits/rejected": 0.466796875, |
|
"logps/chosen": -360.0, |
|
"logps/rejected": -312.0, |
|
"loss": 0.5581, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.09716796875, |
|
"rewards/margins": 0.6796875, |
|
"rewards/rejected": -0.77734375, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.7744636316064888, |
|
"grad_norm": 27.11602783203125, |
|
"learning_rate": 1.323541904349636e-07, |
|
"logits/chosen": 0.5234375, |
|
"logits/rejected": 0.58984375, |
|
"logps/chosen": -362.0, |
|
"logps/rejected": -288.0, |
|
"loss": 0.5973, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.1513671875, |
|
"rewards/margins": 0.74609375, |
|
"rewards/rejected": -0.8984375, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.7849293563579278, |
|
"grad_norm": 25.82685661315918, |
|
"learning_rate": 1.2083936300922237e-07, |
|
"logits/chosen": 0.60546875, |
|
"logits/rejected": 0.6015625, |
|
"logps/chosen": -356.0, |
|
"logps/rejected": -344.0, |
|
"loss": 0.5742, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.11083984375, |
|
"rewards/margins": 0.640625, |
|
"rewards/rejected": -0.75, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.7953950811093669, |
|
"grad_norm": 24.40570068359375, |
|
"learning_rate": 1.0977938222801004e-07, |
|
"logits/chosen": 0.578125, |
|
"logits/rejected": 0.40234375, |
|
"logps/chosen": -334.0, |
|
"logps/rejected": -300.0, |
|
"loss": 0.5828, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.19140625, |
|
"rewards/margins": 0.734375, |
|
"rewards/rejected": -0.92578125, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.8058608058608059, |
|
"grad_norm": 22.45295524597168, |
|
"learning_rate": 9.918751580599999e-08, |
|
"logits/chosen": 0.54296875, |
|
"logits/rejected": 0.53515625, |
|
"logps/chosen": -364.0, |
|
"logps/rejected": -308.0, |
|
"loss": 0.5766, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.09814453125, |
|
"rewards/margins": 0.69921875, |
|
"rewards/rejected": -0.796875, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.8163265306122449, |
|
"grad_norm": 23.214521408081055, |
|
"learning_rate": 8.907646990103495e-08, |
|
"logits/chosen": 0.59375, |
|
"logits/rejected": 0.5625, |
|
"logps/chosen": -306.0, |
|
"logps/rejected": -278.0, |
|
"loss": 0.5229, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.212890625, |
|
"rewards/margins": 0.69140625, |
|
"rewards/rejected": -0.90234375, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.826792255363684, |
|
"grad_norm": 21.733230590820312, |
|
"learning_rate": 7.945837387163424e-08, |
|
"logits/chosen": 0.5234375, |
|
"logits/rejected": 0.53125, |
|
"logps/chosen": -356.0, |
|
"logps/rejected": -350.0, |
|
"loss": 0.5792, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.216796875, |
|
"rewards/margins": 0.80078125, |
|
"rewards/rejected": -1.015625, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.837257980115123, |
|
"grad_norm": 28.44561004638672, |
|
"learning_rate": 7.034476572643854e-08, |
|
"logits/chosen": 0.609375, |
|
"logits/rejected": 0.625, |
|
"logps/chosen": -324.0, |
|
"logps/rejected": -318.0, |
|
"loss": 0.5711, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.220703125, |
|
"rewards/margins": 0.625, |
|
"rewards/rejected": -0.84765625, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.847723704866562, |
|
"grad_norm": 24.274005889892578, |
|
"learning_rate": 6.174657828304541e-08, |
|
"logits/chosen": 0.46875, |
|
"logits/rejected": 0.55078125, |
|
"logps/chosen": -324.0, |
|
"logps/rejected": -310.0, |
|
"loss": 0.5782, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.1640625, |
|
"rewards/margins": 0.74609375, |
|
"rewards/rejected": -0.91015625, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.858189429618001, |
|
"grad_norm": 18.96664810180664, |
|
"learning_rate": 5.36741260528415e-08, |
|
"logits/chosen": 0.423828125, |
|
"logits/rejected": 0.310546875, |
|
"logps/chosen": -364.0, |
|
"logps/rejected": -294.0, |
|
"loss": 0.529, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.058349609375, |
|
"rewards/margins": 0.97265625, |
|
"rewards/rejected": -1.03125, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.8686551543694401, |
|
"grad_norm": 21.882705688476562, |
|
"learning_rate": 4.613709286756412e-08, |
|
"logits/chosen": 0.4921875, |
|
"logits/rejected": 0.443359375, |
|
"logps/chosen": -326.0, |
|
"logps/rejected": -284.0, |
|
"loss": 0.5432, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.07568359375, |
|
"rewards/margins": 0.85546875, |
|
"rewards/rejected": -0.9296875, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.8791208791208791, |
|
"grad_norm": 16.712879180908203, |
|
"learning_rate": 3.914452026243509e-08, |
|
"logits/chosen": 0.482421875, |
|
"logits/rejected": 0.4765625, |
|
"logps/chosen": -356.0, |
|
"logps/rejected": -320.0, |
|
"loss": 0.5462, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.1796875, |
|
"rewards/margins": 0.58984375, |
|
"rewards/rejected": -0.76953125, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.8895866038723181, |
|
"grad_norm": 23.453189849853516, |
|
"learning_rate": 3.270479662980247e-08, |
|
"logits/chosen": 0.57421875, |
|
"logits/rejected": 0.361328125, |
|
"logps/chosen": -328.0, |
|
"logps/rejected": -286.0, |
|
"loss": 0.5781, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.0693359375, |
|
"rewards/margins": 0.703125, |
|
"rewards/rejected": -0.7734375, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.9000523286237572, |
|
"grad_norm": 25.8822021484375, |
|
"learning_rate": 2.6825647156302865e-08, |
|
"logits/chosen": 0.60546875, |
|
"logits/rejected": 0.66015625, |
|
"logps/chosen": -356.0, |
|
"logps/rejected": -360.0, |
|
"loss": 0.5378, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.2099609375, |
|
"rewards/margins": 0.70703125, |
|
"rewards/rejected": -0.9140625, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.9105180533751962, |
|
"grad_norm": 29.531282424926758, |
|
"learning_rate": 2.151412455561441e-08, |
|
"logits/chosen": 0.5234375, |
|
"logits/rejected": 0.61328125, |
|
"logps/chosen": -342.0, |
|
"logps/rejected": -298.0, |
|
"loss": 0.5749, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.224609375, |
|
"rewards/margins": 0.75390625, |
|
"rewards/rejected": -0.9765625, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.9209837781266352, |
|
"grad_norm": 27.597761154174805, |
|
"learning_rate": 1.6776600607918356e-08, |
|
"logits/chosen": 0.5078125, |
|
"logits/rejected": 0.41015625, |
|
"logps/chosen": -328.0, |
|
"logps/rejected": -278.0, |
|
"loss": 0.5578, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.2333984375, |
|
"rewards/margins": 0.71875, |
|
"rewards/rejected": -0.953125, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.9314495028780743, |
|
"grad_norm": 22.400959014892578, |
|
"learning_rate": 1.2618758516218186e-08, |
|
"logits/chosen": 0.384765625, |
|
"logits/rejected": 0.443359375, |
|
"logps/chosen": -348.0, |
|
"logps/rejected": -312.0, |
|
"loss": 0.5566, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.09814453125, |
|
"rewards/margins": 0.7734375, |
|
"rewards/rejected": -0.87109375, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.9419152276295133, |
|
"grad_norm": 23.831453323364258, |
|
"learning_rate": 9.045586088686496e-09, |
|
"logits/chosen": 0.443359375, |
|
"logits/rejected": 0.427734375, |
|
"logps/chosen": -372.0, |
|
"logps/rejected": -308.0, |
|
"loss": 0.5636, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.0031890869140625, |
|
"rewards/margins": 0.65625, |
|
"rewards/rejected": -0.65234375, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.9523809523809523, |
|
"grad_norm": 24.923038482666016, |
|
"learning_rate": 6.06136975521715e-09, |
|
"logits/chosen": 0.3671875, |
|
"logits/rejected": 0.40625, |
|
"logps/chosen": -304.0, |
|
"logps/rejected": -274.0, |
|
"loss": 0.5489, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.1318359375, |
|
"rewards/margins": 0.81640625, |
|
"rewards/rejected": -0.94921875, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.9628466771323915, |
|
"grad_norm": 24.579931259155273, |
|
"learning_rate": 3.6696894253614442e-09, |
|
"logits/chosen": 0.375, |
|
"logits/rejected": 0.3671875, |
|
"logps/chosen": -344.0, |
|
"logps/rejected": -312.0, |
|
"loss": 0.572, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.0146484375, |
|
"rewards/margins": 0.82421875, |
|
"rewards/rejected": -0.83984375, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.9733124018838305, |
|
"grad_norm": 23.33355712890625, |
|
"learning_rate": 1.8734141938160918e-09, |
|
"logits/chosen": 0.640625, |
|
"logits/rejected": 0.64453125, |
|
"logps/chosen": -350.0, |
|
"logps/rejected": -324.0, |
|
"loss": 0.5611, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.0311279296875, |
|
"rewards/margins": 0.796875, |
|
"rewards/rejected": -0.828125, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.9837781266352695, |
|
"grad_norm": 24.21430778503418, |
|
"learning_rate": 6.746988986155999e-10, |
|
"logits/chosen": 0.50390625, |
|
"logits/rejected": 0.443359375, |
|
"logps/chosen": -358.0, |
|
"logps/rejected": -320.0, |
|
"loss": 0.5494, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.07275390625, |
|
"rewards/margins": 0.77734375, |
|
"rewards/rejected": -0.8515625, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.9942438513867086, |
|
"grad_norm": 23.800779342651367, |
|
"learning_rate": 7.498153615653758e-11, |
|
"logits/chosen": 0.515625, |
|
"logits/rejected": 0.447265625, |
|
"logps/chosen": -338.0, |
|
"logps/rejected": -324.0, |
|
"loss": 0.5375, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.15625, |
|
"rewards/margins": 0.72265625, |
|
"rewards/rejected": -0.87890625, |
|
"step": 950 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 955, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|