|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 3822, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0005232862375719519, |
|
"grad_norm": 10.351250771822766, |
|
"learning_rate": 1.3054830287206266e-07, |
|
"logits/chosen": -12.5625, |
|
"logits/rejected": -11.6875, |
|
"logps/chosen": -430.0, |
|
"logps/rejected": -460.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0052328623757195184, |
|
"grad_norm": 9.844286946232794, |
|
"learning_rate": 1.3054830287206267e-06, |
|
"logits/chosen": -11.3125, |
|
"logits/rejected": -11.3125, |
|
"logps/chosen": -364.0, |
|
"logps/rejected": -290.0, |
|
"loss": 0.6938, |
|
"rewards/accuracies": 0.2083333283662796, |
|
"rewards/chosen": 0.003997802734375, |
|
"rewards/margins": 0.0033111572265625, |
|
"rewards/rejected": 0.000701904296875, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.010465724751439037, |
|
"grad_norm": 8.840907831144664, |
|
"learning_rate": 2.6109660574412534e-06, |
|
"logits/chosen": -11.0625, |
|
"logits/rejected": -11.0625, |
|
"logps/chosen": -264.0, |
|
"logps/rejected": -256.0, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.2750000059604645, |
|
"rewards/chosen": -0.0019989013671875, |
|
"rewards/margins": 0.009521484375, |
|
"rewards/rejected": -0.01153564453125, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.015698587127158554, |
|
"grad_norm": 10.083186641323694, |
|
"learning_rate": 3.9164490861618806e-06, |
|
"logits/chosen": -10.375, |
|
"logits/rejected": -10.3125, |
|
"logps/chosen": -328.0, |
|
"logps/rejected": -318.0, |
|
"loss": 0.6803, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": 0.00970458984375, |
|
"rewards/margins": 0.023681640625, |
|
"rewards/rejected": -0.01397705078125, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.020931449502878074, |
|
"grad_norm": 9.068339095588007, |
|
"learning_rate": 5.221932114882507e-06, |
|
"logits/chosen": -11.4375, |
|
"logits/rejected": -11.0625, |
|
"logps/chosen": -336.0, |
|
"logps/rejected": -312.0, |
|
"loss": 0.6719, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.032470703125, |
|
"rewards/margins": 0.030517578125, |
|
"rewards/rejected": -0.06298828125, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.026164311878597593, |
|
"grad_norm": 9.546615633761498, |
|
"learning_rate": 6.527415143603134e-06, |
|
"logits/chosen": -12.1875, |
|
"logits/rejected": -12.0, |
|
"logps/chosen": -316.0, |
|
"logps/rejected": -314.0, |
|
"loss": 0.6523, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.09912109375, |
|
"rewards/margins": 0.1376953125, |
|
"rewards/rejected": -0.2373046875, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03139717425431711, |
|
"grad_norm": 9.19274633645371, |
|
"learning_rate": 7.832898172323761e-06, |
|
"logits/chosen": -12.4375, |
|
"logits/rejected": -12.125, |
|
"logps/chosen": -334.0, |
|
"logps/rejected": -328.0, |
|
"loss": 0.6243, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.3203125, |
|
"rewards/margins": 0.1455078125, |
|
"rewards/rejected": -0.466796875, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.03663003663003663, |
|
"grad_norm": 10.83447468535562, |
|
"learning_rate": 9.138381201044387e-06, |
|
"logits/chosen": -13.625, |
|
"logits/rejected": -13.5625, |
|
"logps/chosen": -380.0, |
|
"logps/rejected": -340.0, |
|
"loss": 0.609, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6796875, |
|
"rewards/margins": 0.287109375, |
|
"rewards/rejected": -0.96875, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04186289900575615, |
|
"grad_norm": 9.13507791543036, |
|
"learning_rate": 1.0443864229765014e-05, |
|
"logits/chosen": -13.25, |
|
"logits/rejected": -13.25, |
|
"logps/chosen": -324.0, |
|
"logps/rejected": -320.0, |
|
"loss": 0.6258, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.5703125, |
|
"rewards/margins": 0.283203125, |
|
"rewards/rejected": -0.8515625, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.04709576138147567, |
|
"grad_norm": 8.742369309742912, |
|
"learning_rate": 1.174934725848564e-05, |
|
"logits/chosen": -11.875, |
|
"logits/rejected": -11.5, |
|
"logps/chosen": -296.0, |
|
"logps/rejected": -272.0, |
|
"loss": 0.6439, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.28515625, |
|
"rewards/margins": 0.208984375, |
|
"rewards/rejected": -0.494140625, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.052328623757195186, |
|
"grad_norm": 11.89346927211031, |
|
"learning_rate": 1.3054830287206268e-05, |
|
"logits/chosen": -11.875, |
|
"logits/rejected": -10.875, |
|
"logps/chosen": -356.0, |
|
"logps/rejected": -286.0, |
|
"loss": 0.6267, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.380859375, |
|
"rewards/margins": 0.27734375, |
|
"rewards/rejected": -0.65625, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0575614861329147, |
|
"grad_norm": 7.722831940344756, |
|
"learning_rate": 1.4360313315926893e-05, |
|
"logits/chosen": -11.125, |
|
"logits/rejected": -10.625, |
|
"logps/chosen": -320.0, |
|
"logps/rejected": -292.0, |
|
"loss": 0.6081, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.6328125, |
|
"rewards/margins": 0.443359375, |
|
"rewards/rejected": -1.078125, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06279434850863422, |
|
"grad_norm": 10.557403681525182, |
|
"learning_rate": 1.5665796344647522e-05, |
|
"logits/chosen": -11.6875, |
|
"logits/rejected": -11.375, |
|
"logps/chosen": -422.0, |
|
"logps/rejected": -376.0, |
|
"loss": 0.6488, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.578125, |
|
"rewards/margins": 0.28515625, |
|
"rewards/rejected": -0.86328125, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.06802721088435375, |
|
"grad_norm": 8.52113738726324, |
|
"learning_rate": 1.6971279373368146e-05, |
|
"logits/chosen": -11.9375, |
|
"logits/rejected": -11.125, |
|
"logps/chosen": -282.0, |
|
"logps/rejected": -288.0, |
|
"loss": 0.6485, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6796875, |
|
"rewards/margins": 0.2890625, |
|
"rewards/rejected": -0.96875, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07326007326007326, |
|
"grad_norm": 10.818036746056942, |
|
"learning_rate": 1.8276762402088773e-05, |
|
"logits/chosen": -12.3125, |
|
"logits/rejected": -12.125, |
|
"logps/chosen": -362.0, |
|
"logps/rejected": -340.0, |
|
"loss": 0.5629, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.0703125, |
|
"rewards/margins": 0.6015625, |
|
"rewards/rejected": -1.671875, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.07849293563579278, |
|
"grad_norm": 9.701498673533864, |
|
"learning_rate": 1.95822454308094e-05, |
|
"logits/chosen": -13.125, |
|
"logits/rejected": -13.0, |
|
"logps/chosen": -358.0, |
|
"logps/rejected": -326.0, |
|
"loss": 0.6388, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.515625, |
|
"rewards/margins": 0.48046875, |
|
"rewards/rejected": -1.9921875, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0837257980115123, |
|
"grad_norm": 9.972948367466696, |
|
"learning_rate": 2.0887728459530027e-05, |
|
"logits/chosen": -13.3125, |
|
"logits/rejected": -13.5, |
|
"logps/chosen": -366.0, |
|
"logps/rejected": -324.0, |
|
"loss": 0.6264, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.328125, |
|
"rewards/margins": 0.68359375, |
|
"rewards/rejected": -2.015625, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.08895866038723181, |
|
"grad_norm": 9.17511762621532, |
|
"learning_rate": 2.2193211488250655e-05, |
|
"logits/chosen": -13.375, |
|
"logits/rejected": -13.25, |
|
"logps/chosen": -338.0, |
|
"logps/rejected": -346.0, |
|
"loss": 0.5969, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -1.3359375, |
|
"rewards/margins": 0.1865234375, |
|
"rewards/rejected": -1.5234375, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.09419152276295134, |
|
"grad_norm": 6.965621587975365, |
|
"learning_rate": 2.349869451697128e-05, |
|
"logits/chosen": -12.8125, |
|
"logits/rejected": -12.75, |
|
"logps/chosen": -302.0, |
|
"logps/rejected": -314.0, |
|
"loss": 0.6303, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.359375, |
|
"rewards/margins": 0.359375, |
|
"rewards/rejected": -1.71875, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.09942438513867086, |
|
"grad_norm": 9.553188606130272, |
|
"learning_rate": 2.4804177545691905e-05, |
|
"logits/chosen": -12.4375, |
|
"logits/rejected": -12.5625, |
|
"logps/chosen": -416.0, |
|
"logps/rejected": -356.0, |
|
"loss": 0.5885, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7578125, |
|
"rewards/margins": 0.62890625, |
|
"rewards/rejected": -2.390625, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.10465724751439037, |
|
"grad_norm": 9.121837612837107, |
|
"learning_rate": 2.6109660574412536e-05, |
|
"logits/chosen": -12.8125, |
|
"logits/rejected": -12.5, |
|
"logps/chosen": -364.0, |
|
"logps/rejected": -360.0, |
|
"loss": 0.6058, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.859375, |
|
"rewards/margins": 0.38671875, |
|
"rewards/rejected": -2.25, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.10989010989010989, |
|
"grad_norm": 8.059177993813114, |
|
"learning_rate": 2.741514360313316e-05, |
|
"logits/chosen": -13.0, |
|
"logits/rejected": -12.875, |
|
"logps/chosen": -316.0, |
|
"logps/rejected": -294.0, |
|
"loss": 0.6417, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.796875, |
|
"rewards/margins": 0.306640625, |
|
"rewards/rejected": -2.109375, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.1151229722658294, |
|
"grad_norm": 8.500755304845391, |
|
"learning_rate": 2.8720626631853787e-05, |
|
"logits/chosen": -13.125, |
|
"logits/rejected": -12.625, |
|
"logps/chosen": -344.0, |
|
"logps/rejected": -354.0, |
|
"loss": 0.7723, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.4375, |
|
"rewards/margins": 0.578125, |
|
"rewards/rejected": -3.015625, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.12035583464154893, |
|
"grad_norm": 9.230475840269714, |
|
"learning_rate": 3.0026109660574414e-05, |
|
"logits/chosen": -12.375, |
|
"logits/rejected": -12.125, |
|
"logps/chosen": -386.0, |
|
"logps/rejected": -422.0, |
|
"loss": 0.6046, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.359375, |
|
"rewards/margins": 0.63671875, |
|
"rewards/rejected": -3.0, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.12558869701726844, |
|
"grad_norm": 9.204392189192681, |
|
"learning_rate": 3.1331592689295045e-05, |
|
"logits/chosen": -10.875, |
|
"logits/rejected": -10.0, |
|
"logps/chosen": -328.0, |
|
"logps/rejected": -336.0, |
|
"loss": 0.6512, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.9765625, |
|
"rewards/margins": 0.65234375, |
|
"rewards/rejected": -2.640625, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.13082155939298795, |
|
"grad_norm": 9.749349255282869, |
|
"learning_rate": 3.263707571801567e-05, |
|
"logits/chosen": -11.0625, |
|
"logits/rejected": -10.4375, |
|
"logps/chosen": -388.0, |
|
"logps/rejected": -348.0, |
|
"loss": 0.6508, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.0625, |
|
"rewards/margins": 0.439453125, |
|
"rewards/rejected": -2.5, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1360544217687075, |
|
"grad_norm": 10.257472173487507, |
|
"learning_rate": 3.394255874673629e-05, |
|
"logits/chosen": -10.9375, |
|
"logits/rejected": -10.4375, |
|
"logps/chosen": -368.0, |
|
"logps/rejected": -338.0, |
|
"loss": 0.6354, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.0625, |
|
"rewards/margins": 0.68359375, |
|
"rewards/rejected": -2.75, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.141287284144427, |
|
"grad_norm": 6.958550084405995, |
|
"learning_rate": 3.524804177545692e-05, |
|
"logits/chosen": -9.875, |
|
"logits/rejected": -9.5625, |
|
"logps/chosen": -364.0, |
|
"logps/rejected": -362.0, |
|
"loss": 0.5584, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.8125, |
|
"rewards/margins": 0.7109375, |
|
"rewards/rejected": -2.515625, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.14652014652014653, |
|
"grad_norm": 9.847805644914816, |
|
"learning_rate": 3.6553524804177546e-05, |
|
"logits/chosen": -11.625, |
|
"logits/rejected": -11.6875, |
|
"logps/chosen": -410.0, |
|
"logps/rejected": -382.0, |
|
"loss": 0.6862, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.75, |
|
"rewards/margins": 0.49609375, |
|
"rewards/rejected": -3.25, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.15175300889586604, |
|
"grad_norm": 8.573440905919824, |
|
"learning_rate": 3.7859007832898173e-05, |
|
"logits/chosen": -12.875, |
|
"logits/rejected": -12.4375, |
|
"logps/chosen": -382.0, |
|
"logps/rejected": -336.0, |
|
"loss": 0.7394, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.9609375, |
|
"rewards/margins": 0.62109375, |
|
"rewards/rejected": -2.578125, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.15698587127158556, |
|
"grad_norm": 8.80725505375811, |
|
"learning_rate": 3.91644908616188e-05, |
|
"logits/chosen": -13.75, |
|
"logits/rejected": -13.25, |
|
"logps/chosen": -380.0, |
|
"logps/rejected": -344.0, |
|
"loss": 0.6646, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.3125, |
|
"rewards/margins": 0.458984375, |
|
"rewards/rejected": -2.765625, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16221873364730507, |
|
"grad_norm": 8.968657211210374, |
|
"learning_rate": 4.046997389033943e-05, |
|
"logits/chosen": -12.25, |
|
"logits/rejected": -11.9375, |
|
"logps/chosen": -386.0, |
|
"logps/rejected": -346.0, |
|
"loss": 0.6784, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.4375, |
|
"rewards/margins": 0.578125, |
|
"rewards/rejected": -3.015625, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.1674515960230246, |
|
"grad_norm": 10.194843430881596, |
|
"learning_rate": 4.1775456919060055e-05, |
|
"logits/chosen": -13.625, |
|
"logits/rejected": -13.0625, |
|
"logps/chosen": -442.0, |
|
"logps/rejected": -372.0, |
|
"loss": 0.5713, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -3.5625, |
|
"rewards/margins": 0.76171875, |
|
"rewards/rejected": -4.3125, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.1726844583987441, |
|
"grad_norm": 10.313402119423792, |
|
"learning_rate": 4.308093994778068e-05, |
|
"logits/chosen": -12.6875, |
|
"logits/rejected": -12.375, |
|
"logps/chosen": -398.0, |
|
"logps/rejected": -364.0, |
|
"loss": 0.735, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -3.984375, |
|
"rewards/margins": 0.9375, |
|
"rewards/rejected": -4.90625, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.17791732077446362, |
|
"grad_norm": 9.173592771992633, |
|
"learning_rate": 4.438642297650131e-05, |
|
"logits/chosen": -11.1875, |
|
"logits/rejected": -10.8125, |
|
"logps/chosen": -406.0, |
|
"logps/rejected": -382.0, |
|
"loss": 0.6655, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -4.125, |
|
"rewards/margins": 0.8515625, |
|
"rewards/rejected": -4.96875, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.18315018315018314, |
|
"grad_norm": 8.34069764210929, |
|
"learning_rate": 4.5691906005221936e-05, |
|
"logits/chosen": -11.375, |
|
"logits/rejected": -11.25, |
|
"logps/chosen": -332.0, |
|
"logps/rejected": -360.0, |
|
"loss": 0.6806, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -4.59375, |
|
"rewards/margins": 0.87109375, |
|
"rewards/rejected": -5.46875, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.18838304552590268, |
|
"grad_norm": 11.451242673237575, |
|
"learning_rate": 4.699738903394256e-05, |
|
"logits/chosen": -12.8125, |
|
"logits/rejected": -12.5, |
|
"logps/chosen": -424.0, |
|
"logps/rejected": -404.0, |
|
"loss": 0.767, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -4.4375, |
|
"rewards/margins": 0.578125, |
|
"rewards/rejected": -5.03125, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1936159079016222, |
|
"grad_norm": 8.706088083363465, |
|
"learning_rate": 4.830287206266319e-05, |
|
"logits/chosen": -12.875, |
|
"logits/rejected": -12.6875, |
|
"logps/chosen": -380.0, |
|
"logps/rejected": -362.0, |
|
"loss": 0.7948, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -4.4375, |
|
"rewards/margins": 0.55078125, |
|
"rewards/rejected": -5.0, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.1988487702773417, |
|
"grad_norm": 8.71050562327642, |
|
"learning_rate": 4.960835509138381e-05, |
|
"logits/chosen": -12.875, |
|
"logits/rejected": -12.8125, |
|
"logps/chosen": -360.0, |
|
"logps/rejected": -402.0, |
|
"loss": 0.7885, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -4.40625, |
|
"rewards/margins": 0.79296875, |
|
"rewards/rejected": -5.21875, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.20408163265306123, |
|
"grad_norm": 9.631202895027702, |
|
"learning_rate": 4.9999488859837295e-05, |
|
"logits/chosen": -13.375, |
|
"logits/rejected": -13.1875, |
|
"logps/chosen": -448.0, |
|
"logps/rejected": -396.0, |
|
"loss": 0.8545, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -4.25, |
|
"rewards/margins": 0.93359375, |
|
"rewards/rejected": -5.1875, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.20931449502878074, |
|
"grad_norm": 12.175473256479712, |
|
"learning_rate": 4.999698536649904e-05, |
|
"logits/chosen": -14.5, |
|
"logits/rejected": -14.5, |
|
"logps/chosen": -462.0, |
|
"logps/rejected": -392.0, |
|
"loss": 0.7412, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -4.8125, |
|
"rewards/margins": 0.65234375, |
|
"rewards/rejected": -5.4375, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21454735740450026, |
|
"grad_norm": 10.472391673320981, |
|
"learning_rate": 4.999239584575648e-05, |
|
"logits/chosen": -14.75, |
|
"logits/rejected": -14.875, |
|
"logps/chosen": -446.0, |
|
"logps/rejected": -438.0, |
|
"loss": 0.7897, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -4.84375, |
|
"rewards/margins": 0.298828125, |
|
"rewards/rejected": -5.15625, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.21978021978021978, |
|
"grad_norm": 10.549150549694106, |
|
"learning_rate": 4.9985720680610434e-05, |
|
"logits/chosen": -14.6875, |
|
"logits/rejected": -14.6875, |
|
"logps/chosen": -412.0, |
|
"logps/rejected": -390.0, |
|
"loss": 0.7813, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -4.625, |
|
"rewards/margins": 0.2734375, |
|
"rewards/rejected": -4.90625, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.2250130821559393, |
|
"grad_norm": 8.610558700109573, |
|
"learning_rate": 4.997696042811118e-05, |
|
"logits/chosen": -14.625, |
|
"logits/rejected": -14.8125, |
|
"logps/chosen": -416.0, |
|
"logps/rejected": -344.0, |
|
"loss": 0.7214, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -4.375, |
|
"rewards/margins": 0.59765625, |
|
"rewards/rejected": -4.96875, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.2302459445316588, |
|
"grad_norm": 9.047522311209711, |
|
"learning_rate": 4.996611581931193e-05, |
|
"logits/chosen": -14.125, |
|
"logits/rejected": -14.25, |
|
"logps/chosen": -440.0, |
|
"logps/rejected": -374.0, |
|
"loss": 0.7039, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -4.5625, |
|
"rewards/margins": 0.498046875, |
|
"rewards/rejected": -5.0625, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.23547880690737832, |
|
"grad_norm": 10.207379585431303, |
|
"learning_rate": 4.995318775920787e-05, |
|
"logits/chosen": -13.5, |
|
"logits/rejected": -13.5, |
|
"logps/chosen": -384.0, |
|
"logps/rejected": -386.0, |
|
"loss": 0.7792, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -4.71875, |
|
"rewards/margins": 0.130859375, |
|
"rewards/rejected": -4.84375, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.24071166928309787, |
|
"grad_norm": 9.821006860178837, |
|
"learning_rate": 4.9938177326660587e-05, |
|
"logits/chosen": -13.375, |
|
"logits/rejected": -13.375, |
|
"logps/chosen": -478.0, |
|
"logps/rejected": -436.0, |
|
"loss": 0.6816, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -4.625, |
|
"rewards/margins": 1.046875, |
|
"rewards/rejected": -5.65625, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.24594453165881738, |
|
"grad_norm": 10.675355174224261, |
|
"learning_rate": 4.99210857743081e-05, |
|
"logits/chosen": -12.875, |
|
"logits/rejected": -12.875, |
|
"logps/chosen": -448.0, |
|
"logps/rejected": -436.0, |
|
"loss": 0.6973, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -5.875, |
|
"rewards/margins": 0.72265625, |
|
"rewards/rejected": -6.59375, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.25117739403453687, |
|
"grad_norm": 10.26484583598167, |
|
"learning_rate": 4.990191452846024e-05, |
|
"logits/chosen": -13.0, |
|
"logits/rejected": -13.0, |
|
"logps/chosen": -406.0, |
|
"logps/rejected": -396.0, |
|
"loss": 0.6783, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -5.34375, |
|
"rewards/margins": 0.66796875, |
|
"rewards/rejected": -6.0, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.2564102564102564, |
|
"grad_norm": 9.035937614807631, |
|
"learning_rate": 4.988066518897971e-05, |
|
"logits/chosen": -13.625, |
|
"logits/rejected": -13.3125, |
|
"logps/chosen": -464.0, |
|
"logps/rejected": -436.0, |
|
"loss": 0.6354, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -6.09375, |
|
"rewards/margins": 0.921875, |
|
"rewards/rejected": -7.0, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.2616431187859759, |
|
"grad_norm": 6.87488260728394, |
|
"learning_rate": 4.985733952914852e-05, |
|
"logits/chosen": -15.5625, |
|
"logits/rejected": -15.375, |
|
"logps/chosen": -452.0, |
|
"logps/rejected": -438.0, |
|
"loss": 0.6495, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -7.0, |
|
"rewards/margins": 0.69921875, |
|
"rewards/rejected": -7.71875, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2668759811616955, |
|
"grad_norm": 9.363373210153325, |
|
"learning_rate": 4.983193949552002e-05, |
|
"logits/chosen": -15.625, |
|
"logits/rejected": -15.9375, |
|
"logps/chosen": -450.0, |
|
"logps/rejected": -420.0, |
|
"loss": 0.7343, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -6.5, |
|
"rewards/margins": 0.458984375, |
|
"rewards/rejected": -6.9375, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.272108843537415, |
|
"grad_norm": 7.7281840818721, |
|
"learning_rate": 4.980446720775646e-05, |
|
"logits/chosen": -14.125, |
|
"logits/rejected": -14.25, |
|
"logps/chosen": -464.0, |
|
"logps/rejected": -474.0, |
|
"loss": 0.742, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -5.375, |
|
"rewards/margins": 0.58984375, |
|
"rewards/rejected": -5.9375, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.2773417059131345, |
|
"grad_norm": 9.271525518013695, |
|
"learning_rate": 4.9774924958452084e-05, |
|
"logits/chosen": -14.5625, |
|
"logits/rejected": -14.75, |
|
"logps/chosen": -492.0, |
|
"logps/rejected": -408.0, |
|
"loss": 0.8633, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -6.3125, |
|
"rewards/margins": 0.265625, |
|
"rewards/rejected": -6.59375, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.282574568288854, |
|
"grad_norm": 11.416271168800474, |
|
"learning_rate": 4.974331521294186e-05, |
|
"logits/chosen": -14.75, |
|
"logits/rejected": -14.75, |
|
"logps/chosen": -496.0, |
|
"logps/rejected": -460.0, |
|
"loss": 0.7933, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -7.3125, |
|
"rewards/margins": 0.5625, |
|
"rewards/rejected": -7.90625, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.28780743066457354, |
|
"grad_norm": 7.787620742028088, |
|
"learning_rate": 4.97096406090957e-05, |
|
"logits/chosen": -14.3125, |
|
"logits/rejected": -14.125, |
|
"logps/chosen": -492.0, |
|
"logps/rejected": -468.0, |
|
"loss": 0.7545, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -6.125, |
|
"rewards/margins": 0.69921875, |
|
"rewards/rejected": -6.8125, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.29304029304029305, |
|
"grad_norm": 8.382409522213168, |
|
"learning_rate": 4.96739039570983e-05, |
|
"logits/chosen": -14.25, |
|
"logits/rejected": -14.125, |
|
"logps/chosen": -444.0, |
|
"logps/rejected": -436.0, |
|
"loss": 0.7372, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -5.6875, |
|
"rewards/margins": 0.609375, |
|
"rewards/rejected": -6.3125, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.29827315541601257, |
|
"grad_norm": 9.779033425632107, |
|
"learning_rate": 4.963610823921471e-05, |
|
"logits/chosen": -14.625, |
|
"logits/rejected": -14.4375, |
|
"logps/chosen": -482.0, |
|
"logps/rejected": -434.0, |
|
"loss": 0.9479, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -7.125, |
|
"rewards/margins": -0.15625, |
|
"rewards/rejected": -6.96875, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.3035060177917321, |
|
"grad_norm": 9.733444132976096, |
|
"learning_rate": 4.959625660954139e-05, |
|
"logits/chosen": -14.5, |
|
"logits/rejected": -14.625, |
|
"logps/chosen": -444.0, |
|
"logps/rejected": -388.0, |
|
"loss": 0.744, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -6.78125, |
|
"rewards/margins": 0.65234375, |
|
"rewards/rejected": -7.40625, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.3087388801674516, |
|
"grad_norm": 8.051810745094972, |
|
"learning_rate": 4.9554352393743045e-05, |
|
"logits/chosen": -13.125, |
|
"logits/rejected": -13.0625, |
|
"logps/chosen": -470.0, |
|
"logps/rejected": -452.0, |
|
"loss": 0.9418, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -8.3125, |
|
"rewards/margins": 0.64453125, |
|
"rewards/rejected": -9.0, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.3139717425431711, |
|
"grad_norm": 5.903225463973512, |
|
"learning_rate": 4.9510399088775047e-05, |
|
"logits/chosen": -12.75, |
|
"logits/rejected": -12.75, |
|
"logps/chosen": -500.0, |
|
"logps/rejected": -502.0, |
|
"loss": 0.7086, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -8.125, |
|
"rewards/margins": 0.50390625, |
|
"rewards/rejected": -8.5625, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.31920460491889063, |
|
"grad_norm": 11.634410981967923, |
|
"learning_rate": 4.9464400362591644e-05, |
|
"logits/chosen": -12.1875, |
|
"logits/rejected": -12.125, |
|
"logps/chosen": -420.0, |
|
"logps/rejected": -396.0, |
|
"loss": 0.7854, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -6.5625, |
|
"rewards/margins": 0.50390625, |
|
"rewards/rejected": -7.09375, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.32443746729461015, |
|
"grad_norm": 8.791552603599007, |
|
"learning_rate": 4.941636005383986e-05, |
|
"logits/chosen": -12.375, |
|
"logits/rejected": -12.1875, |
|
"logps/chosen": -528.0, |
|
"logps/rejected": -402.0, |
|
"loss": 0.8591, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -6.59375, |
|
"rewards/margins": 0.86328125, |
|
"rewards/rejected": -7.4375, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.32967032967032966, |
|
"grad_norm": 9.02432053656773, |
|
"learning_rate": 4.936628217153914e-05, |
|
"logits/chosen": -12.3125, |
|
"logits/rejected": -12.3125, |
|
"logps/chosen": -386.0, |
|
"logps/rejected": -408.0, |
|
"loss": 0.7811, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -6.40625, |
|
"rewards/margins": 0.4296875, |
|
"rewards/rejected": -6.8125, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.3349031920460492, |
|
"grad_norm": 9.394934456092882, |
|
"learning_rate": 4.931417089474682e-05, |
|
"logits/chosen": -13.625, |
|
"logits/rejected": -13.6875, |
|
"logps/chosen": -462.0, |
|
"logps/rejected": -438.0, |
|
"loss": 0.6975, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -6.5625, |
|
"rewards/margins": 0.796875, |
|
"rewards/rejected": -7.375, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.3401360544217687, |
|
"grad_norm": 7.439435296736013, |
|
"learning_rate": 4.926003057220935e-05, |
|
"logits/chosen": -15.3125, |
|
"logits/rejected": -15.4375, |
|
"logps/chosen": -470.0, |
|
"logps/rejected": -434.0, |
|
"loss": 0.7351, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -7.0, |
|
"rewards/margins": 0.609375, |
|
"rewards/rejected": -7.625, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.3453689167974882, |
|
"grad_norm": 9.162980300519617, |
|
"learning_rate": 4.92038657219994e-05, |
|
"logits/chosen": -16.375, |
|
"logits/rejected": -16.5, |
|
"logps/chosen": -402.0, |
|
"logps/rejected": -392.0, |
|
"loss": 0.743, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -6.53125, |
|
"rewards/margins": 0.671875, |
|
"rewards/rejected": -7.1875, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.35060177917320773, |
|
"grad_norm": 11.704890362462061, |
|
"learning_rate": 4.914568103113882e-05, |
|
"logits/chosen": -15.6875, |
|
"logits/rejected": -16.0, |
|
"logps/chosen": -442.0, |
|
"logps/rejected": -418.0, |
|
"loss": 0.7136, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -6.59375, |
|
"rewards/margins": 0.671875, |
|
"rewards/rejected": -7.28125, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.35583464154892724, |
|
"grad_norm": 7.227855368315395, |
|
"learning_rate": 4.908548135520752e-05, |
|
"logits/chosen": -14.8125, |
|
"logits/rejected": -14.9375, |
|
"logps/chosen": -456.0, |
|
"logps/rejected": -416.0, |
|
"loss": 0.6655, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -6.9375, |
|
"rewards/margins": 0.5390625, |
|
"rewards/rejected": -7.5, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.36106750392464676, |
|
"grad_norm": 9.146985437020582, |
|
"learning_rate": 4.9023271717938224e-05, |
|
"logits/chosen": -14.125, |
|
"logits/rejected": -14.0625, |
|
"logps/chosen": -528.0, |
|
"logps/rejected": -486.0, |
|
"loss": 0.7974, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -9.0625, |
|
"rewards/margins": 0.53515625, |
|
"rewards/rejected": -9.5625, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.3663003663003663, |
|
"grad_norm": 9.278505608910281, |
|
"learning_rate": 4.8959057310797286e-05, |
|
"logits/chosen": -14.0625, |
|
"logits/rejected": -14.3125, |
|
"logps/chosen": -486.0, |
|
"logps/rejected": -428.0, |
|
"loss": 0.7751, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -6.90625, |
|
"rewards/margins": 0.427734375, |
|
"rewards/rejected": -7.3125, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.3715332286760858, |
|
"grad_norm": 9.712228745007655, |
|
"learning_rate": 4.889284349255141e-05, |
|
"logits/chosen": -14.6875, |
|
"logits/rejected": -14.6875, |
|
"logps/chosen": -506.0, |
|
"logps/rejected": -474.0, |
|
"loss": 0.6715, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -7.46875, |
|
"rewards/margins": 1.1328125, |
|
"rewards/rejected": -8.5625, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.37676609105180536, |
|
"grad_norm": 8.521168690100787, |
|
"learning_rate": 4.8824635788820475e-05, |
|
"logits/chosen": -14.375, |
|
"logits/rejected": -14.25, |
|
"logps/chosen": -448.0, |
|
"logps/rejected": -446.0, |
|
"loss": 0.8019, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -7.875, |
|
"rewards/margins": 0.453125, |
|
"rewards/rejected": -8.3125, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.3819989534275249, |
|
"grad_norm": 11.10196898625787, |
|
"learning_rate": 4.8754439891616434e-05, |
|
"logits/chosen": -14.1875, |
|
"logits/rejected": -14.1875, |
|
"logps/chosen": -472.0, |
|
"logps/rejected": -458.0, |
|
"loss": 0.8141, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -7.34375, |
|
"rewards/margins": 0.60546875, |
|
"rewards/rejected": -7.9375, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.3872318158032444, |
|
"grad_norm": 10.792601975369012, |
|
"learning_rate": 4.8682261658868264e-05, |
|
"logits/chosen": -14.25, |
|
"logits/rejected": -14.375, |
|
"logps/chosen": -450.0, |
|
"logps/rejected": -414.0, |
|
"loss": 0.6468, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -7.34375, |
|
"rewards/margins": 0.8828125, |
|
"rewards/rejected": -8.25, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.3924646781789639, |
|
"grad_norm": 55.736192989145714, |
|
"learning_rate": 4.860810711393317e-05, |
|
"logits/chosen": -14.75, |
|
"logits/rejected": -14.5625, |
|
"logps/chosen": -480.0, |
|
"logps/rejected": -502.0, |
|
"loss": 1.0953, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -8.5625, |
|
"rewards/margins": 0.93359375, |
|
"rewards/rejected": -9.5, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.3976975405546834, |
|
"grad_norm": 8.990314887586853, |
|
"learning_rate": 4.853198244509386e-05, |
|
"logits/chosen": -14.625, |
|
"logits/rejected": -14.625, |
|
"logps/chosen": -450.0, |
|
"logps/rejected": -434.0, |
|
"loss": 0.7577, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -7.78125, |
|
"rewards/margins": 0.6640625, |
|
"rewards/rejected": -8.4375, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.40293040293040294, |
|
"grad_norm": 7.965840471202787, |
|
"learning_rate": 4.845389400504221e-05, |
|
"logits/chosen": -14.375, |
|
"logits/rejected": -14.25, |
|
"logps/chosen": -488.0, |
|
"logps/rejected": -490.0, |
|
"loss": 0.7896, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -7.5625, |
|
"rewards/margins": 0.484375, |
|
"rewards/rejected": -8.0625, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.40816326530612246, |
|
"grad_norm": 11.497968950154975, |
|
"learning_rate": 4.837384831034905e-05, |
|
"logits/chosen": -13.875, |
|
"logits/rejected": -13.75, |
|
"logps/chosen": -466.0, |
|
"logps/rejected": -438.0, |
|
"loss": 0.6823, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -7.15625, |
|
"rewards/margins": 0.6875, |
|
"rewards/rejected": -7.84375, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.413396127681842, |
|
"grad_norm": 9.66137517497451, |
|
"learning_rate": 4.829185204092039e-05, |
|
"logits/chosen": -14.5625, |
|
"logits/rejected": -14.25, |
|
"logps/chosen": -416.0, |
|
"logps/rejected": -442.0, |
|
"loss": 0.7498, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -7.0, |
|
"rewards/margins": 1.15625, |
|
"rewards/rejected": -8.125, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.4186289900575615, |
|
"grad_norm": 10.37574761233887, |
|
"learning_rate": 4.8207912039439964e-05, |
|
"logits/chosen": -15.4375, |
|
"logits/rejected": -15.625, |
|
"logps/chosen": -504.0, |
|
"logps/rejected": -466.0, |
|
"loss": 0.8479, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -6.875, |
|
"rewards/margins": 0.29296875, |
|
"rewards/rejected": -7.1875, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.423861852433281, |
|
"grad_norm": 10.351543599017008, |
|
"learning_rate": 4.812203531079819e-05, |
|
"logits/chosen": -15.25, |
|
"logits/rejected": -15.375, |
|
"logps/chosen": -476.0, |
|
"logps/rejected": -454.0, |
|
"loss": 0.7395, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -7.09375, |
|
"rewards/margins": 0.75, |
|
"rewards/rejected": -7.84375, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.4290947148090005, |
|
"grad_norm": 8.622095411540833, |
|
"learning_rate": 4.803422902150762e-05, |
|
"logits/chosen": -14.375, |
|
"logits/rejected": -14.4375, |
|
"logps/chosen": -494.0, |
|
"logps/rejected": -456.0, |
|
"loss": 0.7025, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -8.6875, |
|
"rewards/margins": 0.6875, |
|
"rewards/rejected": -9.375, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.43432757718472004, |
|
"grad_norm": 9.264123822708422, |
|
"learning_rate": 4.794450049910487e-05, |
|
"logits/chosen": -12.9375, |
|
"logits/rejected": -12.9375, |
|
"logps/chosen": -454.0, |
|
"logps/rejected": -438.0, |
|
"loss": 0.8018, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -8.125, |
|
"rewards/margins": 0.369140625, |
|
"rewards/rejected": -8.5, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.43956043956043955, |
|
"grad_norm": 7.9655618610185845, |
|
"learning_rate": 4.785285723153915e-05, |
|
"logits/chosen": -11.625, |
|
"logits/rejected": -12.0, |
|
"logps/chosen": -536.0, |
|
"logps/rejected": -472.0, |
|
"loss": 0.7406, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -7.53125, |
|
"rewards/margins": 0.6953125, |
|
"rewards/rejected": -8.25, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.44479330193615907, |
|
"grad_norm": 10.645509151597812, |
|
"learning_rate": 4.775930686654738e-05, |
|
"logits/chosen": -12.0625, |
|
"logits/rejected": -12.125, |
|
"logps/chosen": -470.0, |
|
"logps/rejected": -440.0, |
|
"loss": 0.733, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -8.125, |
|
"rewards/margins": 1.0078125, |
|
"rewards/rejected": -9.125, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.4500261643118786, |
|
"grad_norm": 9.095410131374702, |
|
"learning_rate": 4.7663857211015936e-05, |
|
"logits/chosen": -13.0, |
|
"logits/rejected": -12.625, |
|
"logps/chosen": -434.0, |
|
"logps/rejected": -468.0, |
|
"loss": 0.7619, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -6.53125, |
|
"rewards/margins": 0.5546875, |
|
"rewards/rejected": -7.0625, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.4552590266875981, |
|
"grad_norm": 7.213273555139093, |
|
"learning_rate": 4.756651623032922e-05, |
|
"logits/chosen": -12.625, |
|
"logits/rejected": -12.75, |
|
"logps/chosen": -458.0, |
|
"logps/rejected": -412.0, |
|
"loss": 0.7308, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -6.40625, |
|
"rewards/margins": 0.3359375, |
|
"rewards/rejected": -6.75, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.4604918890633176, |
|
"grad_norm": 10.25034124783594, |
|
"learning_rate": 4.746729204770491e-05, |
|
"logits/chosen": -12.25, |
|
"logits/rejected": -12.125, |
|
"logps/chosen": -532.0, |
|
"logps/rejected": -470.0, |
|
"loss": 0.688, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -8.6875, |
|
"rewards/margins": 0.9140625, |
|
"rewards/rejected": -9.625, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.46572475143903713, |
|
"grad_norm": 9.164696578171599, |
|
"learning_rate": 4.736619294351607e-05, |
|
"logits/chosen": -11.4375, |
|
"logits/rejected": -11.25, |
|
"logps/chosen": -556.0, |
|
"logps/rejected": -516.0, |
|
"loss": 0.7735, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -9.8125, |
|
"rewards/margins": 0.68359375, |
|
"rewards/rejected": -10.5, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.47095761381475665, |
|
"grad_norm": 9.176482738323408, |
|
"learning_rate": 4.726322735460012e-05, |
|
"logits/chosen": -11.75, |
|
"logits/rejected": -11.5, |
|
"logps/chosen": -476.0, |
|
"logps/rejected": -510.0, |
|
"loss": 0.761, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -7.875, |
|
"rewards/margins": 1.3984375, |
|
"rewards/rejected": -9.25, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.47619047619047616, |
|
"grad_norm": 12.033840542792827, |
|
"learning_rate": 4.715840387355481e-05, |
|
"logits/chosen": -12.0625, |
|
"logits/rejected": -11.875, |
|
"logps/chosen": -452.0, |
|
"logps/rejected": -440.0, |
|
"loss": 0.84, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -8.75, |
|
"rewards/margins": 0.83203125, |
|
"rewards/rejected": -9.5625, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.48142333856619574, |
|
"grad_norm": 9.951231450685535, |
|
"learning_rate": 4.705173124802114e-05, |
|
"logits/chosen": -12.125, |
|
"logits/rejected": -12.0625, |
|
"logps/chosen": -528.0, |
|
"logps/rejected": -500.0, |
|
"loss": 0.6771, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -9.4375, |
|
"rewards/margins": 0.451171875, |
|
"rewards/rejected": -9.875, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.48665620094191525, |
|
"grad_norm": 5.982719210494255, |
|
"learning_rate": 4.694321837995337e-05, |
|
"logits/chosen": -12.0625, |
|
"logits/rejected": -12.125, |
|
"logps/chosen": -516.0, |
|
"logps/rejected": -482.0, |
|
"loss": 0.6545, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -9.0625, |
|
"rewards/margins": 0.9296875, |
|
"rewards/rejected": -10.0, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.49188906331763477, |
|
"grad_norm": 7.732935041516918, |
|
"learning_rate": 4.683287432487612e-05, |
|
"logits/chosen": -12.4375, |
|
"logits/rejected": -12.3125, |
|
"logps/chosen": -520.0, |
|
"logps/rejected": -482.0, |
|
"loss": 0.6515, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -9.3125, |
|
"rewards/margins": 1.125, |
|
"rewards/rejected": -10.4375, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.4971219256933543, |
|
"grad_norm": 11.192514507830417, |
|
"learning_rate": 4.672070829112868e-05, |
|
"logits/chosen": -12.6875, |
|
"logits/rejected": -12.8125, |
|
"logps/chosen": -498.0, |
|
"logps/rejected": -488.0, |
|
"loss": 0.6869, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -8.8125, |
|
"rewards/margins": 1.0390625, |
|
"rewards/rejected": -9.875, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.5023547880690737, |
|
"grad_norm": 9.893002359130774, |
|
"learning_rate": 4.6606729639096606e-05, |
|
"logits/chosen": -12.25, |
|
"logits/rejected": -12.1875, |
|
"logps/chosen": -520.0, |
|
"logps/rejected": -520.0, |
|
"loss": 0.6144, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -10.4375, |
|
"rewards/margins": 1.484375, |
|
"rewards/rejected": -11.9375, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.5075876504447933, |
|
"grad_norm": 11.075478903393348, |
|
"learning_rate": 4.6490947880430515e-05, |
|
"logits/chosen": -11.5, |
|
"logits/rejected": -11.0, |
|
"logps/chosen": -584.0, |
|
"logps/rejected": -520.0, |
|
"loss": 0.7253, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -10.75, |
|
"rewards/margins": 1.4453125, |
|
"rewards/rejected": -12.1875, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.5128205128205128, |
|
"grad_norm": 7.207931474384908, |
|
"learning_rate": 4.637337267725239e-05, |
|
"logits/chosen": -12.0625, |
|
"logits/rejected": -11.8125, |
|
"logps/chosen": -584.0, |
|
"logps/rejected": -536.0, |
|
"loss": 0.6751, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -10.25, |
|
"rewards/margins": 1.5078125, |
|
"rewards/rejected": -11.75, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.5180533751962323, |
|
"grad_norm": 10.223265502840688, |
|
"learning_rate": 4.625401384134921e-05, |
|
"logits/chosen": -12.75, |
|
"logits/rejected": -12.6875, |
|
"logps/chosen": -504.0, |
|
"logps/rejected": -466.0, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -9.5625, |
|
"rewards/margins": 1.0, |
|
"rewards/rejected": -10.5625, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.5232862375719518, |
|
"grad_norm": 9.366756020201832, |
|
"learning_rate": 4.613288133335418e-05, |
|
"logits/chosen": -12.1875, |
|
"logits/rejected": -12.0625, |
|
"logps/chosen": -490.0, |
|
"logps/rejected": -472.0, |
|
"loss": 0.7333, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -9.625, |
|
"rewards/margins": 1.0, |
|
"rewards/rejected": -10.625, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5285190999476713, |
|
"grad_norm": 9.668457676351762, |
|
"learning_rate": 4.600998526191553e-05, |
|
"logits/chosen": -12.6875, |
|
"logits/rejected": -12.5625, |
|
"logps/chosen": -540.0, |
|
"logps/rejected": -544.0, |
|
"loss": 0.8312, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -9.9375, |
|
"rewards/margins": 0.73828125, |
|
"rewards/rejected": -10.6875, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.533751962323391, |
|
"grad_norm": 9.169851106004764, |
|
"learning_rate": 4.588533588285287e-05, |
|
"logits/chosen": -12.1875, |
|
"logits/rejected": -12.1875, |
|
"logps/chosen": -540.0, |
|
"logps/rejected": -502.0, |
|
"loss": 0.6978, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -10.375, |
|
"rewards/margins": 1.3046875, |
|
"rewards/rejected": -11.6875, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.5389848246991105, |
|
"grad_norm": 6.76805241291535, |
|
"learning_rate": 4.5758943598301354e-05, |
|
"logits/chosen": -11.4375, |
|
"logits/rejected": -11.1875, |
|
"logps/chosen": -556.0, |
|
"logps/rejected": -492.0, |
|
"loss": 0.7095, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -10.875, |
|
"rewards/margins": 0.66796875, |
|
"rewards/rejected": -11.5, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.54421768707483, |
|
"grad_norm": 7.48260799269685, |
|
"learning_rate": 4.5630818955843646e-05, |
|
"logits/chosen": -12.0, |
|
"logits/rejected": -11.8125, |
|
"logps/chosen": -506.0, |
|
"logps/rejected": -536.0, |
|
"loss": 0.7073, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -9.9375, |
|
"rewards/margins": 1.03125, |
|
"rewards/rejected": -10.9375, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.5494505494505495, |
|
"grad_norm": 9.671760718133113, |
|
"learning_rate": 4.550097264762968e-05, |
|
"logits/chosen": -12.625, |
|
"logits/rejected": -12.625, |
|
"logps/chosen": -492.0, |
|
"logps/rejected": -492.0, |
|
"loss": 0.8316, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -8.875, |
|
"rewards/margins": 0.71875, |
|
"rewards/rejected": -9.5625, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.554683411826269, |
|
"grad_norm": 7.232272637524744, |
|
"learning_rate": 4.536941550948439e-05, |
|
"logits/chosen": -13.3125, |
|
"logits/rejected": -13.25, |
|
"logps/chosen": -512.0, |
|
"logps/rejected": -482.0, |
|
"loss": 0.7443, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -9.25, |
|
"rewards/margins": 0.84765625, |
|
"rewards/rejected": -10.0625, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.5599162742019885, |
|
"grad_norm": 7.573253707315908, |
|
"learning_rate": 4.5236158520003444e-05, |
|
"logits/chosen": -12.9375, |
|
"logits/rejected": -13.0, |
|
"logps/chosen": -536.0, |
|
"logps/rejected": -502.0, |
|
"loss": 0.6025, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -8.1875, |
|
"rewards/margins": 1.0390625, |
|
"rewards/rejected": -9.25, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.565149136577708, |
|
"grad_norm": 7.888840261037634, |
|
"learning_rate": 4.510121279963709e-05, |
|
"logits/chosen": -12.1875, |
|
"logits/rejected": -12.3125, |
|
"logps/chosen": -528.0, |
|
"logps/rejected": -496.0, |
|
"loss": 0.7204, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -10.0625, |
|
"rewards/margins": 1.03125, |
|
"rewards/rejected": -11.0625, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.5703819989534276, |
|
"grad_norm": 13.762939010968964, |
|
"learning_rate": 4.4964589609762095e-05, |
|
"logits/chosen": -11.9375, |
|
"logits/rejected": -12.125, |
|
"logps/chosen": -564.0, |
|
"logps/rejected": -494.0, |
|
"loss": 0.8346, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -11.4375, |
|
"rewards/margins": 0.7734375, |
|
"rewards/rejected": -12.1875, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.5756148613291471, |
|
"grad_norm": 9.56155800775074, |
|
"learning_rate": 4.482630035174205e-05, |
|
"logits/chosen": -12.0, |
|
"logits/rejected": -11.9375, |
|
"logps/chosen": -512.0, |
|
"logps/rejected": -492.0, |
|
"loss": 0.6975, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -10.8125, |
|
"rewards/margins": 0.828125, |
|
"rewards/rejected": -11.625, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.5808477237048666, |
|
"grad_norm": 9.650034622926128, |
|
"learning_rate": 4.468635656597582e-05, |
|
"logits/chosen": -12.0, |
|
"logits/rejected": -11.9375, |
|
"logps/chosen": -490.0, |
|
"logps/rejected": -488.0, |
|
"loss": 0.7859, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -9.75, |
|
"rewards/margins": 0.77734375, |
|
"rewards/rejected": -10.5, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.5860805860805861, |
|
"grad_norm": 9.009994906294855, |
|
"learning_rate": 4.454476993093454e-05, |
|
"logits/chosen": -11.4375, |
|
"logits/rejected": -11.375, |
|
"logps/chosen": -580.0, |
|
"logps/rejected": -524.0, |
|
"loss": 0.9287, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -10.375, |
|
"rewards/margins": 0.58203125, |
|
"rewards/rejected": -10.9375, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.5913134484563056, |
|
"grad_norm": 10.042283135138778, |
|
"learning_rate": 4.440155226218703e-05, |
|
"logits/chosen": -11.375, |
|
"logits/rejected": -11.1875, |
|
"logps/chosen": -496.0, |
|
"logps/rejected": -504.0, |
|
"loss": 0.8404, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -9.25, |
|
"rewards/margins": 0.66015625, |
|
"rewards/rejected": -9.875, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.5965463108320251, |
|
"grad_norm": 8.418582913270656, |
|
"learning_rate": 4.425671551141376e-05, |
|
"logits/chosen": -11.125, |
|
"logits/rejected": -10.875, |
|
"logps/chosen": -564.0, |
|
"logps/rejected": -520.0, |
|
"loss": 0.6583, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -10.5625, |
|
"rewards/margins": 0.8671875, |
|
"rewards/rejected": -11.5, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.6017791732077447, |
|
"grad_norm": 9.416691885986982, |
|
"learning_rate": 4.411027176540948e-05, |
|
"logits/chosen": -10.75, |
|
"logits/rejected": -10.5625, |
|
"logps/chosen": -524.0, |
|
"logps/rejected": -510.0, |
|
"loss": 0.6848, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -11.0, |
|
"rewards/margins": 1.234375, |
|
"rewards/rejected": -12.1875, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.6070120355834642, |
|
"grad_norm": 10.865786644213799, |
|
"learning_rate": 4.396223324507454e-05, |
|
"logits/chosen": -10.8125, |
|
"logits/rejected": -10.75, |
|
"logps/chosen": -524.0, |
|
"logps/rejected": -454.0, |
|
"loss": 0.9435, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -11.4375, |
|
"rewards/margins": 0.640625, |
|
"rewards/rejected": -12.125, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.6122448979591837, |
|
"grad_norm": 12.184578875756303, |
|
"learning_rate": 4.3812612304395046e-05, |
|
"logits/chosen": -11.25, |
|
"logits/rejected": -11.125, |
|
"logps/chosen": -556.0, |
|
"logps/rejected": -564.0, |
|
"loss": 0.7482, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -11.375, |
|
"rewards/margins": 0.76953125, |
|
"rewards/rejected": -12.125, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.6174777603349032, |
|
"grad_norm": 7.183055929786656, |
|
"learning_rate": 4.366142142941195e-05, |
|
"logits/chosen": -10.6875, |
|
"logits/rejected": -10.375, |
|
"logps/chosen": -552.0, |
|
"logps/rejected": -516.0, |
|
"loss": 0.711, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -11.125, |
|
"rewards/margins": 0.8515625, |
|
"rewards/rejected": -12.0, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.6227106227106227, |
|
"grad_norm": 7.359506759653038, |
|
"learning_rate": 4.350867323717902e-05, |
|
"logits/chosen": -10.625, |
|
"logits/rejected": -10.375, |
|
"logps/chosen": -528.0, |
|
"logps/rejected": -510.0, |
|
"loss": 0.5868, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -10.6875, |
|
"rewards/margins": 0.7890625, |
|
"rewards/rejected": -11.4375, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.6279434850863422, |
|
"grad_norm": 6.501759078401181, |
|
"learning_rate": 4.335438047470996e-05, |
|
"logits/chosen": -10.75, |
|
"logits/rejected": -10.5, |
|
"logps/chosen": -528.0, |
|
"logps/rejected": -536.0, |
|
"loss": 0.6786, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -10.625, |
|
"rewards/margins": 1.4296875, |
|
"rewards/rejected": -12.0625, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.6331763474620618, |
|
"grad_norm": 7.620878974669834, |
|
"learning_rate": 4.3198556017914635e-05, |
|
"logits/chosen": -11.1875, |
|
"logits/rejected": -10.875, |
|
"logps/chosen": -588.0, |
|
"logps/rejected": -524.0, |
|
"loss": 0.7357, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -11.125, |
|
"rewards/margins": 1.40625, |
|
"rewards/rejected": -12.5625, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.6384092098377813, |
|
"grad_norm": 9.34056834357026, |
|
"learning_rate": 4.30412128705246e-05, |
|
"logits/chosen": -11.4375, |
|
"logits/rejected": -11.3125, |
|
"logps/chosen": -564.0, |
|
"logps/rejected": -520.0, |
|
"loss": 0.7723, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -10.375, |
|
"rewards/margins": 1.109375, |
|
"rewards/rejected": -11.4375, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.6436420722135008, |
|
"grad_norm": 7.674870336983743, |
|
"learning_rate": 4.28823641630079e-05, |
|
"logits/chosen": -11.375, |
|
"logits/rejected": -11.1875, |
|
"logps/chosen": -568.0, |
|
"logps/rejected": -516.0, |
|
"loss": 0.7292, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -10.125, |
|
"rewards/margins": 1.5078125, |
|
"rewards/rejected": -11.625, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.6488749345892203, |
|
"grad_norm": 7.40544549406474, |
|
"learning_rate": 4.2722023151473294e-05, |
|
"logits/chosen": -10.9375, |
|
"logits/rejected": -10.9375, |
|
"logps/chosen": -486.0, |
|
"logps/rejected": -492.0, |
|
"loss": 0.7212, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -10.75, |
|
"rewards/margins": 1.1171875, |
|
"rewards/rejected": -11.875, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.6541077969649398, |
|
"grad_norm": 9.53123578774151, |
|
"learning_rate": 4.256020321656405e-05, |
|
"logits/chosen": -10.625, |
|
"logits/rejected": -10.375, |
|
"logps/chosen": -560.0, |
|
"logps/rejected": -552.0, |
|
"loss": 0.7306, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -12.3125, |
|
"rewards/margins": 0.95703125, |
|
"rewards/rejected": -13.25, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.6593406593406593, |
|
"grad_norm": 6.161183952229396, |
|
"learning_rate": 4.239691786234133e-05, |
|
"logits/chosen": -11.0, |
|
"logits/rejected": -10.9375, |
|
"logps/chosen": -544.0, |
|
"logps/rejected": -488.0, |
|
"loss": 0.6762, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -10.9375, |
|
"rewards/margins": 1.140625, |
|
"rewards/rejected": -12.125, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.6645735217163788, |
|
"grad_norm": 8.172666817720033, |
|
"learning_rate": 4.223218071515721e-05, |
|
"logits/chosen": -11.0, |
|
"logits/rejected": -10.875, |
|
"logps/chosen": -544.0, |
|
"logps/rejected": -516.0, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -11.0625, |
|
"rewards/margins": 0.99609375, |
|
"rewards/rejected": -12.0625, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.6698063840920984, |
|
"grad_norm": 12.188827913123875, |
|
"learning_rate": 4.206600552251756e-05, |
|
"logits/chosen": -11.25, |
|
"logits/rejected": -11.125, |
|
"logps/chosen": -524.0, |
|
"logps/rejected": -486.0, |
|
"loss": 0.79, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -10.5, |
|
"rewards/margins": 0.9609375, |
|
"rewards/rejected": -11.4375, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.6750392464678179, |
|
"grad_norm": 18.826047916539064, |
|
"learning_rate": 4.189840615193486e-05, |
|
"logits/chosen": -11.6875, |
|
"logits/rejected": -11.375, |
|
"logps/chosen": -498.0, |
|
"logps/rejected": -520.0, |
|
"loss": 0.7081, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -9.8125, |
|
"rewards/margins": 0.96875, |
|
"rewards/rejected": -10.8125, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.6802721088435374, |
|
"grad_norm": 6.969800701972501, |
|
"learning_rate": 4.172939658977084e-05, |
|
"logits/chosen": -11.1875, |
|
"logits/rejected": -11.0, |
|
"logps/chosen": -528.0, |
|
"logps/rejected": -544.0, |
|
"loss": 0.7148, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -10.625, |
|
"rewards/margins": 0.921875, |
|
"rewards/rejected": -11.5625, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.6855049712192569, |
|
"grad_norm": 14.859556755269326, |
|
"learning_rate": 4.155899094006938e-05, |
|
"logits/chosen": -10.9375, |
|
"logits/rejected": -10.8125, |
|
"logps/chosen": -564.0, |
|
"logps/rejected": -528.0, |
|
"loss": 0.7416, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -10.875, |
|
"rewards/margins": 0.9609375, |
|
"rewards/rejected": -11.875, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.6907378335949764, |
|
"grad_norm": 6.957089873593148, |
|
"learning_rate": 4.138720342337947e-05, |
|
"logits/chosen": -11.125, |
|
"logits/rejected": -11.0, |
|
"logps/chosen": -564.0, |
|
"logps/rejected": -552.0, |
|
"loss": 0.6879, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -11.75, |
|
"rewards/margins": 1.0859375, |
|
"rewards/rejected": -12.8125, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.6959706959706959, |
|
"grad_norm": 9.707692830662324, |
|
"learning_rate": 4.121404837556851e-05, |
|
"logits/chosen": -11.9375, |
|
"logits/rejected": -11.5625, |
|
"logps/chosen": -580.0, |
|
"logps/rejected": -616.0, |
|
"loss": 0.6995, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -11.5625, |
|
"rewards/margins": 1.546875, |
|
"rewards/rejected": -13.0625, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.7012035583464155, |
|
"grad_norm": 8.770735737960996, |
|
"learning_rate": 4.103954024662594e-05, |
|
"logits/chosen": -12.375, |
|
"logits/rejected": -12.4375, |
|
"logps/chosen": -568.0, |
|
"logps/rejected": -536.0, |
|
"loss": 0.719, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -11.25, |
|
"rewards/margins": 0.640625, |
|
"rewards/rejected": -11.875, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.706436420722135, |
|
"grad_norm": 7.1593445389385515, |
|
"learning_rate": 4.086369359945743e-05, |
|
"logits/chosen": -12.5, |
|
"logits/rejected": -12.4375, |
|
"logps/chosen": -576.0, |
|
"logps/rejected": -564.0, |
|
"loss": 0.7039, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -10.5625, |
|
"rewards/margins": 1.0546875, |
|
"rewards/rejected": -11.625, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.7116692830978545, |
|
"grad_norm": 9.243196425255588, |
|
"learning_rate": 4.0686523108669496e-05, |
|
"logits/chosen": -12.5625, |
|
"logits/rejected": -12.625, |
|
"logps/chosen": -600.0, |
|
"logps/rejected": -572.0, |
|
"loss": 0.8775, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -10.9375, |
|
"rewards/margins": 1.125, |
|
"rewards/rejected": -12.0625, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.716902145473574, |
|
"grad_norm": 11.110076974797884, |
|
"learning_rate": 4.050804355934498e-05, |
|
"logits/chosen": -11.25, |
|
"logits/rejected": -11.1875, |
|
"logps/chosen": -568.0, |
|
"logps/rejected": -524.0, |
|
"loss": 0.6187, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -11.4375, |
|
"rewards/margins": 0.9375, |
|
"rewards/rejected": -12.375, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.7221350078492935, |
|
"grad_norm": 6.00630186538614, |
|
"learning_rate": 4.032826984580914e-05, |
|
"logits/chosen": -12.4375, |
|
"logits/rejected": -12.125, |
|
"logps/chosen": -512.0, |
|
"logps/rejected": -484.0, |
|
"loss": 0.6801, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -10.3125, |
|
"rewards/margins": 1.078125, |
|
"rewards/rejected": -11.375, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.727367870225013, |
|
"grad_norm": 8.977754421118522, |
|
"learning_rate": 4.014721697038678e-05, |
|
"logits/chosen": -11.6875, |
|
"logits/rejected": -11.4375, |
|
"logps/chosen": -490.0, |
|
"logps/rejected": -482.0, |
|
"loss": 0.6641, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -9.625, |
|
"rewards/margins": 1.1015625, |
|
"rewards/rejected": -10.75, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.7326007326007326, |
|
"grad_norm": 9.068303784048501, |
|
"learning_rate": 3.996490004215021e-05, |
|
"logits/chosen": -11.8125, |
|
"logits/rejected": -11.6875, |
|
"logps/chosen": -512.0, |
|
"logps/rejected": -498.0, |
|
"loss": 0.6787, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -10.3125, |
|
"rewards/margins": 1.125, |
|
"rewards/rejected": -11.4375, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.7378335949764521, |
|
"grad_norm": 10.091388237018714, |
|
"learning_rate": 3.978133427565842e-05, |
|
"logits/chosen": -11.4375, |
|
"logits/rejected": -11.125, |
|
"logps/chosen": -524.0, |
|
"logps/rejected": -516.0, |
|
"loss": 0.7733, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -11.25, |
|
"rewards/margins": 0.84375, |
|
"rewards/rejected": -12.125, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.7430664573521716, |
|
"grad_norm": 9.639237375339137, |
|
"learning_rate": 3.9596534989687416e-05, |
|
"logits/chosen": -12.4375, |
|
"logits/rejected": -12.1875, |
|
"logps/chosen": -506.0, |
|
"logps/rejected": -504.0, |
|
"loss": 0.7797, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -10.0, |
|
"rewards/margins": 1.8671875, |
|
"rewards/rejected": -11.875, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.7482993197278912, |
|
"grad_norm": 6.806097018429323, |
|
"learning_rate": 3.9410517605951824e-05, |
|
"logits/chosen": -13.125, |
|
"logits/rejected": -12.9375, |
|
"logps/chosen": -532.0, |
|
"logps/rejected": -488.0, |
|
"loss": 0.6814, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -10.6875, |
|
"rewards/margins": 0.486328125, |
|
"rewards/rejected": -11.1875, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.7535321821036107, |
|
"grad_norm": 9.16956662251956, |
|
"learning_rate": 3.922329764781793e-05, |
|
"logits/chosen": -12.375, |
|
"logits/rejected": -12.125, |
|
"logps/chosen": -588.0, |
|
"logps/rejected": -544.0, |
|
"loss": 0.7421, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -11.0, |
|
"rewards/margins": 1.1015625, |
|
"rewards/rejected": -12.125, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.7587650444793302, |
|
"grad_norm": 7.958218718062474, |
|
"learning_rate": 3.903489073900828e-05, |
|
"logits/chosen": -11.5625, |
|
"logits/rejected": -11.5, |
|
"logps/chosen": -552.0, |
|
"logps/rejected": -532.0, |
|
"loss": 0.7735, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -11.5625, |
|
"rewards/margins": 0.8203125, |
|
"rewards/rejected": -12.375, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.7639979068550498, |
|
"grad_norm": 8.365628327112017, |
|
"learning_rate": 3.884531260229778e-05, |
|
"logits/chosen": -11.75, |
|
"logits/rejected": -11.625, |
|
"logps/chosen": -536.0, |
|
"logps/rejected": -490.0, |
|
"loss": 0.7042, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -11.1875, |
|
"rewards/margins": 0.5390625, |
|
"rewards/rejected": -11.75, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.7692307692307693, |
|
"grad_norm": 7.681754910048477, |
|
"learning_rate": 3.8654579058201704e-05, |
|
"logits/chosen": -11.5625, |
|
"logits/rejected": -11.3125, |
|
"logps/chosen": -516.0, |
|
"logps/rejected": -492.0, |
|
"loss": 0.6215, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -10.3125, |
|
"rewards/margins": 1.109375, |
|
"rewards/rejected": -11.4375, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.7744636316064888, |
|
"grad_norm": 8.699313776529005, |
|
"learning_rate": 3.8462706023655404e-05, |
|
"logits/chosen": -10.9375, |
|
"logits/rejected": -11.0, |
|
"logps/chosen": -516.0, |
|
"logps/rejected": -520.0, |
|
"loss": 0.7719, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -11.5625, |
|
"rewards/margins": 0.89453125, |
|
"rewards/rejected": -12.4375, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.7796964939822083, |
|
"grad_norm": 10.20603489800159, |
|
"learning_rate": 3.8269709510686005e-05, |
|
"logits/chosen": -10.9375, |
|
"logits/rejected": -10.8125, |
|
"logps/chosen": -536.0, |
|
"logps/rejected": -520.0, |
|
"loss": 0.7257, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -11.1875, |
|
"rewards/margins": 0.7265625, |
|
"rewards/rejected": -11.9375, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.7849293563579278, |
|
"grad_norm": 6.371725891794927, |
|
"learning_rate": 3.807560562507624e-05, |
|
"logits/chosen": -11.4375, |
|
"logits/rejected": -11.0, |
|
"logps/chosen": -478.0, |
|
"logps/rejected": -516.0, |
|
"loss": 0.6052, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -10.8125, |
|
"rewards/margins": 1.390625, |
|
"rewards/rejected": -12.1875, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.7901622187336473, |
|
"grad_norm": 8.044514431743275, |
|
"learning_rate": 3.7880410565020366e-05, |
|
"logits/chosen": -11.9375, |
|
"logits/rejected": -11.625, |
|
"logps/chosen": -532.0, |
|
"logps/rejected": -552.0, |
|
"loss": 0.722, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -11.4375, |
|
"rewards/margins": 0.9765625, |
|
"rewards/rejected": -12.4375, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.7953950811093669, |
|
"grad_norm": 6.3419982155290855, |
|
"learning_rate": 3.76841406197724e-05, |
|
"logits/chosen": -12.25, |
|
"logits/rejected": -12.125, |
|
"logps/chosen": -468.0, |
|
"logps/rejected": -460.0, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -9.6875, |
|
"rewards/margins": 1.0390625, |
|
"rewards/rejected": -10.6875, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.8006279434850864, |
|
"grad_norm": 13.822774811604797, |
|
"learning_rate": 3.748681216828678e-05, |
|
"logits/chosen": -12.1875, |
|
"logits/rejected": -11.75, |
|
"logps/chosen": -572.0, |
|
"logps/rejected": -644.0, |
|
"loss": 0.7042, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -11.4375, |
|
"rewards/margins": 0.76953125, |
|
"rewards/rejected": -12.25, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.8058608058608059, |
|
"grad_norm": 8.295919264278089, |
|
"learning_rate": 3.728844167785151e-05, |
|
"logits/chosen": -11.875, |
|
"logits/rejected": -11.625, |
|
"logps/chosen": -584.0, |
|
"logps/rejected": -572.0, |
|
"loss": 0.7166, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -11.1875, |
|
"rewards/margins": 0.8359375, |
|
"rewards/rejected": -12.0, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.8110936682365254, |
|
"grad_norm": 8.520816598792539, |
|
"learning_rate": 3.7089045702713976e-05, |
|
"logits/chosen": -12.0625, |
|
"logits/rejected": -12.25, |
|
"logps/chosen": -584.0, |
|
"logps/rejected": -510.0, |
|
"loss": 0.7012, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -10.3125, |
|
"rewards/margins": 1.0390625, |
|
"rewards/rejected": -11.375, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.8163265306122449, |
|
"grad_norm": 8.142573842126938, |
|
"learning_rate": 3.6888640882699425e-05, |
|
"logits/chosen": -11.4375, |
|
"logits/rejected": -11.3125, |
|
"logps/chosen": -584.0, |
|
"logps/rejected": -568.0, |
|
"loss": 0.7397, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -11.625, |
|
"rewards/margins": 0.89453125, |
|
"rewards/rejected": -12.5, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.8215593929879644, |
|
"grad_norm": 7.481987186912928, |
|
"learning_rate": 3.668724394182239e-05, |
|
"logits/chosen": -11.375, |
|
"logits/rejected": -10.9375, |
|
"logps/chosen": -528.0, |
|
"logps/rejected": -524.0, |
|
"loss": 0.7039, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -11.9375, |
|
"rewards/margins": 1.1484375, |
|
"rewards/rejected": -13.0625, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.826792255363684, |
|
"grad_norm": 8.042620411635573, |
|
"learning_rate": 3.648487168689104e-05, |
|
"logits/chosen": -11.5625, |
|
"logits/rejected": -11.375, |
|
"logps/chosen": -604.0, |
|
"logps/rejected": -536.0, |
|
"loss": 0.7544, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -11.25, |
|
"rewards/margins": 0.69140625, |
|
"rewards/rejected": -11.9375, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.8320251177394035, |
|
"grad_norm": 6.330392212337275, |
|
"learning_rate": 3.628154100610463e-05, |
|
"logits/chosen": -11.875, |
|
"logits/rejected": -11.5, |
|
"logps/chosen": -528.0, |
|
"logps/rejected": -496.0, |
|
"loss": 0.7201, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -9.9375, |
|
"rewards/margins": 1.0625, |
|
"rewards/rejected": -11.0, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.837257980115123, |
|
"grad_norm": 9.495665962948788, |
|
"learning_rate": 3.607726886764415e-05, |
|
"logits/chosen": -12.0625, |
|
"logits/rejected": -11.875, |
|
"logps/chosen": -506.0, |
|
"logps/rejected": -544.0, |
|
"loss": 0.7175, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -10.3125, |
|
"rewards/margins": 0.96484375, |
|
"rewards/rejected": -11.25, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.8424908424908425, |
|
"grad_norm": 8.88992804946847, |
|
"learning_rate": 3.5872072318256375e-05, |
|
"logits/chosen": -11.9375, |
|
"logits/rejected": -11.75, |
|
"logps/chosen": -596.0, |
|
"logps/rejected": -552.0, |
|
"loss": 0.7063, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -10.75, |
|
"rewards/margins": 1.25, |
|
"rewards/rejected": -12.0, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.847723704866562, |
|
"grad_norm": 6.549366426211227, |
|
"learning_rate": 3.566596848183117e-05, |
|
"logits/chosen": -11.375, |
|
"logits/rejected": -11.3125, |
|
"logps/chosen": -528.0, |
|
"logps/rejected": -488.0, |
|
"loss": 0.7635, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -11.4375, |
|
"rewards/margins": 0.74609375, |
|
"rewards/rejected": -12.1875, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.8529565672422815, |
|
"grad_norm": 8.965681557356943, |
|
"learning_rate": 3.54589745579726e-05, |
|
"logits/chosen": -12.0, |
|
"logits/rejected": -11.5625, |
|
"logps/chosen": -560.0, |
|
"logps/rejected": -564.0, |
|
"loss": 0.7312, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -11.125, |
|
"rewards/margins": 0.96875, |
|
"rewards/rejected": -12.0625, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.858189429618001, |
|
"grad_norm": 7.597359211830405, |
|
"learning_rate": 3.5251107820563565e-05, |
|
"logits/chosen": -12.4375, |
|
"logits/rejected": -12.5625, |
|
"logps/chosen": -580.0, |
|
"logps/rejected": -540.0, |
|
"loss": 0.7307, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -9.875, |
|
"rewards/margins": 0.94140625, |
|
"rewards/rejected": -10.8125, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.8634222919937206, |
|
"grad_norm": 16.306018432981723, |
|
"learning_rate": 3.504238561632424e-05, |
|
"logits/chosen": -12.75, |
|
"logits/rejected": -12.6875, |
|
"logps/chosen": -512.0, |
|
"logps/rejected": -516.0, |
|
"loss": 0.788, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -9.8125, |
|
"rewards/margins": 0.9375, |
|
"rewards/rejected": -10.75, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.8686551543694401, |
|
"grad_norm": 6.41824790740516, |
|
"learning_rate": 3.483282536336451e-05, |
|
"logits/chosen": -12.75, |
|
"logits/rejected": -12.6875, |
|
"logps/chosen": -468.0, |
|
"logps/rejected": -464.0, |
|
"loss": 0.7286, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -9.125, |
|
"rewards/margins": 1.0546875, |
|
"rewards/rejected": -10.125, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.8738880167451596, |
|
"grad_norm": 8.997202419771773, |
|
"learning_rate": 3.46224445497304e-05, |
|
"logits/chosen": -11.8125, |
|
"logits/rejected": -11.8125, |
|
"logps/chosen": -552.0, |
|
"logps/rejected": -532.0, |
|
"loss": 0.6613, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -10.1875, |
|
"rewards/margins": 1.53125, |
|
"rewards/rejected": -11.75, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.8791208791208791, |
|
"grad_norm": 8.646781069565186, |
|
"learning_rate": 3.441126073194468e-05, |
|
"logits/chosen": -12.1875, |
|
"logits/rejected": -11.75, |
|
"logps/chosen": -504.0, |
|
"logps/rejected": -532.0, |
|
"loss": 0.6581, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -9.9375, |
|
"rewards/margins": 1.546875, |
|
"rewards/rejected": -11.5, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.8843537414965986, |
|
"grad_norm": 7.113786319184486, |
|
"learning_rate": 3.4199291533541735e-05, |
|
"logits/chosen": -11.625, |
|
"logits/rejected": -11.5, |
|
"logps/chosen": -520.0, |
|
"logps/rejected": -494.0, |
|
"loss": 0.6574, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -10.4375, |
|
"rewards/margins": 0.8671875, |
|
"rewards/rejected": -11.3125, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.8895866038723181, |
|
"grad_norm": 9.146215241563787, |
|
"learning_rate": 3.398655464359687e-05, |
|
"logits/chosen": -11.9375, |
|
"logits/rejected": -11.8125, |
|
"logps/chosen": -604.0, |
|
"logps/rejected": -484.0, |
|
"loss": 1.2266, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -12.8125, |
|
"rewards/margins": -1.1484375, |
|
"rewards/rejected": -11.6875, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.8948194662480377, |
|
"grad_norm": 7.301197069927249, |
|
"learning_rate": 3.377306781525015e-05, |
|
"logits/chosen": -12.0, |
|
"logits/rejected": -11.875, |
|
"logps/chosen": -560.0, |
|
"logps/rejected": -540.0, |
|
"loss": 0.7462, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -10.9375, |
|
"rewards/margins": 1.0234375, |
|
"rewards/rejected": -11.9375, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.9000523286237572, |
|
"grad_norm": 7.026924797144021, |
|
"learning_rate": 3.3558848864224876e-05, |
|
"logits/chosen": -12.25, |
|
"logits/rejected": -12.1875, |
|
"logps/chosen": -540.0, |
|
"logps/rejected": -506.0, |
|
"loss": 0.6618, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -10.4375, |
|
"rewards/margins": 0.85546875, |
|
"rewards/rejected": -11.3125, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.9052851909994767, |
|
"grad_norm": 11.941983652096013, |
|
"learning_rate": 3.334391566734082e-05, |
|
"logits/chosen": -12.0, |
|
"logits/rejected": -12.0, |
|
"logps/chosen": -492.0, |
|
"logps/rejected": -494.0, |
|
"loss": 0.7589, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -9.375, |
|
"rewards/margins": 1.1484375, |
|
"rewards/rejected": -10.5, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.9105180533751962, |
|
"grad_norm": 8.065484818366723, |
|
"learning_rate": 3.3128286161022394e-05, |
|
"logits/chosen": -11.6875, |
|
"logits/rejected": -11.75, |
|
"logps/chosen": -552.0, |
|
"logps/rejected": -510.0, |
|
"loss": 0.7712, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -10.5, |
|
"rewards/margins": 0.984375, |
|
"rewards/rejected": -11.4375, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.9157509157509157, |
|
"grad_norm": 8.045324511262809, |
|
"learning_rate": 3.2911978339801855e-05, |
|
"logits/chosen": -11.9375, |
|
"logits/rejected": -12.0, |
|
"logps/chosen": -584.0, |
|
"logps/rejected": -568.0, |
|
"loss": 0.7152, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -10.375, |
|
"rewards/margins": 1.2890625, |
|
"rewards/rejected": -11.6875, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.9209837781266352, |
|
"grad_norm": 10.25223828631299, |
|
"learning_rate": 3.269501025481763e-05, |
|
"logits/chosen": -12.375, |
|
"logits/rejected": -12.3125, |
|
"logps/chosen": -528.0, |
|
"logps/rejected": -560.0, |
|
"loss": 0.6604, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -10.125, |
|
"rewards/margins": 0.89453125, |
|
"rewards/rejected": -11.0, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.9262166405023547, |
|
"grad_norm": 6.658643984408585, |
|
"learning_rate": 3.2477400012307885e-05, |
|
"logits/chosen": -12.25, |
|
"logits/rejected": -12.25, |
|
"logps/chosen": -540.0, |
|
"logps/rejected": -540.0, |
|
"loss": 0.7548, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -9.75, |
|
"rewards/margins": 1.0, |
|
"rewards/rejected": -10.75, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.9314495028780743, |
|
"grad_norm": 7.583843351290834, |
|
"learning_rate": 3.2259165772099644e-05, |
|
"logits/chosen": -12.8125, |
|
"logits/rejected": -12.9375, |
|
"logps/chosen": -540.0, |
|
"logps/rejected": -528.0, |
|
"loss": 0.6979, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -9.75, |
|
"rewards/margins": 1.1171875, |
|
"rewards/rejected": -10.875, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.9366823652537938, |
|
"grad_norm": 6.695330181972035, |
|
"learning_rate": 3.204032574609318e-05, |
|
"logits/chosen": -12.25, |
|
"logits/rejected": -12.375, |
|
"logps/chosen": -576.0, |
|
"logps/rejected": -568.0, |
|
"loss": 0.7396, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -10.5, |
|
"rewards/margins": 1.046875, |
|
"rewards/rejected": -11.5, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.9419152276295133, |
|
"grad_norm": 7.41109532240638, |
|
"learning_rate": 3.1820898196742335e-05, |
|
"logits/chosen": -12.0, |
|
"logits/rejected": -12.0, |
|
"logps/chosen": -520.0, |
|
"logps/rejected": -548.0, |
|
"loss": 0.7853, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -11.0625, |
|
"rewards/margins": 0.8203125, |
|
"rewards/rejected": -11.875, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.9471480900052328, |
|
"grad_norm": 7.998757902945492, |
|
"learning_rate": 3.160090143553049e-05, |
|
"logits/chosen": -11.875, |
|
"logits/rejected": -11.625, |
|
"logps/chosen": -476.0, |
|
"logps/rejected": -476.0, |
|
"loss": 0.6803, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -10.375, |
|
"rewards/margins": 1.015625, |
|
"rewards/rejected": -11.4375, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.9523809523809523, |
|
"grad_norm": 7.080219482241533, |
|
"learning_rate": 3.1380353821442354e-05, |
|
"logits/chosen": -12.0, |
|
"logits/rejected": -11.8125, |
|
"logps/chosen": -448.0, |
|
"logps/rejected": -446.0, |
|
"loss": 0.7265, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -9.75, |
|
"rewards/margins": 0.76171875, |
|
"rewards/rejected": -10.5, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.957613814756672, |
|
"grad_norm": 10.03679929204167, |
|
"learning_rate": 3.1159273759431964e-05, |
|
"logits/chosen": -11.5625, |
|
"logits/rejected": -11.5, |
|
"logps/chosen": -544.0, |
|
"logps/rejected": -516.0, |
|
"loss": 0.6867, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -11.125, |
|
"rewards/margins": 1.0703125, |
|
"rewards/rejected": -12.25, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.9628466771323915, |
|
"grad_norm": 6.830881800921508, |
|
"learning_rate": 3.0937679698886786e-05, |
|
"logits/chosen": -12.0625, |
|
"logits/rejected": -11.875, |
|
"logps/chosen": -560.0, |
|
"logps/rejected": -540.0, |
|
"loss": 0.5938, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -11.125, |
|
"rewards/margins": 1.3125, |
|
"rewards/rejected": -12.4375, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.968079539508111, |
|
"grad_norm": 8.529607259118398, |
|
"learning_rate": 3.071559013208801e-05, |
|
"logits/chosen": -11.75, |
|
"logits/rejected": -11.5625, |
|
"logps/chosen": -628.0, |
|
"logps/rejected": -568.0, |
|
"loss": 0.7147, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -11.4375, |
|
"rewards/margins": 1.2109375, |
|
"rewards/rejected": -12.625, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.9733124018838305, |
|
"grad_norm": 6.915342277139341, |
|
"learning_rate": 3.0493023592667446e-05, |
|
"logits/chosen": -11.4375, |
|
"logits/rejected": -11.25, |
|
"logps/chosen": -540.0, |
|
"logps/rejected": -500.0, |
|
"loss": 0.7208, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -10.625, |
|
"rewards/margins": 0.76953125, |
|
"rewards/rejected": -11.375, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.97854526425955, |
|
"grad_norm": 9.778948710217877, |
|
"learning_rate": 3.0269998654060788e-05, |
|
"logits/chosen": -12.125, |
|
"logits/rejected": -12.0, |
|
"logps/chosen": -502.0, |
|
"logps/rejected": -462.0, |
|
"loss": 0.6835, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -9.75, |
|
"rewards/margins": 0.7421875, |
|
"rewards/rejected": -10.5, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.9837781266352695, |
|
"grad_norm": 5.655349340296246, |
|
"learning_rate": 3.0046533927957677e-05, |
|
"logits/chosen": -11.5625, |
|
"logits/rejected": -11.3125, |
|
"logps/chosen": -520.0, |
|
"logps/rejected": -540.0, |
|
"loss": 0.6201, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -10.25, |
|
"rewards/margins": 1.0, |
|
"rewards/rejected": -11.25, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.989010989010989, |
|
"grad_norm": 10.130846611986492, |
|
"learning_rate": 2.9822648062748536e-05, |
|
"logits/chosen": -12.375, |
|
"logits/rejected": -12.375, |
|
"logps/chosen": -612.0, |
|
"logps/rejected": -600.0, |
|
"loss": 0.679, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -10.5625, |
|
"rewards/margins": 1.125, |
|
"rewards/rejected": -11.6875, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.9942438513867086, |
|
"grad_norm": 7.921271665865128, |
|
"learning_rate": 2.959835974196836e-05, |
|
"logits/chosen": -11.6875, |
|
"logits/rejected": -11.5625, |
|
"logps/chosen": -516.0, |
|
"logps/rejected": -520.0, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -10.375, |
|
"rewards/margins": 1.0625, |
|
"rewards/rejected": -11.4375, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.9994767137624281, |
|
"grad_norm": 8.904276645319571, |
|
"learning_rate": 2.9373687682737484e-05, |
|
"logits/chosen": -12.0625, |
|
"logits/rejected": -12.0, |
|
"logps/chosen": -588.0, |
|
"logps/rejected": -544.0, |
|
"loss": 0.6791, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -10.8125, |
|
"rewards/margins": 0.83203125, |
|
"rewards/rejected": -11.625, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -12.6875, |
|
"eval_logits/rejected": -12.5625, |
|
"eval_logps/chosen": -536.0, |
|
"eval_logps/rejected": -528.0, |
|
"eval_loss": 0.7097968459129333, |
|
"eval_rewards/accuracies": 0.69921875, |
|
"eval_rewards/chosen": -10.8125, |
|
"eval_rewards/margins": 1.1328125, |
|
"eval_rewards/rejected": -11.9375, |
|
"eval_runtime": 47.5241, |
|
"eval_samples_per_second": 42.084, |
|
"eval_steps_per_second": 0.673, |
|
"step": 1911 |
|
}, |
|
{ |
|
"epoch": 1.0047095761381475, |
|
"grad_norm": 4.728380640009898, |
|
"learning_rate": 2.9148650634199674e-05, |
|
"logits/chosen": -12.25, |
|
"logits/rejected": -12.1875, |
|
"logps/chosen": -472.0, |
|
"logps/rejected": -516.0, |
|
"loss": 0.2767, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -9.5625, |
|
"rewards/margins": 3.671875, |
|
"rewards/rejected": -13.25, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.0099424385138671, |
|
"grad_norm": 1.024609812555977, |
|
"learning_rate": 2.892326737595751e-05, |
|
"logits/chosen": -12.25, |
|
"logits/rejected": -11.8125, |
|
"logps/chosen": -576.0, |
|
"logps/rejected": -612.0, |
|
"loss": 0.1663, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -9.375, |
|
"rewards/margins": 6.4375, |
|
"rewards/rejected": -15.8125, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.0151753008895865, |
|
"grad_norm": 3.373687792077228, |
|
"learning_rate": 2.869755671650512e-05, |
|
"logits/chosen": -11.5625, |
|
"logits/rejected": -11.25, |
|
"logps/chosen": -540.0, |
|
"logps/rejected": -660.0, |
|
"loss": 0.1126, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -10.0, |
|
"rewards/margins": 6.8125, |
|
"rewards/rejected": -16.875, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.0204081632653061, |
|
"grad_norm": 2.368031581220674, |
|
"learning_rate": 2.847153749165869e-05, |
|
"logits/chosen": -9.75, |
|
"logits/rejected": -9.375, |
|
"logps/chosen": -524.0, |
|
"logps/rejected": -596.0, |
|
"loss": 0.1652, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -11.75, |
|
"rewards/margins": 4.5625, |
|
"rewards/rejected": -16.375, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.0256410256410255, |
|
"grad_norm": 1.63741968428215, |
|
"learning_rate": 2.8245228562984516e-05, |
|
"logits/chosen": -10.8125, |
|
"logits/rejected": -10.125, |
|
"logps/chosen": -536.0, |
|
"logps/rejected": -588.0, |
|
"loss": 0.1023, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -10.75, |
|
"rewards/margins": 6.4375, |
|
"rewards/rejected": -17.25, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.0308738880167452, |
|
"grad_norm": 2.551100302367526, |
|
"learning_rate": 2.8018648816225025e-05, |
|
"logits/chosen": -11.5, |
|
"logits/rejected": -11.0, |
|
"logps/chosen": -556.0, |
|
"logps/rejected": -640.0, |
|
"loss": 0.1294, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -7.96875, |
|
"rewards/margins": 8.4375, |
|
"rewards/rejected": -16.375, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.0361067503924646, |
|
"grad_norm": 2.6749422209678073, |
|
"learning_rate": 2.7791817159722726e-05, |
|
"logits/chosen": -10.6875, |
|
"logits/rejected": -10.375, |
|
"logps/chosen": -470.0, |
|
"logps/rejected": -552.0, |
|
"loss": 0.1154, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -9.0625, |
|
"rewards/margins": 5.46875, |
|
"rewards/rejected": -14.5625, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.0413396127681842, |
|
"grad_norm": 2.655504829474548, |
|
"learning_rate": 2.756475252284229e-05, |
|
"logits/chosen": -11.25, |
|
"logits/rejected": -10.9375, |
|
"logps/chosen": -556.0, |
|
"logps/rejected": -656.0, |
|
"loss": 0.1274, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -9.1875, |
|
"rewards/margins": 6.71875, |
|
"rewards/rejected": -15.875, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.0465724751439036, |
|
"grad_norm": 2.5200245472186418, |
|
"learning_rate": 2.7337473854390865e-05, |
|
"logits/chosen": -11.5625, |
|
"logits/rejected": -11.375, |
|
"logps/chosen": -516.0, |
|
"logps/rejected": -584.0, |
|
"loss": 0.164, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -9.0, |
|
"rewards/margins": 6.25, |
|
"rewards/rejected": -15.25, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.0518053375196232, |
|
"grad_norm": 4.499576388025018, |
|
"learning_rate": 2.7110000121036793e-05, |
|
"logits/chosen": -11.5625, |
|
"logits/rejected": -11.25, |
|
"logps/chosen": -494.0, |
|
"logps/rejected": -600.0, |
|
"loss": 0.0866, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -8.9375, |
|
"rewards/margins": 7.875, |
|
"rewards/rejected": -16.75, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.0570381998953426, |
|
"grad_norm": 3.108328168380246, |
|
"learning_rate": 2.688235030572679e-05, |
|
"logits/chosen": -11.9375, |
|
"logits/rejected": -11.75, |
|
"logps/chosen": -512.0, |
|
"logps/rejected": -584.0, |
|
"loss": 0.1356, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -9.3125, |
|
"rewards/margins": 7.0625, |
|
"rewards/rejected": -16.375, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.0622710622710623, |
|
"grad_norm": 2.7373393989725763, |
|
"learning_rate": 2.6654543406101833e-05, |
|
"logits/chosen": -12.125, |
|
"logits/rejected": -11.6875, |
|
"logps/chosen": -520.0, |
|
"logps/rejected": -656.0, |
|
"loss": 0.1524, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -10.125, |
|
"rewards/margins": 7.28125, |
|
"rewards/rejected": -17.375, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.0675039246467817, |
|
"grad_norm": 2.6186672086656926, |
|
"learning_rate": 2.6426598432911763e-05, |
|
"logits/chosen": -12.875, |
|
"logits/rejected": -12.5, |
|
"logps/chosen": -576.0, |
|
"logps/rejected": -668.0, |
|
"loss": 0.1237, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -9.25, |
|
"rewards/margins": 7.53125, |
|
"rewards/rejected": -16.75, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.0727367870225013, |
|
"grad_norm": 1.4275150273517287, |
|
"learning_rate": 2.6198534408428804e-05, |
|
"logits/chosen": -12.75, |
|
"logits/rejected": -12.375, |
|
"logps/chosen": -532.0, |
|
"logps/rejected": -628.0, |
|
"loss": 0.0866, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -8.5, |
|
"rewards/margins": 8.4375, |
|
"rewards/rejected": -17.0, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.077969649398221, |
|
"grad_norm": 4.626435577549045, |
|
"learning_rate": 2.5970370364860176e-05, |
|
"logits/chosen": -12.5625, |
|
"logits/rejected": -12.25, |
|
"logps/chosen": -478.0, |
|
"logps/rejected": -636.0, |
|
"loss": 0.1145, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -9.0625, |
|
"rewards/margins": 8.625, |
|
"rewards/rejected": -17.625, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.0832025117739403, |
|
"grad_norm": 21.753848528929037, |
|
"learning_rate": 2.574212534275978e-05, |
|
"logits/chosen": -11.75, |
|
"logits/rejected": -11.3125, |
|
"logps/chosen": -464.0, |
|
"logps/rejected": -552.0, |
|
"loss": 0.1502, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -9.875, |
|
"rewards/margins": 6.15625, |
|
"rewards/rejected": -16.0, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.08843537414966, |
|
"grad_norm": 2.4608658219211628, |
|
"learning_rate": 2.5513818389439304e-05, |
|
"logits/chosen": -12.1875, |
|
"logits/rejected": -11.6875, |
|
"logps/chosen": -556.0, |
|
"logps/rejected": -656.0, |
|
"loss": 0.1992, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -10.375, |
|
"rewards/margins": 7.34375, |
|
"rewards/rejected": -17.75, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.0936682365253794, |
|
"grad_norm": 2.691085082609704, |
|
"learning_rate": 2.5285468557378616e-05, |
|
"logits/chosen": -11.75, |
|
"logits/rejected": -11.5, |
|
"logps/chosen": -476.0, |
|
"logps/rejected": -608.0, |
|
"loss": 0.1161, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -9.625, |
|
"rewards/margins": 7.5625, |
|
"rewards/rejected": -17.25, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.098901098901099, |
|
"grad_norm": 2.6635752913927853, |
|
"learning_rate": 2.5057094902635918e-05, |
|
"logits/chosen": -11.75, |
|
"logits/rejected": -11.4375, |
|
"logps/chosen": -528.0, |
|
"logps/rejected": -616.0, |
|
"loss": 0.0969, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -9.8125, |
|
"rewards/margins": 7.4375, |
|
"rewards/rejected": -17.25, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.1041339612768184, |
|
"grad_norm": 1.736617765978122, |
|
"learning_rate": 2.4828716483257418e-05, |
|
"logits/chosen": -12.25, |
|
"logits/rejected": -11.625, |
|
"logps/chosen": -548.0, |
|
"logps/rejected": -636.0, |
|
"loss": 0.1133, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -9.875, |
|
"rewards/margins": 8.125, |
|
"rewards/rejected": -18.0, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.109366823652538, |
|
"grad_norm": 1.226880604327556, |
|
"learning_rate": 2.460035235768692e-05, |
|
"logits/chosen": -12.3125, |
|
"logits/rejected": -12.0, |
|
"logps/chosen": -500.0, |
|
"logps/rejected": -644.0, |
|
"loss": 0.1001, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -10.1875, |
|
"rewards/margins": 8.0, |
|
"rewards/rejected": -18.25, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.1145996860282574, |
|
"grad_norm": 5.772340271804208, |
|
"learning_rate": 2.4372021583175446e-05, |
|
"logits/chosen": -12.25, |
|
"logits/rejected": -12.125, |
|
"logps/chosen": -536.0, |
|
"logps/rejected": -568.0, |
|
"loss": 0.1159, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -9.875, |
|
"rewards/margins": 6.0, |
|
"rewards/rejected": -15.875, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.119832548403977, |
|
"grad_norm": 4.404511520229935, |
|
"learning_rate": 2.4143743214190778e-05, |
|
"logits/chosen": -12.8125, |
|
"logits/rejected": -12.4375, |
|
"logps/chosen": -510.0, |
|
"logps/rejected": -640.0, |
|
"loss": 0.1329, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -9.8125, |
|
"rewards/margins": 7.09375, |
|
"rewards/rejected": -16.875, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.1250654107796965, |
|
"grad_norm": 2.5920340144560723, |
|
"learning_rate": 2.3915536300827414e-05, |
|
"logits/chosen": -13.375, |
|
"logits/rejected": -13.3125, |
|
"logps/chosen": -458.0, |
|
"logps/rejected": -632.0, |
|
"loss": 0.1462, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -9.1875, |
|
"rewards/margins": 7.28125, |
|
"rewards/rejected": -16.5, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.130298273155416, |
|
"grad_norm": 1.9315423437420907, |
|
"learning_rate": 2.3687419887216825e-05, |
|
"logits/chosen": -13.25, |
|
"logits/rejected": -12.9375, |
|
"logps/chosen": -520.0, |
|
"logps/rejected": -592.0, |
|
"loss": 0.1172, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -9.25, |
|
"rewards/margins": 6.5625, |
|
"rewards/rejected": -15.8125, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.1355311355311355, |
|
"grad_norm": 4.9387988320363085, |
|
"learning_rate": 2.345941300993812e-05, |
|
"logits/chosen": -13.625, |
|
"logits/rejected": -13.25, |
|
"logps/chosen": -524.0, |
|
"logps/rejected": -628.0, |
|
"loss": 0.1262, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -9.125, |
|
"rewards/margins": 7.5, |
|
"rewards/rejected": -16.625, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.1407639979068551, |
|
"grad_norm": 2.6490159969730476, |
|
"learning_rate": 2.3231534696429533e-05, |
|
"logits/chosen": -13.125, |
|
"logits/rejected": -12.625, |
|
"logps/chosen": -500.0, |
|
"logps/rejected": -640.0, |
|
"loss": 0.0858, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -9.8125, |
|
"rewards/margins": 7.40625, |
|
"rewards/rejected": -17.25, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.1459968602825745, |
|
"grad_norm": 1.8591406460035929, |
|
"learning_rate": 2.3003803963400468e-05, |
|
"logits/chosen": -13.625, |
|
"logits/rejected": -13.25, |
|
"logps/chosen": -504.0, |
|
"logps/rejected": -632.0, |
|
"loss": 0.1339, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -9.625, |
|
"rewards/margins": 7.1875, |
|
"rewards/rejected": -16.75, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.1512297226582942, |
|
"grad_norm": 3.440279490788567, |
|
"learning_rate": 2.2776239815244543e-05, |
|
"logits/chosen": -12.75, |
|
"logits/rejected": -12.5625, |
|
"logps/chosen": -536.0, |
|
"logps/rejected": -648.0, |
|
"loss": 0.1023, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -8.8125, |
|
"rewards/margins": 8.8125, |
|
"rewards/rejected": -17.625, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.1564625850340136, |
|
"grad_norm": 2.3561174482239413, |
|
"learning_rate": 2.2548861242453742e-05, |
|
"logits/chosen": -12.75, |
|
"logits/rejected": -12.5, |
|
"logps/chosen": -528.0, |
|
"logps/rejected": -608.0, |
|
"loss": 0.0903, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -9.8125, |
|
"rewards/margins": 7.125, |
|
"rewards/rejected": -16.875, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.1616954474097332, |
|
"grad_norm": 3.129509901044873, |
|
"learning_rate": 2.2321687220033523e-05, |
|
"logits/chosen": -13.125, |
|
"logits/rejected": -12.6875, |
|
"logps/chosen": -498.0, |
|
"logps/rejected": -652.0, |
|
"loss": 0.1041, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -9.5, |
|
"rewards/margins": 8.8125, |
|
"rewards/rejected": -18.375, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.1669283097854526, |
|
"grad_norm": 6.073604137970551, |
|
"learning_rate": 2.2094736705919368e-05, |
|
"logits/chosen": -13.0625, |
|
"logits/rejected": -12.8125, |
|
"logps/chosen": -568.0, |
|
"logps/rejected": -672.0, |
|
"loss": 0.1124, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -9.75, |
|
"rewards/margins": 8.25, |
|
"rewards/rejected": -18.0, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.1721611721611722, |
|
"grad_norm": 4.837786173506918, |
|
"learning_rate": 2.186802863939477e-05, |
|
"logits/chosen": -12.9375, |
|
"logits/rejected": -12.5, |
|
"logps/chosen": -502.0, |
|
"logps/rejected": -664.0, |
|
"loss": 0.1104, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -10.625, |
|
"rewards/margins": 8.625, |
|
"rewards/rejected": -19.25, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.1773940345368916, |
|
"grad_norm": 8.171951988640048, |
|
"learning_rate": 2.1641581939510667e-05, |
|
"logits/chosen": -13.4375, |
|
"logits/rejected": -13.1875, |
|
"logps/chosen": -552.0, |
|
"logps/rejected": -672.0, |
|
"loss": 0.0996, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -10.5, |
|
"rewards/margins": 7.1875, |
|
"rewards/rejected": -17.75, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.1826268969126112, |
|
"grad_norm": 1.7116523821577394, |
|
"learning_rate": 2.1415415503506653e-05, |
|
"logits/chosen": -13.4375, |
|
"logits/rejected": -13.1875, |
|
"logps/chosen": -548.0, |
|
"logps/rejected": -676.0, |
|
"loss": 0.09, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -10.5625, |
|
"rewards/margins": 7.9375, |
|
"rewards/rejected": -18.5, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.1878597592883307, |
|
"grad_norm": 2.8123145023686416, |
|
"learning_rate": 2.1189548205233975e-05, |
|
"logits/chosen": -13.625, |
|
"logits/rejected": -13.25, |
|
"logps/chosen": -584.0, |
|
"logps/rejected": -728.0, |
|
"loss": 0.0911, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -10.375, |
|
"rewards/margins": 9.3125, |
|
"rewards/rejected": -19.75, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.1930926216640503, |
|
"grad_norm": 1.3758247246151873, |
|
"learning_rate": 2.0963998893580487e-05, |
|
"logits/chosen": -13.375, |
|
"logits/rejected": -13.125, |
|
"logps/chosen": -544.0, |
|
"logps/rejected": -672.0, |
|
"loss": 0.1232, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -9.875, |
|
"rewards/margins": 9.0625, |
|
"rewards/rejected": -19.0, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.1983254840397697, |
|
"grad_norm": 2.8815572728674486, |
|
"learning_rate": 2.0738786390897696e-05, |
|
"logits/chosen": -13.75, |
|
"logits/rejected": -13.4375, |
|
"logps/chosen": -504.0, |
|
"logps/rejected": -680.0, |
|
"loss": 0.0962, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -9.625, |
|
"rewards/margins": 8.875, |
|
"rewards/rejected": -18.625, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.2035583464154893, |
|
"grad_norm": 2.0943865882237183, |
|
"learning_rate": 2.0513929491430006e-05, |
|
"logits/chosen": -14.125, |
|
"logits/rejected": -13.8125, |
|
"logps/chosen": -516.0, |
|
"logps/rejected": -660.0, |
|
"loss": 0.0908, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -9.125, |
|
"rewards/margins": 8.375, |
|
"rewards/rejected": -17.5, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.2087912087912087, |
|
"grad_norm": 4.687232667728419, |
|
"learning_rate": 2.028944695974633e-05, |
|
"logits/chosen": -14.375, |
|
"logits/rejected": -13.8125, |
|
"logps/chosen": -488.0, |
|
"logps/rejected": -596.0, |
|
"loss": 0.1219, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -9.5625, |
|
"rewards/margins": 7.28125, |
|
"rewards/rejected": -16.875, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.2140240711669283, |
|
"grad_norm": 6.733912452591703, |
|
"learning_rate": 2.006535752917414e-05, |
|
"logits/chosen": -14.375, |
|
"logits/rejected": -14.0, |
|
"logps/chosen": -536.0, |
|
"logps/rejected": -628.0, |
|
"loss": 0.1353, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -9.75, |
|
"rewards/margins": 7.6875, |
|
"rewards/rejected": -17.375, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.2192569335426477, |
|
"grad_norm": 2.3990026267001263, |
|
"learning_rate": 1.9841679900236167e-05, |
|
"logits/chosen": -13.6875, |
|
"logits/rejected": -13.5625, |
|
"logps/chosen": -528.0, |
|
"logps/rejected": -624.0, |
|
"loss": 0.1295, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -9.25, |
|
"rewards/margins": 7.59375, |
|
"rewards/rejected": -16.875, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.2244897959183674, |
|
"grad_norm": 1.5565379549024234, |
|
"learning_rate": 1.9618432739089843e-05, |
|
"logits/chosen": -13.8125, |
|
"logits/rejected": -13.5625, |
|
"logps/chosen": -456.0, |
|
"logps/rejected": -568.0, |
|
"loss": 0.1056, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.5, |
|
"rewards/margins": 7.28125, |
|
"rewards/rejected": -15.8125, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.2297226582940868, |
|
"grad_norm": 2.385154086116954, |
|
"learning_rate": 1.9395634675969525e-05, |
|
"logits/chosen": -13.75, |
|
"logits/rejected": -13.3125, |
|
"logps/chosen": -504.0, |
|
"logps/rejected": -612.0, |
|
"loss": 0.1503, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -9.125, |
|
"rewards/margins": 7.46875, |
|
"rewards/rejected": -16.5, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.2349555206698064, |
|
"grad_norm": 0.7231966984051394, |
|
"learning_rate": 1.9173304303631848e-05, |
|
"logits/chosen": -13.125, |
|
"logits/rejected": -12.875, |
|
"logps/chosen": -504.0, |
|
"logps/rejected": -608.0, |
|
"loss": 0.0951, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -8.8125, |
|
"rewards/margins": 7.46875, |
|
"rewards/rejected": -16.25, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.2401883830455258, |
|
"grad_norm": 3.595599348043387, |
|
"learning_rate": 1.8951460175804104e-05, |
|
"logits/chosen": -13.625, |
|
"logits/rejected": -13.3125, |
|
"logps/chosen": -548.0, |
|
"logps/rejected": -648.0, |
|
"loss": 0.1048, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -9.8125, |
|
"rewards/margins": 7.875, |
|
"rewards/rejected": -17.625, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.2454212454212454, |
|
"grad_norm": 2.0288579413034777, |
|
"learning_rate": 1.87301208056359e-05, |
|
"logits/chosen": -12.625, |
|
"logits/rejected": -12.375, |
|
"logps/chosen": -512.0, |
|
"logps/rejected": -616.0, |
|
"loss": 0.104, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -9.375, |
|
"rewards/margins": 7.90625, |
|
"rewards/rejected": -17.25, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.250654107796965, |
|
"grad_norm": 2.3526669326687464, |
|
"learning_rate": 1.8509304664154255e-05, |
|
"logits/chosen": -13.0625, |
|
"logits/rejected": -12.625, |
|
"logps/chosen": -604.0, |
|
"logps/rejected": -716.0, |
|
"loss": 0.1063, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -10.375, |
|
"rewards/margins": 7.84375, |
|
"rewards/rejected": -18.25, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.2558869701726845, |
|
"grad_norm": 3.112985785125725, |
|
"learning_rate": 1.8289030178722132e-05, |
|
"logits/chosen": -13.0, |
|
"logits/rejected": -12.5, |
|
"logps/chosen": -540.0, |
|
"logps/rejected": -676.0, |
|
"loss": 0.1041, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -8.8125, |
|
"rewards/margins": 9.625, |
|
"rewards/rejected": -18.5, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.2611198325484039, |
|
"grad_norm": 1.2581184168231971, |
|
"learning_rate": 1.8069315731500666e-05, |
|
"logits/chosen": -13.75, |
|
"logits/rejected": -13.3125, |
|
"logps/chosen": -576.0, |
|
"logps/rejected": -692.0, |
|
"loss": 0.1438, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -9.3125, |
|
"rewards/margins": 8.5, |
|
"rewards/rejected": -17.75, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.2663526949241235, |
|
"grad_norm": 4.762221728293621, |
|
"learning_rate": 1.7850179657915183e-05, |
|
"logits/chosen": -13.25, |
|
"logits/rejected": -12.875, |
|
"logps/chosen": -600.0, |
|
"logps/rejected": -616.0, |
|
"loss": 0.1127, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -10.0, |
|
"rewards/margins": 7.53125, |
|
"rewards/rejected": -17.5, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.2715855572998431, |
|
"grad_norm": 3.447175858247355, |
|
"learning_rate": 1.7631640245125015e-05, |
|
"logits/chosen": -12.625, |
|
"logits/rejected": -12.125, |
|
"logps/chosen": -548.0, |
|
"logps/rejected": -624.0, |
|
"loss": 0.1399, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -10.8125, |
|
"rewards/margins": 7.4375, |
|
"rewards/rejected": -18.25, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.2768184196755625, |
|
"grad_norm": 4.061321817953982, |
|
"learning_rate": 1.7413715730497494e-05, |
|
"logits/chosen": -13.0625, |
|
"logits/rejected": -12.875, |
|
"logps/chosen": -516.0, |
|
"logps/rejected": -668.0, |
|
"loss": 0.0983, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -9.5, |
|
"rewards/margins": 9.0625, |
|
"rewards/rejected": -18.625, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.282051282051282, |
|
"grad_norm": 4.489903011834727, |
|
"learning_rate": 1.7196424300085978e-05, |
|
"logits/chosen": -12.6875, |
|
"logits/rejected": -12.375, |
|
"logps/chosen": -520.0, |
|
"logps/rejected": -604.0, |
|
"loss": 0.0957, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -9.5625, |
|
"rewards/margins": 6.75, |
|
"rewards/rejected": -16.375, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.2872841444270016, |
|
"grad_norm": 3.4346300831848433, |
|
"learning_rate": 1.6979784087112188e-05, |
|
"logits/chosen": -12.75, |
|
"logits/rejected": -12.5, |
|
"logps/chosen": -468.0, |
|
"logps/rejected": -572.0, |
|
"loss": 0.1555, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -9.4375, |
|
"rewards/margins": 7.03125, |
|
"rewards/rejected": -16.5, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.2925170068027212, |
|
"grad_norm": 2.4661294584896933, |
|
"learning_rate": 1.6763813170453044e-05, |
|
"logits/chosen": -13.0, |
|
"logits/rejected": -12.5625, |
|
"logps/chosen": -474.0, |
|
"logps/rejected": -608.0, |
|
"loss": 0.0976, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -9.0, |
|
"rewards/margins": 7.09375, |
|
"rewards/rejected": -16.0, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.2977498691784406, |
|
"grad_norm": 3.149223335476965, |
|
"learning_rate": 1.6548529573131876e-05, |
|
"logits/chosen": -13.125, |
|
"logits/rejected": -12.625, |
|
"logps/chosen": -510.0, |
|
"logps/rejected": -608.0, |
|
"loss": 0.1061, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -9.625, |
|
"rewards/margins": 7.21875, |
|
"rewards/rejected": -16.875, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.30298273155416, |
|
"grad_norm": 2.862506339722175, |
|
"learning_rate": 1.6333951260814413e-05, |
|
"logits/chosen": -13.25, |
|
"logits/rejected": -12.875, |
|
"logps/chosen": -592.0, |
|
"logps/rejected": -640.0, |
|
"loss": 0.0693, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -9.3125, |
|
"rewards/margins": 8.0625, |
|
"rewards/rejected": -17.375, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.3082155939298796, |
|
"grad_norm": 2.7248445532753016, |
|
"learning_rate": 1.6120096140309572e-05, |
|
"logits/chosen": -13.125, |
|
"logits/rejected": -12.8125, |
|
"logps/chosen": -460.0, |
|
"logps/rejected": -592.0, |
|
"loss": 0.1068, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -8.875, |
|
"rewards/margins": 7.25, |
|
"rewards/rejected": -16.125, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.3134484563055993, |
|
"grad_norm": 1.4186556225169547, |
|
"learning_rate": 1.5906982058075038e-05, |
|
"logits/chosen": -12.6875, |
|
"logits/rejected": -12.3125, |
|
"logps/chosen": -478.0, |
|
"logps/rejected": -656.0, |
|
"loss": 0.0886, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -9.625, |
|
"rewards/margins": 7.5, |
|
"rewards/rejected": -17.125, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.3186813186813187, |
|
"grad_norm": 2.9852405667865174, |
|
"learning_rate": 1.569462679872801e-05, |
|
"logits/chosen": -12.8125, |
|
"logits/rejected": -12.5625, |
|
"logps/chosen": -528.0, |
|
"logps/rejected": -600.0, |
|
"loss": 0.1124, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -10.125, |
|
"rewards/margins": 6.9375, |
|
"rewards/rejected": -17.0, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.323914181057038, |
|
"grad_norm": 3.666318609283611, |
|
"learning_rate": 1.5483048083561036e-05, |
|
"logits/chosen": -13.5625, |
|
"logits/rejected": -13.0, |
|
"logps/chosen": -540.0, |
|
"logps/rejected": -676.0, |
|
"loss": 0.1064, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -9.1875, |
|
"rewards/margins": 9.3125, |
|
"rewards/rejected": -18.5, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.3291470434327577, |
|
"grad_norm": 2.9887842389088215, |
|
"learning_rate": 1.527226356906314e-05, |
|
"logits/chosen": -12.9375, |
|
"logits/rejected": -12.625, |
|
"logps/chosen": -458.0, |
|
"logps/rejected": -588.0, |
|
"loss": 0.1584, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -9.0625, |
|
"rewards/margins": 7.3125, |
|
"rewards/rejected": -16.375, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.3343799058084773, |
|
"grad_norm": 4.22908382077193, |
|
"learning_rate": 1.5062290845446403e-05, |
|
"logits/chosen": -12.625, |
|
"logits/rejected": -11.9375, |
|
"logps/chosen": -540.0, |
|
"logps/rejected": -636.0, |
|
"loss": 0.1073, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -10.75, |
|
"rewards/margins": 7.25, |
|
"rewards/rejected": -18.0, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.3396127681841967, |
|
"grad_norm": 2.546115179849579, |
|
"learning_rate": 1.4853147435177992e-05, |
|
"logits/chosen": -12.625, |
|
"logits/rejected": -12.0625, |
|
"logps/chosen": -524.0, |
|
"logps/rejected": -632.0, |
|
"loss": 0.1146, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -9.375, |
|
"rewards/margins": 8.75, |
|
"rewards/rejected": -18.125, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.3448456305599163, |
|
"grad_norm": 2.964138623891713, |
|
"learning_rate": 1.4644850791517933e-05, |
|
"logits/chosen": -12.875, |
|
"logits/rejected": -12.625, |
|
"logps/chosen": -528.0, |
|
"logps/rejected": -640.0, |
|
"loss": 0.0923, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -9.125, |
|
"rewards/margins": 8.1875, |
|
"rewards/rejected": -17.25, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.3500784929356358, |
|
"grad_norm": 2.833859042991735, |
|
"learning_rate": 1.4437418297062589e-05, |
|
"logits/chosen": -12.1875, |
|
"logits/rejected": -12.0625, |
|
"logps/chosen": -504.0, |
|
"logps/rejected": -624.0, |
|
"loss": 0.1154, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -9.0, |
|
"rewards/margins": 7.875, |
|
"rewards/rejected": -16.875, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.3553113553113554, |
|
"grad_norm": 4.611332803169445, |
|
"learning_rate": 1.4230867262294045e-05, |
|
"logits/chosen": -13.0, |
|
"logits/rejected": -12.5625, |
|
"logps/chosen": -532.0, |
|
"logps/rejected": -700.0, |
|
"loss": 0.1234, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -8.75, |
|
"rewards/margins": 8.75, |
|
"rewards/rejected": -17.5, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.3605442176870748, |
|
"grad_norm": 2.585933332197896, |
|
"learning_rate": 1.4025214924135616e-05, |
|
"logits/chosen": -12.5, |
|
"logits/rejected": -12.0, |
|
"logps/chosen": -464.0, |
|
"logps/rejected": -584.0, |
|
"loss": 0.1002, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -9.0625, |
|
"rewards/margins": 6.875, |
|
"rewards/rejected": -15.9375, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.3657770800627944, |
|
"grad_norm": 5.004771837552434, |
|
"learning_rate": 1.3820478444513288e-05, |
|
"logits/chosen": -12.9375, |
|
"logits/rejected": -12.25, |
|
"logps/chosen": -540.0, |
|
"logps/rejected": -636.0, |
|
"loss": 0.1361, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -9.3125, |
|
"rewards/margins": 7.71875, |
|
"rewards/rejected": -17.0, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.3710099424385138, |
|
"grad_norm": 3.3721942115741483, |
|
"learning_rate": 1.3616674908923585e-05, |
|
"logits/chosen": -12.6875, |
|
"logits/rejected": -12.375, |
|
"logps/chosen": -502.0, |
|
"logps/rejected": -604.0, |
|
"loss": 0.1074, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -9.0625, |
|
"rewards/margins": 7.75, |
|
"rewards/rejected": -16.875, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.3762428048142334, |
|
"grad_norm": 1.926096891230754, |
|
"learning_rate": 1.3413821325007834e-05, |
|
"logits/chosen": -12.375, |
|
"logits/rejected": -12.125, |
|
"logps/chosen": -460.0, |
|
"logps/rejected": -608.0, |
|
"loss": 0.0734, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -9.4375, |
|
"rewards/margins": 7.25, |
|
"rewards/rejected": -16.75, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.3814756671899528, |
|
"grad_norm": 2.6811569703958438, |
|
"learning_rate": 1.321193462113272e-05, |
|
"logits/chosen": -12.125, |
|
"logits/rejected": -11.5, |
|
"logps/chosen": -500.0, |
|
"logps/rejected": -584.0, |
|
"loss": 0.1341, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -9.3125, |
|
"rewards/margins": 6.65625, |
|
"rewards/rejected": -16.0, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.3867085295656725, |
|
"grad_norm": 1.5488117167812219, |
|
"learning_rate": 1.3011031644977716e-05, |
|
"logits/chosen": -12.5, |
|
"logits/rejected": -12.0, |
|
"logps/chosen": -470.0, |
|
"logps/rejected": -636.0, |
|
"loss": 0.1135, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -9.5, |
|
"rewards/margins": 7.875, |
|
"rewards/rejected": -17.375, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.3919413919413919, |
|
"grad_norm": 5.7062514391226555, |
|
"learning_rate": 1.2811129162129065e-05, |
|
"logits/chosen": -12.375, |
|
"logits/rejected": -12.125, |
|
"logps/chosen": -548.0, |
|
"logps/rejected": -740.0, |
|
"loss": 0.0949, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -9.375, |
|
"rewards/margins": 9.0625, |
|
"rewards/rejected": -18.5, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.3971742543171115, |
|
"grad_norm": 1.7643119117081274, |
|
"learning_rate": 1.261224385468066e-05, |
|
"logits/chosen": -12.3125, |
|
"logits/rejected": -11.3125, |
|
"logps/chosen": -528.0, |
|
"logps/rejected": -604.0, |
|
"loss": 0.0787, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -10.25, |
|
"rewards/margins": 7.0, |
|
"rewards/rejected": -17.25, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.402407116692831, |
|
"grad_norm": 4.394449609840489, |
|
"learning_rate": 1.2414392319841957e-05, |
|
"logits/chosen": -12.3125, |
|
"logits/rejected": -11.875, |
|
"logps/chosen": -548.0, |
|
"logps/rejected": -724.0, |
|
"loss": 0.1152, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -9.1875, |
|
"rewards/margins": 11.1875, |
|
"rewards/rejected": -20.375, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.4076399790685505, |
|
"grad_norm": 5.0757669192975685, |
|
"learning_rate": 1.2217591068552894e-05, |
|
"logits/chosen": -12.625, |
|
"logits/rejected": -12.1875, |
|
"logps/chosen": -456.0, |
|
"logps/rejected": -632.0, |
|
"loss": 0.0827, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -9.25, |
|
"rewards/margins": 7.90625, |
|
"rewards/rejected": -17.125, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.41287284144427, |
|
"grad_norm": 1.2128787751321914, |
|
"learning_rate": 1.2021856524105992e-05, |
|
"logits/chosen": -13.0, |
|
"logits/rejected": -12.375, |
|
"logps/chosen": -556.0, |
|
"logps/rejected": -652.0, |
|
"loss": 0.0657, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -9.5, |
|
"rewards/margins": 8.75, |
|
"rewards/rejected": -18.25, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.4181057038199896, |
|
"grad_norm": 4.371148246096117, |
|
"learning_rate": 1.1827205020775881e-05, |
|
"logits/chosen": -12.5, |
|
"logits/rejected": -12.125, |
|
"logps/chosen": -528.0, |
|
"logps/rejected": -624.0, |
|
"loss": 0.1347, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -9.25, |
|
"rewards/margins": 8.0, |
|
"rewards/rejected": -17.25, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.423338566195709, |
|
"grad_norm": 1.6427572152583667, |
|
"learning_rate": 1.163365280245615e-05, |
|
"logits/chosen": -13.125, |
|
"logits/rejected": -12.6875, |
|
"logps/chosen": -480.0, |
|
"logps/rejected": -592.0, |
|
"loss": 0.1309, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -8.5, |
|
"rewards/margins": 7.34375, |
|
"rewards/rejected": -15.875, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.4285714285714286, |
|
"grad_norm": 6.792621163838377, |
|
"learning_rate": 1.1441216021303777e-05, |
|
"logits/chosen": -13.125, |
|
"logits/rejected": -12.6875, |
|
"logps/chosen": -494.0, |
|
"logps/rejected": -608.0, |
|
"loss": 0.1526, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -9.1875, |
|
"rewards/margins": 7.3125, |
|
"rewards/rejected": -16.5, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.433804290947148, |
|
"grad_norm": 7.768597274251902, |
|
"learning_rate": 1.1249910736391203e-05, |
|
"logits/chosen": -13.0, |
|
"logits/rejected": -12.625, |
|
"logps/chosen": -496.0, |
|
"logps/rejected": -616.0, |
|
"loss": 0.1477, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -9.0625, |
|
"rewards/margins": 8.0, |
|
"rewards/rejected": -17.125, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.4390371533228676, |
|
"grad_norm": 1.5634511255756072, |
|
"learning_rate": 1.1059752912366217e-05, |
|
"logits/chosen": -13.1875, |
|
"logits/rejected": -12.75, |
|
"logps/chosen": -462.0, |
|
"logps/rejected": -660.0, |
|
"loss": 0.1114, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -8.375, |
|
"rewards/margins": 8.75, |
|
"rewards/rejected": -17.125, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.4442700156985873, |
|
"grad_norm": 5.473079860013893, |
|
"learning_rate": 1.0870758418119659e-05, |
|
"logits/chosen": -13.1875, |
|
"logits/rejected": -12.6875, |
|
"logps/chosen": -540.0, |
|
"logps/rejected": -584.0, |
|
"loss": 0.0834, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -9.3125, |
|
"rewards/margins": 7.875, |
|
"rewards/rejected": -17.25, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.4495028780743067, |
|
"grad_norm": 3.4164753814709665, |
|
"learning_rate": 1.0682943025461136e-05, |
|
"logits/chosen": -13.3125, |
|
"logits/rejected": -13.0, |
|
"logps/chosen": -544.0, |
|
"logps/rejected": -628.0, |
|
"loss": 0.1494, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -9.25, |
|
"rewards/margins": 7.4375, |
|
"rewards/rejected": -16.625, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.454735740450026, |
|
"grad_norm": 4.0004197012792835, |
|
"learning_rate": 1.049632240780288e-05, |
|
"logits/chosen": -12.625, |
|
"logits/rejected": -11.875, |
|
"logps/chosen": -486.0, |
|
"logps/rejected": -556.0, |
|
"loss": 0.1041, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -9.1875, |
|
"rewards/margins": 6.46875, |
|
"rewards/rejected": -15.6875, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.4599686028257457, |
|
"grad_norm": 2.937844502391212, |
|
"learning_rate": 1.0310912138851769e-05, |
|
"logits/chosen": -12.9375, |
|
"logits/rejected": -12.4375, |
|
"logps/chosen": -520.0, |
|
"logps/rejected": -688.0, |
|
"loss": 0.0971, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -9.5625, |
|
"rewards/margins": 8.0625, |
|
"rewards/rejected": -17.625, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.4652014652014653, |
|
"grad_norm": 4.339889798014685, |
|
"learning_rate": 1.0126727691309638e-05, |
|
"logits/chosen": -13.125, |
|
"logits/rejected": -12.9375, |
|
"logps/chosen": -552.0, |
|
"logps/rejected": -704.0, |
|
"loss": 0.1164, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -9.375, |
|
"rewards/margins": 8.75, |
|
"rewards/rejected": -18.125, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.4704343275771847, |
|
"grad_norm": 1.8102160895635062, |
|
"learning_rate": 9.943784435582166e-06, |
|
"logits/chosen": -12.9375, |
|
"logits/rejected": -12.25, |
|
"logps/chosen": -516.0, |
|
"logps/rejected": -660.0, |
|
"loss": 0.0842, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -9.3125, |
|
"rewards/margins": 8.1875, |
|
"rewards/rejected": -17.5, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.4756671899529041, |
|
"grad_norm": 1.7389222823704746, |
|
"learning_rate": 9.76209763849609e-06, |
|
"logits/chosen": -12.5625, |
|
"logits/rejected": -11.9375, |
|
"logps/chosen": -478.0, |
|
"logps/rejected": -620.0, |
|
"loss": 0.0995, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -9.0, |
|
"rewards/margins": 8.25, |
|
"rewards/rejected": -17.25, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.4809000523286238, |
|
"grad_norm": 1.3162427246282047, |
|
"learning_rate": 9.581682462025215e-06, |
|
"logits/chosen": -12.9375, |
|
"logits/rejected": -12.875, |
|
"logps/chosen": -494.0, |
|
"logps/rejected": -624.0, |
|
"loss": 0.0881, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -9.0625, |
|
"rewards/margins": 7.9375, |
|
"rewards/rejected": -17.0, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.4861329147043434, |
|
"grad_norm": 2.015607418663913, |
|
"learning_rate": 9.40255396202518e-06, |
|
"logits/chosen": -12.75, |
|
"logits/rejected": -12.5, |
|
"logps/chosen": -568.0, |
|
"logps/rejected": -660.0, |
|
"loss": 0.1589, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -9.4375, |
|
"rewards/margins": 7.84375, |
|
"rewards/rejected": -17.25, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.4913657770800628, |
|
"grad_norm": 1.0089278154821486, |
|
"learning_rate": 9.22472708697692e-06, |
|
"logits/chosen": -12.9375, |
|
"logits/rejected": -12.3125, |
|
"logps/chosen": -516.0, |
|
"logps/rejected": -624.0, |
|
"loss": 0.1199, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -8.9375, |
|
"rewards/margins": 8.3125, |
|
"rewards/rejected": -17.25, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.4965986394557822, |
|
"grad_norm": 7.8442767175126145, |
|
"learning_rate": 9.048216676739295e-06, |
|
"logits/chosen": -13.0625, |
|
"logits/rejected": -12.875, |
|
"logps/chosen": -528.0, |
|
"logps/rejected": -716.0, |
|
"loss": 0.1101, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -8.4375, |
|
"rewards/margins": 10.375, |
|
"rewards/rejected": -18.875, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.5018315018315018, |
|
"grad_norm": 0.8512762475654531, |
|
"learning_rate": 8.87303746131066e-06, |
|
"logits/chosen": -12.75, |
|
"logits/rejected": -12.125, |
|
"logps/chosen": -536.0, |
|
"logps/rejected": -672.0, |
|
"loss": 0.1099, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -9.375, |
|
"rewards/margins": 8.5625, |
|
"rewards/rejected": -18.0, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.5070643642072215, |
|
"grad_norm": 1.5593140512431443, |
|
"learning_rate": 8.699204059599578e-06, |
|
"logits/chosen": -12.6875, |
|
"logits/rejected": -12.375, |
|
"logps/chosen": -536.0, |
|
"logps/rejected": -660.0, |
|
"loss": 0.1107, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -9.5, |
|
"rewards/margins": 8.625, |
|
"rewards/rejected": -18.125, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.5122972265829409, |
|
"grad_norm": 3.0135057825122633, |
|
"learning_rate": 8.526730978204933e-06, |
|
"logits/chosen": -12.5, |
|
"logits/rejected": -12.3125, |
|
"logps/chosen": -572.0, |
|
"logps/rejected": -680.0, |
|
"loss": 0.097, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -9.625, |
|
"rewards/margins": 9.3125, |
|
"rewards/rejected": -18.875, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.5175300889586603, |
|
"grad_norm": 4.884151124038764, |
|
"learning_rate": 8.35563261020529e-06, |
|
"logits/chosen": -13.125, |
|
"logits/rejected": -12.6875, |
|
"logps/chosen": -548.0, |
|
"logps/rejected": -728.0, |
|
"loss": 0.0774, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -8.9375, |
|
"rewards/margins": 10.75, |
|
"rewards/rejected": -19.75, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.5227629513343799, |
|
"grad_norm": 5.5540339317729295, |
|
"learning_rate": 8.185923233957802e-06, |
|
"logits/chosen": -12.9375, |
|
"logits/rejected": -12.125, |
|
"logps/chosen": -486.0, |
|
"logps/rejected": -652.0, |
|
"loss": 0.0961, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -9.1875, |
|
"rewards/margins": 9.0, |
|
"rewards/rejected": -18.125, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.5279958137100995, |
|
"grad_norm": 3.6558685861938636, |
|
"learning_rate": 8.017617011906618e-06, |
|
"logits/chosen": -12.75, |
|
"logits/rejected": -12.3125, |
|
"logps/chosen": -548.0, |
|
"logps/rejected": -704.0, |
|
"loss": 0.08, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -9.75, |
|
"rewards/margins": 7.8125, |
|
"rewards/rejected": -17.5, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.533228676085819, |
|
"grad_norm": 3.6345000092419517, |
|
"learning_rate": 7.850727989401064e-06, |
|
"logits/chosen": -13.0, |
|
"logits/rejected": -12.75, |
|
"logps/chosen": -568.0, |
|
"logps/rejected": -740.0, |
|
"loss": 0.0925, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -9.625, |
|
"rewards/margins": 8.8125, |
|
"rewards/rejected": -18.375, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.5384615384615383, |
|
"grad_norm": 1.68599240926016, |
|
"learning_rate": 7.685270093523534e-06, |
|
"logits/chosen": -13.125, |
|
"logits/rejected": -12.625, |
|
"logps/chosen": -532.0, |
|
"logps/rejected": -704.0, |
|
"loss": 0.0917, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -9.0, |
|
"rewards/margins": 10.125, |
|
"rewards/rejected": -19.125, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.543694400837258, |
|
"grad_norm": 4.5784671678935895, |
|
"learning_rate": 7.521257131927212e-06, |
|
"logits/chosen": -12.6875, |
|
"logits/rejected": -12.375, |
|
"logps/chosen": -508.0, |
|
"logps/rejected": -572.0, |
|
"loss": 0.1385, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -10.0, |
|
"rewards/margins": 7.28125, |
|
"rewards/rejected": -17.25, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.5489272632129776, |
|
"grad_norm": 3.1832934865765763, |
|
"learning_rate": 7.358702791683869e-06, |
|
"logits/chosen": -13.25, |
|
"logits/rejected": -12.875, |
|
"logps/chosen": -478.0, |
|
"logps/rejected": -600.0, |
|
"loss": 0.0871, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -9.0625, |
|
"rewards/margins": 8.0625, |
|
"rewards/rejected": -17.125, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.554160125588697, |
|
"grad_norm": 5.1042748038245565, |
|
"learning_rate": 7.197620638141633e-06, |
|
"logits/chosen": -13.3125, |
|
"logits/rejected": -13.1875, |
|
"logps/chosen": -494.0, |
|
"logps/rejected": -616.0, |
|
"loss": 0.0872, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -8.625, |
|
"rewards/margins": 8.4375, |
|
"rewards/rejected": -17.125, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.5593929879644164, |
|
"grad_norm": 4.261194324303468, |
|
"learning_rate": 7.038024113792921e-06, |
|
"logits/chosen": -13.5, |
|
"logits/rejected": -12.875, |
|
"logps/chosen": -544.0, |
|
"logps/rejected": -660.0, |
|
"loss": 0.0934, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -8.75, |
|
"rewards/margins": 8.75, |
|
"rewards/rejected": -17.5, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.564625850340136, |
|
"grad_norm": 5.838668015412529, |
|
"learning_rate": 6.879926537152695e-06, |
|
"logits/chosen": -12.9375, |
|
"logits/rejected": -12.375, |
|
"logps/chosen": -478.0, |
|
"logps/rejected": -684.0, |
|
"loss": 0.1091, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -8.625, |
|
"rewards/margins": 9.75, |
|
"rewards/rejected": -18.375, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.5698587127158556, |
|
"grad_norm": 6.215751805578572, |
|
"learning_rate": 6.723341101646993e-06, |
|
"logits/chosen": -13.5, |
|
"logits/rejected": -13.1875, |
|
"logps/chosen": -544.0, |
|
"logps/rejected": -680.0, |
|
"loss": 0.0991, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -8.375, |
|
"rewards/margins": 9.3125, |
|
"rewards/rejected": -17.75, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.575091575091575, |
|
"grad_norm": 6.288421625487859, |
|
"learning_rate": 6.568280874511904e-06, |
|
"logits/chosen": -13.375, |
|
"logits/rejected": -13.0, |
|
"logps/chosen": -544.0, |
|
"logps/rejected": -704.0, |
|
"loss": 0.0995, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -8.75, |
|
"rewards/margins": 9.1875, |
|
"rewards/rejected": -17.875, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.5803244374672945, |
|
"grad_norm": 7.0546142777757845, |
|
"learning_rate": 6.414758795703122e-06, |
|
"logits/chosen": -13.375, |
|
"logits/rejected": -12.9375, |
|
"logps/chosen": -520.0, |
|
"logps/rejected": -624.0, |
|
"loss": 0.1143, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -9.125, |
|
"rewards/margins": 7.59375, |
|
"rewards/rejected": -16.75, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.585557299843014, |
|
"grad_norm": 4.589578434412954, |
|
"learning_rate": 6.262787676816093e-06, |
|
"logits/chosen": -13.0625, |
|
"logits/rejected": -12.625, |
|
"logps/chosen": -552.0, |
|
"logps/rejected": -648.0, |
|
"loss": 0.1142, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -9.5, |
|
"rewards/margins": 7.9375, |
|
"rewards/rejected": -17.375, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.5907901622187337, |
|
"grad_norm": 4.762993940765846, |
|
"learning_rate": 6.112380200016832e-06, |
|
"logits/chosen": -13.6875, |
|
"logits/rejected": -13.1875, |
|
"logps/chosen": -496.0, |
|
"logps/rejected": -640.0, |
|
"loss": 0.1374, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -8.625, |
|
"rewards/margins": 7.875, |
|
"rewards/rejected": -16.5, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.5960230245944533, |
|
"grad_norm": 5.107564915010418, |
|
"learning_rate": 5.963548916983627e-06, |
|
"logits/chosen": -12.875, |
|
"logits/rejected": -12.375, |
|
"logps/chosen": -452.0, |
|
"logps/rejected": -552.0, |
|
"loss": 0.0917, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -8.5, |
|
"rewards/margins": 7.71875, |
|
"rewards/rejected": -16.25, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.6012558869701727, |
|
"grad_norm": 2.54635356048562, |
|
"learning_rate": 5.816306247859571e-06, |
|
"logits/chosen": -13.3125, |
|
"logits/rejected": -12.875, |
|
"logps/chosen": -552.0, |
|
"logps/rejected": -640.0, |
|
"loss": 0.0855, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -8.1875, |
|
"rewards/margins": 9.375, |
|
"rewards/rejected": -17.5, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.6064887493458921, |
|
"grad_norm": 3.298880234845822, |
|
"learning_rate": 5.670664480216087e-06, |
|
"logits/chosen": -13.25, |
|
"logits/rejected": -12.75, |
|
"logps/chosen": -516.0, |
|
"logps/rejected": -676.0, |
|
"loss": 0.0884, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -8.0, |
|
"rewards/margins": 9.8125, |
|
"rewards/rejected": -17.75, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.6117216117216118, |
|
"grad_norm": 4.353379277791028, |
|
"learning_rate": 5.526635768027489e-06, |
|
"logits/chosen": -13.25, |
|
"logits/rejected": -12.625, |
|
"logps/chosen": -528.0, |
|
"logps/rejected": -620.0, |
|
"loss": 0.1106, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -8.5, |
|
"rewards/margins": 8.875, |
|
"rewards/rejected": -17.375, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.6169544740973314, |
|
"grad_norm": 1.7350907393275037, |
|
"learning_rate": 5.384232130656772e-06, |
|
"logits/chosen": -12.875, |
|
"logits/rejected": -12.5, |
|
"logps/chosen": -536.0, |
|
"logps/rejected": -704.0, |
|
"loss": 0.0954, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -8.25, |
|
"rewards/margins": 9.875, |
|
"rewards/rejected": -18.125, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.6221873364730508, |
|
"grad_norm": 1.9702905557486132, |
|
"learning_rate": 5.243465451852547e-06, |
|
"logits/chosen": -12.9375, |
|
"logits/rejected": -12.25, |
|
"logps/chosen": -512.0, |
|
"logps/rejected": -660.0, |
|
"loss": 0.1501, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -8.5625, |
|
"rewards/margins": 8.625, |
|
"rewards/rejected": -17.25, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.6274201988487702, |
|
"grad_norm": 2.119534360879486, |
|
"learning_rate": 5.104347478757313e-06, |
|
"logits/chosen": -12.8125, |
|
"logits/rejected": -12.25, |
|
"logps/chosen": -480.0, |
|
"logps/rejected": -636.0, |
|
"loss": 0.1065, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -8.75, |
|
"rewards/margins": 8.3125, |
|
"rewards/rejected": -17.125, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 1.6326530612244898, |
|
"grad_norm": 5.305348928368352, |
|
"learning_rate": 4.9668898209272094e-06, |
|
"logits/chosen": -13.625, |
|
"logits/rejected": -13.0625, |
|
"logps/chosen": -544.0, |
|
"logps/rejected": -648.0, |
|
"loss": 0.1039, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -8.0, |
|
"rewards/margins": 9.6875, |
|
"rewards/rejected": -17.75, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.6378859236002095, |
|
"grad_norm": 3.865588511769586, |
|
"learning_rate": 4.831103949363103e-06, |
|
"logits/chosen": -13.0, |
|
"logits/rejected": -12.4375, |
|
"logps/chosen": -486.0, |
|
"logps/rejected": -576.0, |
|
"loss": 0.1176, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -8.1875, |
|
"rewards/margins": 7.9375, |
|
"rewards/rejected": -16.125, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 1.6431187859759289, |
|
"grad_norm": 1.1087995031281264, |
|
"learning_rate": 4.697001195553366e-06, |
|
"logits/chosen": -13.3125, |
|
"logits/rejected": -12.875, |
|
"logps/chosen": -476.0, |
|
"logps/rejected": -628.0, |
|
"loss": 0.0754, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -9.0625, |
|
"rewards/margins": 8.5, |
|
"rewards/rejected": -17.5, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 1.6483516483516483, |
|
"grad_norm": 9.607813501681845, |
|
"learning_rate": 4.564592750528271e-06, |
|
"logits/chosen": -13.25, |
|
"logits/rejected": -12.75, |
|
"logps/chosen": -506.0, |
|
"logps/rejected": -584.0, |
|
"loss": 0.1084, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -8.9375, |
|
"rewards/margins": 7.5625, |
|
"rewards/rejected": -16.5, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.653584510727368, |
|
"grad_norm": 2.166356498800773, |
|
"learning_rate": 4.4338896639260276e-06, |
|
"logits/chosen": -13.4375, |
|
"logits/rejected": -13.0, |
|
"logps/chosen": -508.0, |
|
"logps/rejected": -632.0, |
|
"loss": 0.0882, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -8.375, |
|
"rewards/margins": 8.0, |
|
"rewards/rejected": -16.375, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.6588173731030875, |
|
"grad_norm": 1.3202863612431788, |
|
"learning_rate": 4.304902843070701e-06, |
|
"logits/chosen": -13.375, |
|
"logits/rejected": -12.75, |
|
"logps/chosen": -494.0, |
|
"logps/rejected": -580.0, |
|
"loss": 0.0608, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.5, |
|
"rewards/margins": 7.5625, |
|
"rewards/rejected": -16.0, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 1.664050235478807, |
|
"grad_norm": 6.022229933522954, |
|
"learning_rate": 4.177643052062039e-06, |
|
"logits/chosen": -13.375, |
|
"logits/rejected": -12.75, |
|
"logps/chosen": -510.0, |
|
"logps/rejected": -656.0, |
|
"loss": 0.0964, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -8.9375, |
|
"rewards/margins": 8.0, |
|
"rewards/rejected": -16.875, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 1.6692830978545263, |
|
"grad_norm": 4.697489562167333, |
|
"learning_rate": 4.0521209108770945e-06, |
|
"logits/chosen": -13.25, |
|
"logits/rejected": -12.625, |
|
"logps/chosen": -506.0, |
|
"logps/rejected": -640.0, |
|
"loss": 0.1347, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -9.0, |
|
"rewards/margins": 8.5, |
|
"rewards/rejected": -17.5, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 1.674515960230246, |
|
"grad_norm": 7.606755469102715, |
|
"learning_rate": 3.928346894484056e-06, |
|
"logits/chosen": -13.0625, |
|
"logits/rejected": -12.9375, |
|
"logps/chosen": -552.0, |
|
"logps/rejected": -600.0, |
|
"loss": 0.1338, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -9.375, |
|
"rewards/margins": 6.28125, |
|
"rewards/rejected": -15.625, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.6797488226059656, |
|
"grad_norm": 6.996228868642036, |
|
"learning_rate": 3.8063313319680686e-06, |
|
"logits/chosen": -12.6875, |
|
"logits/rejected": -12.25, |
|
"logps/chosen": -544.0, |
|
"logps/rejected": -680.0, |
|
"loss": 0.1061, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -9.0625, |
|
"rewards/margins": 8.6875, |
|
"rewards/rejected": -17.75, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 1.684981684981685, |
|
"grad_norm": 2.1894497750464494, |
|
"learning_rate": 3.686084405669249e-06, |
|
"logits/chosen": -13.4375, |
|
"logits/rejected": -13.0625, |
|
"logps/chosen": -540.0, |
|
"logps/rejected": -712.0, |
|
"loss": 0.1001, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -8.4375, |
|
"rewards/margins": 10.25, |
|
"rewards/rejected": -18.75, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.6902145473574044, |
|
"grad_norm": 3.29711389402712, |
|
"learning_rate": 3.567616150332992e-06, |
|
"logits/chosen": -12.9375, |
|
"logits/rejected": -12.6875, |
|
"logps/chosen": -504.0, |
|
"logps/rejected": -588.0, |
|
"loss": 0.1136, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -8.625, |
|
"rewards/margins": 7.40625, |
|
"rewards/rejected": -16.0, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 1.695447409733124, |
|
"grad_norm": 3.5590799179096213, |
|
"learning_rate": 3.450936452272524e-06, |
|
"logits/chosen": -13.25, |
|
"logits/rejected": -12.8125, |
|
"logps/chosen": -490.0, |
|
"logps/rejected": -612.0, |
|
"loss": 0.1291, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -8.4375, |
|
"rewards/margins": 7.8125, |
|
"rewards/rejected": -16.25, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.7006802721088436, |
|
"grad_norm": 2.1632084987538476, |
|
"learning_rate": 3.3360550485439067e-06, |
|
"logits/chosen": -13.5625, |
|
"logits/rejected": -13.0625, |
|
"logps/chosen": -454.0, |
|
"logps/rejected": -636.0, |
|
"loss": 0.0754, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.71875, |
|
"rewards/margins": 9.375, |
|
"rewards/rejected": -17.125, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.705913134484563, |
|
"grad_norm": 6.079477309011048, |
|
"learning_rate": 3.222981526133434e-06, |
|
"logits/chosen": -13.1875, |
|
"logits/rejected": -12.9375, |
|
"logps/chosen": -486.0, |
|
"logps/rejected": -600.0, |
|
"loss": 0.1236, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -9.0, |
|
"rewards/margins": 7.5, |
|
"rewards/rejected": -16.5, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 1.7111459968602825, |
|
"grad_norm": 1.5499240661013363, |
|
"learning_rate": 3.111725321157627e-06, |
|
"logits/chosen": -13.4375, |
|
"logits/rejected": -13.1875, |
|
"logps/chosen": -516.0, |
|
"logps/rejected": -764.0, |
|
"loss": 0.0815, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -7.1875, |
|
"rewards/margins": 12.0625, |
|
"rewards/rejected": -19.25, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 1.716378859236002, |
|
"grad_norm": 3.7623282869444963, |
|
"learning_rate": 3.002295718075762e-06, |
|
"logits/chosen": -13.1875, |
|
"logits/rejected": -12.8125, |
|
"logps/chosen": -520.0, |
|
"logps/rejected": -680.0, |
|
"loss": 0.0881, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -8.25, |
|
"rewards/margins": 10.0625, |
|
"rewards/rejected": -18.375, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.7216117216117217, |
|
"grad_norm": 7.519232363887075, |
|
"learning_rate": 2.8947018489150517e-06, |
|
"logits/chosen": -13.25, |
|
"logits/rejected": -12.625, |
|
"logps/chosen": -502.0, |
|
"logps/rejected": -652.0, |
|
"loss": 0.0775, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -8.6875, |
|
"rewards/margins": 8.5, |
|
"rewards/rejected": -17.125, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 1.7268445839874411, |
|
"grad_norm": 5.31099596157704, |
|
"learning_rate": 2.7889526925085978e-06, |
|
"logits/chosen": -12.75, |
|
"logits/rejected": -12.8125, |
|
"logps/chosen": -516.0, |
|
"logps/rejected": -644.0, |
|
"loss": 0.1053, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -8.0625, |
|
"rewards/margins": 9.4375, |
|
"rewards/rejected": -17.5, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.7320774463631605, |
|
"grad_norm": 7.1196364277612165, |
|
"learning_rate": 2.6850570737460916e-06, |
|
"logits/chosen": -13.125, |
|
"logits/rejected": -12.8125, |
|
"logps/chosen": -482.0, |
|
"logps/rejected": -600.0, |
|
"loss": 0.1152, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -8.125, |
|
"rewards/margins": 8.75, |
|
"rewards/rejected": -16.875, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 1.7373103087388801, |
|
"grad_norm": 2.680324074975716, |
|
"learning_rate": 2.5830236628373363e-06, |
|
"logits/chosen": -13.1875, |
|
"logits/rejected": -13.125, |
|
"logps/chosen": -502.0, |
|
"logps/rejected": -676.0, |
|
"loss": 0.0778, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -9.0, |
|
"rewards/margins": 9.0, |
|
"rewards/rejected": -18.0, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.7425431711145998, |
|
"grad_norm": 2.2573623813902244, |
|
"learning_rate": 2.482860974588755e-06, |
|
"logits/chosen": -13.125, |
|
"logits/rejected": -12.5, |
|
"logps/chosen": -532.0, |
|
"logps/rejected": -652.0, |
|
"loss": 0.0792, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -9.125, |
|
"rewards/margins": 8.9375, |
|
"rewards/rejected": -18.125, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 1.7477760334903192, |
|
"grad_norm": 2.3988175015656794, |
|
"learning_rate": 2.3845773676927863e-06, |
|
"logits/chosen": -13.375, |
|
"logits/rejected": -12.9375, |
|
"logps/chosen": -500.0, |
|
"logps/rejected": -644.0, |
|
"loss": 0.0737, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -9.0625, |
|
"rewards/margins": 8.625, |
|
"rewards/rejected": -17.75, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 1.7530088958660386, |
|
"grad_norm": 2.499191138157377, |
|
"learning_rate": 2.288181044030341e-06, |
|
"logits/chosen": -13.3125, |
|
"logits/rejected": -12.8125, |
|
"logps/chosen": -454.0, |
|
"logps/rejected": -592.0, |
|
"loss": 0.1102, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -8.1875, |
|
"rewards/margins": 9.0, |
|
"rewards/rejected": -17.125, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.7582417582417582, |
|
"grad_norm": 1.7297577696934656, |
|
"learning_rate": 2.193680047986385e-06, |
|
"logits/chosen": -12.9375, |
|
"logits/rejected": -12.75, |
|
"logps/chosen": -478.0, |
|
"logps/rejected": -608.0, |
|
"loss": 0.1039, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -9.3125, |
|
"rewards/margins": 7.375, |
|
"rewards/rejected": -16.625, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.7634746206174778, |
|
"grad_norm": 1.3844986076148504, |
|
"learning_rate": 2.1010822657785673e-06, |
|
"logits/chosen": -13.25, |
|
"logits/rejected": -13.0625, |
|
"logps/chosen": -552.0, |
|
"logps/rejected": -624.0, |
|
"loss": 0.0855, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -8.1875, |
|
"rewards/margins": 8.625, |
|
"rewards/rejected": -16.75, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 1.7687074829931972, |
|
"grad_norm": 2.5009715621795063, |
|
"learning_rate": 2.0103954247991525e-06, |
|
"logits/chosen": -13.125, |
|
"logits/rejected": -12.5625, |
|
"logps/chosen": -488.0, |
|
"logps/rejected": -612.0, |
|
"loss": 0.09, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -8.625, |
|
"rewards/margins": 8.6875, |
|
"rewards/rejected": -17.25, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 1.7739403453689166, |
|
"grad_norm": 1.8945829950156903, |
|
"learning_rate": 1.9216270929701407e-06, |
|
"logits/chosen": -12.9375, |
|
"logits/rejected": -12.625, |
|
"logps/chosen": -474.0, |
|
"logps/rejected": -652.0, |
|
"loss": 0.0723, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -8.75, |
|
"rewards/margins": 8.0625, |
|
"rewards/rejected": -16.75, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 1.7791732077446363, |
|
"grad_norm": 5.069862077253051, |
|
"learning_rate": 1.8347846781117201e-06, |
|
"logits/chosen": -13.0625, |
|
"logits/rejected": -12.625, |
|
"logps/chosen": -512.0, |
|
"logps/rejected": -616.0, |
|
"loss": 0.1063, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -8.8125, |
|
"rewards/margins": 9.0625, |
|
"rewards/rejected": -17.875, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.784406070120356, |
|
"grad_norm": 1.3102672583456336, |
|
"learning_rate": 1.7498754273240713e-06, |
|
"logits/chosen": -13.0625, |
|
"logits/rejected": -12.5, |
|
"logps/chosen": -516.0, |
|
"logps/rejected": -656.0, |
|
"loss": 0.102, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -8.375, |
|
"rewards/margins": 9.5, |
|
"rewards/rejected": -17.875, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 1.7896389324960753, |
|
"grad_norm": 5.748063923557189, |
|
"learning_rate": 1.6669064263826028e-06, |
|
"logits/chosen": -12.875, |
|
"logits/rejected": -12.625, |
|
"logps/chosen": -572.0, |
|
"logps/rejected": -608.0, |
|
"loss": 0.1097, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -8.8125, |
|
"rewards/margins": 8.125, |
|
"rewards/rejected": -17.0, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 1.7948717948717947, |
|
"grad_norm": 4.829587618568215, |
|
"learning_rate": 1.5858845991466088e-06, |
|
"logits/chosen": -13.0, |
|
"logits/rejected": -12.875, |
|
"logps/chosen": -472.0, |
|
"logps/rejected": -576.0, |
|
"loss": 0.1007, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -9.25, |
|
"rewards/margins": 6.65625, |
|
"rewards/rejected": -15.875, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 1.8001046572475143, |
|
"grad_norm": 0.92994359502391, |
|
"learning_rate": 1.5068167069814926e-06, |
|
"logits/chosen": -12.875, |
|
"logits/rejected": -12.625, |
|
"logps/chosen": -560.0, |
|
"logps/rejected": -692.0, |
|
"loss": 0.0844, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -8.3125, |
|
"rewards/margins": 9.9375, |
|
"rewards/rejected": -18.25, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.805337519623234, |
|
"grad_norm": 2.1999437018442163, |
|
"learning_rate": 1.4297093481945106e-06, |
|
"logits/chosen": -13.0625, |
|
"logits/rejected": -12.75, |
|
"logps/chosen": -506.0, |
|
"logps/rejected": -608.0, |
|
"loss": 0.0948, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -9.125, |
|
"rewards/margins": 7.59375, |
|
"rewards/rejected": -16.75, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.8105703819989536, |
|
"grad_norm": 4.960781699983769, |
|
"learning_rate": 1.3545689574841342e-06, |
|
"logits/chosen": -13.125, |
|
"logits/rejected": -12.5625, |
|
"logps/chosen": -544.0, |
|
"logps/rejected": -652.0, |
|
"loss": 0.0968, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -9.0625, |
|
"rewards/margins": 8.3125, |
|
"rewards/rejected": -17.375, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 1.815803244374673, |
|
"grad_norm": 3.66785793431449, |
|
"learning_rate": 1.2814018054030623e-06, |
|
"logits/chosen": -13.25, |
|
"logits/rejected": -12.75, |
|
"logps/chosen": -520.0, |
|
"logps/rejected": -624.0, |
|
"loss": 0.0943, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -8.875, |
|
"rewards/margins": 8.8125, |
|
"rewards/rejected": -17.75, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 1.8210361067503924, |
|
"grad_norm": 4.803515943576416, |
|
"learning_rate": 1.2102139978349497e-06, |
|
"logits/chosen": -12.75, |
|
"logits/rejected": -12.25, |
|
"logps/chosen": -560.0, |
|
"logps/rejected": -692.0, |
|
"loss": 0.1218, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -9.625, |
|
"rewards/margins": 9.0, |
|
"rewards/rejected": -18.625, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 1.826268969126112, |
|
"grad_norm": 1.3014751486404437, |
|
"learning_rate": 1.14101147548486e-06, |
|
"logits/chosen": -13.25, |
|
"logits/rejected": -12.625, |
|
"logps/chosen": -528.0, |
|
"logps/rejected": -640.0, |
|
"loss": 0.0868, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -8.5625, |
|
"rewards/margins": 9.3125, |
|
"rewards/rejected": -17.875, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 1.8315018315018317, |
|
"grad_norm": 7.2868142451084, |
|
"learning_rate": 1.0738000133834969e-06, |
|
"logits/chosen": -13.4375, |
|
"logits/rejected": -12.9375, |
|
"logps/chosen": -532.0, |
|
"logps/rejected": -616.0, |
|
"loss": 0.1025, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -9.75, |
|
"rewards/margins": 7.0625, |
|
"rewards/rejected": -16.75, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.836734693877551, |
|
"grad_norm": 2.182109722434071, |
|
"learning_rate": 1.008585220405278e-06, |
|
"logits/chosen": -13.0, |
|
"logits/rejected": -12.625, |
|
"logps/chosen": -462.0, |
|
"logps/rejected": -612.0, |
|
"loss": 0.0964, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -9.5, |
|
"rewards/margins": 6.96875, |
|
"rewards/rejected": -16.5, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 1.8419675562532705, |
|
"grad_norm": 3.0642067374107125, |
|
"learning_rate": 9.453725388002821e-07, |
|
"logits/chosen": -13.25, |
|
"logits/rejected": -13.0, |
|
"logps/chosen": -540.0, |
|
"logps/rejected": -680.0, |
|
"loss": 0.1071, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -9.0, |
|
"rewards/margins": 10.0, |
|
"rewards/rejected": -19.0, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.84720041862899, |
|
"grad_norm": 2.211282968905268, |
|
"learning_rate": 8.841672437400528e-07, |
|
"logits/chosen": -12.6875, |
|
"logits/rejected": -12.4375, |
|
"logps/chosen": -496.0, |
|
"logps/rejected": -644.0, |
|
"loss": 0.0976, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -8.875, |
|
"rewards/margins": 8.25, |
|
"rewards/rejected": -17.125, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 1.8524332810047097, |
|
"grad_norm": 2.306667763441843, |
|
"learning_rate": 8.249744428774103e-07, |
|
"logits/chosen": -12.75, |
|
"logits/rejected": -12.5, |
|
"logps/chosen": -576.0, |
|
"logps/rejected": -652.0, |
|
"loss": 0.0807, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -8.75, |
|
"rewards/margins": 9.125, |
|
"rewards/rejected": -17.875, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.8576661433804291, |
|
"grad_norm": 5.435125332604937, |
|
"learning_rate": 7.677990759202086e-07, |
|
"logits/chosen": -12.6875, |
|
"logits/rejected": -11.9375, |
|
"logps/chosen": -572.0, |
|
"logps/rejected": -708.0, |
|
"loss": 0.0971, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -8.875, |
|
"rewards/margins": 9.375, |
|
"rewards/rejected": -18.25, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.8628990057561485, |
|
"grad_norm": 3.3348576248427326, |
|
"learning_rate": 7.126459142190844e-07, |
|
"logits/chosen": -12.6875, |
|
"logits/rejected": -12.25, |
|
"logps/chosen": -532.0, |
|
"logps/rejected": -688.0, |
|
"loss": 0.1057, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -9.125, |
|
"rewards/margins": 9.0, |
|
"rewards/rejected": -18.125, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.8681318681318682, |
|
"grad_norm": 2.5808442085804644, |
|
"learning_rate": 6.595195603693205e-07, |
|
"logits/chosen": -12.75, |
|
"logits/rejected": -12.5, |
|
"logps/chosen": -488.0, |
|
"logps/rejected": -604.0, |
|
"loss": 0.066, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -8.75, |
|
"rewards/margins": 7.1875, |
|
"rewards/rejected": -15.9375, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 1.8733647305075878, |
|
"grad_norm": 2.7672202913255366, |
|
"learning_rate": 6.084244478267248e-07, |
|
"logits/chosen": -12.8125, |
|
"logits/rejected": -12.6875, |
|
"logps/chosen": -520.0, |
|
"logps/rejected": -628.0, |
|
"loss": 0.1047, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -8.75, |
|
"rewards/margins": 8.5, |
|
"rewards/rejected": -17.25, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.8785975928833072, |
|
"grad_norm": 2.02922718371571, |
|
"learning_rate": 5.593648405376711e-07, |
|
"logits/chosen": -12.9375, |
|
"logits/rejected": -12.6875, |
|
"logps/chosen": -524.0, |
|
"logps/rejected": -660.0, |
|
"loss": 0.0819, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.5, |
|
"rewards/margins": 9.0, |
|
"rewards/rejected": -18.5, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 1.8838304552590266, |
|
"grad_norm": 2.046652559740739, |
|
"learning_rate": 5.123448325832475e-07, |
|
"logits/chosen": -13.25, |
|
"logits/rejected": -12.625, |
|
"logps/chosen": -472.0, |
|
"logps/rejected": -580.0, |
|
"loss": 0.1085, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -9.1875, |
|
"rewards/margins": 7.53125, |
|
"rewards/rejected": -16.75, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.8890633176347462, |
|
"grad_norm": 5.589958818337675, |
|
"learning_rate": 4.6736834783762397e-07, |
|
"logits/chosen": -12.5, |
|
"logits/rejected": -12.0625, |
|
"logps/chosen": -484.0, |
|
"logps/rejected": -612.0, |
|
"loss": 0.1086, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -9.25, |
|
"rewards/margins": 8.0625, |
|
"rewards/rejected": -17.25, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 1.8942961800104658, |
|
"grad_norm": 2.1397763501401443, |
|
"learning_rate": 4.24439139640595e-07, |
|
"logits/chosen": -12.75, |
|
"logits/rejected": -12.375, |
|
"logps/chosen": -476.0, |
|
"logps/rejected": -612.0, |
|
"loss": 0.1402, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -8.8125, |
|
"rewards/margins": 7.75, |
|
"rewards/rejected": -16.5, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.8995290423861853, |
|
"grad_norm": 4.234807002383162, |
|
"learning_rate": 3.835607904843358e-07, |
|
"logits/chosen": -13.25, |
|
"logits/rejected": -12.875, |
|
"logps/chosen": -496.0, |
|
"logps/rejected": -640.0, |
|
"loss": 0.099, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -9.625, |
|
"rewards/margins": 7.4375, |
|
"rewards/rejected": -17.125, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 1.9047619047619047, |
|
"grad_norm": 5.447862432508138, |
|
"learning_rate": 3.4473671171447174e-07, |
|
"logits/chosen": -12.8125, |
|
"logits/rejected": -12.4375, |
|
"logps/chosen": -488.0, |
|
"logps/rejected": -596.0, |
|
"loss": 0.0801, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -8.5625, |
|
"rewards/margins": 8.875, |
|
"rewards/rejected": -17.375, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.9099947671376243, |
|
"grad_norm": 3.7446251130949766, |
|
"learning_rate": 3.079701432453841e-07, |
|
"logits/chosen": -12.6875, |
|
"logits/rejected": -12.375, |
|
"logps/chosen": -460.0, |
|
"logps/rejected": -676.0, |
|
"loss": 0.0963, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -8.9375, |
|
"rewards/margins": 8.5625, |
|
"rewards/rejected": -17.5, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.915227629513344, |
|
"grad_norm": 3.968745889768715, |
|
"learning_rate": 2.7326415328982056e-07, |
|
"logits/chosen": -12.75, |
|
"logits/rejected": -12.25, |
|
"logps/chosen": -502.0, |
|
"logps/rejected": -616.0, |
|
"loss": 0.1276, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -10.125, |
|
"rewards/margins": 6.875, |
|
"rewards/rejected": -17.0, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.9204604918890633, |
|
"grad_norm": 4.006875799984734, |
|
"learning_rate": 2.4062163810288365e-07, |
|
"logits/chosen": -13.0625, |
|
"logits/rejected": -12.6875, |
|
"logps/chosen": -536.0, |
|
"logps/rejected": -632.0, |
|
"loss": 0.0867, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -8.875, |
|
"rewards/margins": 8.4375, |
|
"rewards/rejected": -17.25, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 1.9256933542647827, |
|
"grad_norm": 5.953542104090957, |
|
"learning_rate": 2.100453217402959e-07, |
|
"logits/chosen": -13.0625, |
|
"logits/rejected": -12.5625, |
|
"logps/chosen": -560.0, |
|
"logps/rejected": -660.0, |
|
"loss": 0.1176, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -9.625, |
|
"rewards/margins": 8.125, |
|
"rewards/rejected": -17.75, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.9309262166405023, |
|
"grad_norm": 5.239490240155921, |
|
"learning_rate": 1.8153775583110156e-07, |
|
"logits/chosen": -13.0, |
|
"logits/rejected": -12.625, |
|
"logps/chosen": -458.0, |
|
"logps/rejected": -604.0, |
|
"loss": 0.0844, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -8.625, |
|
"rewards/margins": 8.0, |
|
"rewards/rejected": -16.625, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 1.936159079016222, |
|
"grad_norm": 4.377024347096592, |
|
"learning_rate": 1.5510131936472273e-07, |
|
"logits/chosen": -13.3125, |
|
"logits/rejected": -12.625, |
|
"logps/chosen": -544.0, |
|
"logps/rejected": -672.0, |
|
"loss": 0.0835, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -8.875, |
|
"rewards/margins": 9.3125, |
|
"rewards/rejected": -18.25, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.9413919413919414, |
|
"grad_norm": 1.5159821172630095, |
|
"learning_rate": 1.307382184924266e-07, |
|
"logits/chosen": -13.375, |
|
"logits/rejected": -12.8125, |
|
"logps/chosen": -478.0, |
|
"logps/rejected": -600.0, |
|
"loss": 0.0899, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -9.25, |
|
"rewards/margins": 7.40625, |
|
"rewards/rejected": -16.625, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 1.9466248037676608, |
|
"grad_norm": 4.469755228752353, |
|
"learning_rate": 1.0845048634321731e-07, |
|
"logits/chosen": -12.5625, |
|
"logits/rejected": -12.25, |
|
"logps/chosen": -516.0, |
|
"logps/rejected": -588.0, |
|
"loss": 0.1217, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -8.8125, |
|
"rewards/margins": 7.78125, |
|
"rewards/rejected": -16.625, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.9518576661433804, |
|
"grad_norm": 3.3123314260717414, |
|
"learning_rate": 8.823998285418522e-08, |
|
"logits/chosen": -12.9375, |
|
"logits/rejected": -12.625, |
|
"logps/chosen": -544.0, |
|
"logps/rejected": -648.0, |
|
"loss": 0.086, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -8.5625, |
|
"rewards/margins": 9.125, |
|
"rewards/rejected": -17.75, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.9570905285191, |
|
"grad_norm": 2.731361823113672, |
|
"learning_rate": 7.010839461526752e-08, |
|
"logits/chosen": -12.875, |
|
"logits/rejected": -12.5, |
|
"logps/chosen": -544.0, |
|
"logps/rejected": -680.0, |
|
"loss": 0.0895, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -8.0, |
|
"rewards/margins": 10.4375, |
|
"rewards/rejected": -18.375, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.9623233908948194, |
|
"grad_norm": 3.243299191382827, |
|
"learning_rate": 5.4057234728521756e-08, |
|
"logits/chosen": -12.9375, |
|
"logits/rejected": -12.75, |
|
"logps/chosen": -498.0, |
|
"logps/rejected": -620.0, |
|
"loss": 0.1018, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -8.625, |
|
"rewards/margins": 8.1875, |
|
"rewards/rejected": -16.75, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.9675562532705388, |
|
"grad_norm": 5.643560413836883, |
|
"learning_rate": 4.0087842681846286e-08, |
|
"logits/chosen": -12.25, |
|
"logits/rejected": -11.9375, |
|
"logps/chosen": -528.0, |
|
"logps/rejected": -604.0, |
|
"loss": 0.1314, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -9.4375, |
|
"rewards/margins": 7.6875, |
|
"rewards/rejected": -17.125, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.9727891156462585, |
|
"grad_norm": 1.3002574150876818, |
|
"learning_rate": 2.820138423720309e-08, |
|
"logits/chosen": -13.0, |
|
"logits/rejected": -12.625, |
|
"logps/chosen": -476.0, |
|
"logps/rejected": -596.0, |
|
"loss": 0.0943, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -9.125, |
|
"rewards/margins": 7.78125, |
|
"rewards/rejected": -16.875, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 1.978021978021978, |
|
"grad_norm": 5.260548583290011, |
|
"learning_rate": 1.839885133332053e-08, |
|
"logits/chosen": -12.8125, |
|
"logits/rejected": -12.25, |
|
"logps/chosen": -544.0, |
|
"logps/rejected": -696.0, |
|
"loss": 0.1208, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -9.125, |
|
"rewards/margins": 9.875, |
|
"rewards/rejected": -19.0, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 1.9832548403976975, |
|
"grad_norm": 5.112452952032154, |
|
"learning_rate": 1.0681062002940167e-08, |
|
"logits/chosen": -13.0625, |
|
"logits/rejected": -12.75, |
|
"logps/chosen": -548.0, |
|
"logps/rejected": -684.0, |
|
"loss": 0.0699, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.4375, |
|
"rewards/margins": 9.5, |
|
"rewards/rejected": -18.0, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 1.988487702773417, |
|
"grad_norm": 0.9856562812739469, |
|
"learning_rate": 5.048660304524111e-09, |
|
"logits/chosen": -12.875, |
|
"logits/rejected": -12.4375, |
|
"logps/chosen": -512.0, |
|
"logps/rejected": -660.0, |
|
"loss": 0.1136, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -9.5, |
|
"rewards/margins": 8.8125, |
|
"rewards/rejected": -18.25, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.9937205651491365, |
|
"grad_norm": 1.084985961639873, |
|
"learning_rate": 1.502116268523035e-09, |
|
"logits/chosen": -13.3125, |
|
"logits/rejected": -13.0625, |
|
"logps/chosen": -536.0, |
|
"logps/rejected": -648.0, |
|
"loss": 0.1045, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -8.75, |
|
"rewards/margins": 9.0, |
|
"rewards/rejected": -17.75, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.9989534275248562, |
|
"grad_norm": 1.7332241383915472, |
|
"learning_rate": 4.172585814643526e-11, |
|
"logits/chosen": -13.4375, |
|
"logits/rejected": -12.9375, |
|
"logps/chosen": -508.0, |
|
"logps/rejected": -628.0, |
|
"loss": 0.0506, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.5625, |
|
"rewards/margins": 9.0625, |
|
"rewards/rejected": -17.625, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_logits/chosen": -13.3125, |
|
"eval_logits/rejected": -13.0625, |
|
"eval_logps/chosen": -556.0, |
|
"eval_logps/rejected": -564.0, |
|
"eval_loss": 0.7793359160423279, |
|
"eval_rewards/accuracies": 0.72265625, |
|
"eval_rewards/chosen": -11.75, |
|
"eval_rewards/margins": 1.9140625, |
|
"eval_rewards/rejected": -13.6875, |
|
"eval_runtime": 46.77, |
|
"eval_samples_per_second": 42.762, |
|
"eval_steps_per_second": 0.684, |
|
"step": 3822 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 3822, |
|
"total_flos": 0.0, |
|
"train_loss": 0.41369995401518184, |
|
"train_runtime": 7266.4225, |
|
"train_samples_per_second": 16.826, |
|
"train_steps_per_second": 0.526 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3822, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|