|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997382884061764, |
|
"eval_steps": 500, |
|
"global_step": 1910, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0005234231876472127, |
|
"grad_norm": 7.5491774607562485, |
|
"learning_rate": 2.617801047120419e-09, |
|
"logits/chosen": 5773.244140625, |
|
"logits/rejected": 4887.3955078125, |
|
"logps/chosen": -261.77630615234375, |
|
"logps/rejected": -134.50271606445312, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.005234231876472127, |
|
"grad_norm": 7.564045160748545, |
|
"learning_rate": 2.6178010471204188e-08, |
|
"logits/chosen": 4445.29443359375, |
|
"logits/rejected": 4136.89404296875, |
|
"logps/chosen": -199.90216064453125, |
|
"logps/rejected": -178.72950744628906, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.5138888955116272, |
|
"rewards/chosen": 0.0001119289590860717, |
|
"rewards/margins": 0.000557027175091207, |
|
"rewards/rejected": -0.0004450982087291777, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.010468463752944255, |
|
"grad_norm": 7.04613658824832, |
|
"learning_rate": 5.2356020942408376e-08, |
|
"logits/chosen": 6441.7216796875, |
|
"logits/rejected": 5833.8310546875, |
|
"logps/chosen": -267.2023010253906, |
|
"logps/rejected": -242.09786987304688, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.0004725625622086227, |
|
"rewards/margins": -0.0009369999170303345, |
|
"rewards/rejected": 0.00046443723840638995, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.015702695629416383, |
|
"grad_norm": 7.050014404404103, |
|
"learning_rate": 7.853403141361257e-08, |
|
"logits/chosen": 6073.69384765625, |
|
"logits/rejected": 4584.10400390625, |
|
"logps/chosen": -242.3122100830078, |
|
"logps/rejected": -186.73757934570312, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.0008681340259499848, |
|
"rewards/margins": -0.0006206175312399864, |
|
"rewards/rejected": -0.0002475165529176593, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02093692750588851, |
|
"grad_norm": 7.0094537847752, |
|
"learning_rate": 1.0471204188481675e-07, |
|
"logits/chosen": 6178.7880859375, |
|
"logits/rejected": 5119.3330078125, |
|
"logps/chosen": -267.6510925292969, |
|
"logps/rejected": -238.3938446044922, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 5.8413388615008444e-05, |
|
"rewards/margins": 0.0008872878970578313, |
|
"rewards/rejected": -0.0008288744720630348, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02617115938236064, |
|
"grad_norm": 6.498624484675514, |
|
"learning_rate": 1.3089005235602092e-07, |
|
"logits/chosen": 5807.2255859375, |
|
"logits/rejected": 4976.87890625, |
|
"logps/chosen": -232.0266571044922, |
|
"logps/rejected": -215.0687255859375, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -6.710218440275639e-05, |
|
"rewards/margins": 0.0002581426524557173, |
|
"rewards/rejected": -0.00032524490961804986, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.031405391258832765, |
|
"grad_norm": 6.354896668199181, |
|
"learning_rate": 1.5706806282722514e-07, |
|
"logits/chosen": 5920.17041015625, |
|
"logits/rejected": 4380.2998046875, |
|
"logps/chosen": -276.4042053222656, |
|
"logps/rejected": -198.1670684814453, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.0011509377509355545, |
|
"rewards/margins": 0.0029835705645382404, |
|
"rewards/rejected": -0.0018326330464333296, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.036639623135304895, |
|
"grad_norm": 7.188225691003244, |
|
"learning_rate": 1.8324607329842932e-07, |
|
"logits/chosen": 5793.0302734375, |
|
"logits/rejected": 5064.73046875, |
|
"logps/chosen": -241.7870330810547, |
|
"logps/rejected": -217.55068969726562, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.0016902139177545905, |
|
"rewards/margins": 0.005393642000854015, |
|
"rewards/rejected": -0.0037034284323453903, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04187385501177702, |
|
"grad_norm": 6.885409466782051, |
|
"learning_rate": 2.094240837696335e-07, |
|
"logits/chosen": 5731.5439453125, |
|
"logits/rejected": 4790.80517578125, |
|
"logps/chosen": -230.2675018310547, |
|
"logps/rejected": -203.81747436523438, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.0019947488326579332, |
|
"rewards/margins": 0.0073792897164821625, |
|
"rewards/rejected": -0.005384541116654873, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.04710808688824915, |
|
"grad_norm": 7.01483850364403, |
|
"learning_rate": 2.356020942408377e-07, |
|
"logits/chosen": 6064.4345703125, |
|
"logits/rejected": 5340.29443359375, |
|
"logps/chosen": -245.2501983642578, |
|
"logps/rejected": -234.0878143310547, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.0002847136929631233, |
|
"rewards/margins": 0.00501064071431756, |
|
"rewards/rejected": -0.0052953544072806835, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.05234231876472128, |
|
"grad_norm": 6.584750614575209, |
|
"learning_rate": 2.6178010471204185e-07, |
|
"logits/chosen": 5483.78662109375, |
|
"logits/rejected": 4830.17626953125, |
|
"logps/chosen": -195.8482208251953, |
|
"logps/rejected": -172.69119262695312, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.006601253990083933, |
|
"rewards/margins": 0.006475942675024271, |
|
"rewards/rejected": -0.013077196665108204, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05757655064119341, |
|
"grad_norm": 7.00116071266525, |
|
"learning_rate": 2.879581151832461e-07, |
|
"logits/chosen": 4919.4482421875, |
|
"logits/rejected": 3946.84765625, |
|
"logps/chosen": -207.5120086669922, |
|
"logps/rejected": -149.10848999023438, |
|
"loss": 0.6895, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0063446699641644955, |
|
"rewards/margins": 0.012786591425538063, |
|
"rewards/rejected": -0.019131261855363846, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06281078251766553, |
|
"grad_norm": 6.875094615901205, |
|
"learning_rate": 3.1413612565445027e-07, |
|
"logits/chosen": 6150.2900390625, |
|
"logits/rejected": 5531.5439453125, |
|
"logps/chosen": -241.3804473876953, |
|
"logps/rejected": -234.3568572998047, |
|
"loss": 0.686, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.007997828535735607, |
|
"rewards/margins": 0.03657924011349678, |
|
"rewards/rejected": -0.044577065855264664, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.06804501439413765, |
|
"grad_norm": 7.22615793159286, |
|
"learning_rate": 3.4031413612565446e-07, |
|
"logits/chosen": 6236.9755859375, |
|
"logits/rejected": 4412.3017578125, |
|
"logps/chosen": -223.0286865234375, |
|
"logps/rejected": -177.5249786376953, |
|
"loss": 0.6845, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.0182146318256855, |
|
"rewards/margins": 0.040880750864744186, |
|
"rewards/rejected": -0.059095390141010284, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07327924627060979, |
|
"grad_norm": 7.647819285658808, |
|
"learning_rate": 3.6649214659685864e-07, |
|
"logits/chosen": 5931.47900390625, |
|
"logits/rejected": 5780.89208984375, |
|
"logps/chosen": -238.3067169189453, |
|
"logps/rejected": -247.47079467773438, |
|
"loss": 0.6811, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.05609896779060364, |
|
"rewards/margins": 0.04913746565580368, |
|
"rewards/rejected": -0.10523643344640732, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.07851347814708191, |
|
"grad_norm": 8.236442048395077, |
|
"learning_rate": 3.926701570680628e-07, |
|
"logits/chosen": 5606.55029296875, |
|
"logits/rejected": 5088.86279296875, |
|
"logps/chosen": -234.2759246826172, |
|
"logps/rejected": -225.5093994140625, |
|
"loss": 0.6813, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.061849020421504974, |
|
"rewards/margins": 0.0713229849934578, |
|
"rewards/rejected": -0.13317202031612396, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08374771002355404, |
|
"grad_norm": 7.993800474590215, |
|
"learning_rate": 4.18848167539267e-07, |
|
"logits/chosen": 5549.6689453125, |
|
"logits/rejected": 4999.32763671875, |
|
"logps/chosen": -210.8323211669922, |
|
"logps/rejected": -230.56655883789062, |
|
"loss": 0.6741, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.14567852020263672, |
|
"rewards/margins": 0.10253773629665375, |
|
"rewards/rejected": -0.24821624159812927, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.08898194190002617, |
|
"grad_norm": 8.807660704706082, |
|
"learning_rate": 4.450261780104712e-07, |
|
"logits/chosen": 6826.31787109375, |
|
"logits/rejected": 5490.9287109375, |
|
"logps/chosen": -267.2113952636719, |
|
"logps/rejected": -253.62295532226562, |
|
"loss": 0.6684, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.28720229864120483, |
|
"rewards/margins": 0.1500168889760971, |
|
"rewards/rejected": -0.4372192323207855, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.0942161737764983, |
|
"grad_norm": 13.018768437683475, |
|
"learning_rate": 4.712041884816754e-07, |
|
"logits/chosen": 6161.29736328125, |
|
"logits/rejected": 4387.1025390625, |
|
"logps/chosen": -280.9503479003906, |
|
"logps/rejected": -251.7024383544922, |
|
"loss": 0.6672, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.4883364737033844, |
|
"rewards/margins": 0.13436347246170044, |
|
"rewards/rejected": -0.6226999163627625, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.09945040565297043, |
|
"grad_norm": 12.166316451485214, |
|
"learning_rate": 4.973821989528796e-07, |
|
"logits/chosen": 5830.9501953125, |
|
"logits/rejected": 5651.06298828125, |
|
"logps/chosen": -257.42633056640625, |
|
"logps/rejected": -298.8231506347656, |
|
"loss": 0.6572, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.46903976798057556, |
|
"rewards/margins": 0.2048400640487671, |
|
"rewards/rejected": -0.6738797426223755, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.10468463752944256, |
|
"grad_norm": 10.296880781028285, |
|
"learning_rate": 4.999661831436498e-07, |
|
"logits/chosen": 5897.57373046875, |
|
"logits/rejected": 5823.5986328125, |
|
"logps/chosen": -264.2397155761719, |
|
"logps/rejected": -303.2627868652344, |
|
"loss": 0.6599, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.4873962998390198, |
|
"rewards/margins": 0.25847315788269043, |
|
"rewards/rejected": -0.7458693981170654, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.10991886940591468, |
|
"grad_norm": 12.312533931256393, |
|
"learning_rate": 4.998492971140339e-07, |
|
"logits/chosen": 5829.45654296875, |
|
"logits/rejected": 5781.94775390625, |
|
"logps/chosen": -262.94244384765625, |
|
"logps/rejected": -321.5575866699219, |
|
"loss": 0.655, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.5211669206619263, |
|
"rewards/margins": 0.3335101306438446, |
|
"rewards/rejected": -0.8546770215034485, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.11515310128238682, |
|
"grad_norm": 11.413061792372044, |
|
"learning_rate": 4.996489634487865e-07, |
|
"logits/chosen": 5954.07958984375, |
|
"logits/rejected": 5074.4462890625, |
|
"logps/chosen": -295.57037353515625, |
|
"logps/rejected": -291.2997131347656, |
|
"loss": 0.6611, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.687902569770813, |
|
"rewards/margins": 0.26726865768432617, |
|
"rewards/rejected": -0.9551712870597839, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.12038733315885894, |
|
"grad_norm": 16.092022253534562, |
|
"learning_rate": 4.993652490577246e-07, |
|
"logits/chosen": 6523.6455078125, |
|
"logits/rejected": 5203.65869140625, |
|
"logps/chosen": -303.7278137207031, |
|
"logps/rejected": -307.8695983886719, |
|
"loss": 0.649, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7638736367225647, |
|
"rewards/margins": 0.3057602047920227, |
|
"rewards/rejected": -1.0696338415145874, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.12562156503533106, |
|
"grad_norm": 10.894941993110562, |
|
"learning_rate": 4.9899824869915e-07, |
|
"logits/chosen": 5843.22705078125, |
|
"logits/rejected": 4340.3564453125, |
|
"logps/chosen": -299.8017578125, |
|
"logps/rejected": -266.58160400390625, |
|
"loss": 0.6545, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.778353214263916, |
|
"rewards/margins": 0.2908143997192383, |
|
"rewards/rejected": -1.0691677331924438, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.13085579691180318, |
|
"grad_norm": 15.436510071051824, |
|
"learning_rate": 4.985480849482012e-07, |
|
"logits/chosen": 5789.1865234375, |
|
"logits/rejected": 5862.6337890625, |
|
"logps/chosen": -273.215087890625, |
|
"logps/rejected": -316.2986755371094, |
|
"loss": 0.6496, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.694969654083252, |
|
"rewards/margins": 0.2356947660446167, |
|
"rewards/rejected": -0.9306643605232239, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1360900287882753, |
|
"grad_norm": 16.967835475128144, |
|
"learning_rate": 4.980149081559142e-07, |
|
"logits/chosen": 6428.578125, |
|
"logits/rejected": 6090.5703125, |
|
"logps/chosen": -351.8347473144531, |
|
"logps/rejected": -366.26715087890625, |
|
"loss": 0.6454, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9397789239883423, |
|
"rewards/margins": 0.3180678188800812, |
|
"rewards/rejected": -1.2578465938568115, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.14132426066474746, |
|
"grad_norm": 20.655525821311087, |
|
"learning_rate": 4.973988963990065e-07, |
|
"logits/chosen": 5191.80419921875, |
|
"logits/rejected": 4412.33642578125, |
|
"logps/chosen": -310.77447509765625, |
|
"logps/rejected": -351.3142395019531, |
|
"loss": 0.6489, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.0443050861358643, |
|
"rewards/margins": 0.456368625164032, |
|
"rewards/rejected": -1.500673532485962, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.14655849254121958, |
|
"grad_norm": 16.53683127766641, |
|
"learning_rate": 4.967002554204008e-07, |
|
"logits/chosen": 5606.6220703125, |
|
"logits/rejected": 4663.47998046875, |
|
"logps/chosen": -362.4611511230469, |
|
"logps/rejected": -385.1017761230469, |
|
"loss": 0.6329, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.3439080715179443, |
|
"rewards/margins": 0.5687575936317444, |
|
"rewards/rejected": -1.9126653671264648, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.1517927244176917, |
|
"grad_norm": 13.731548773970651, |
|
"learning_rate": 4.959192185605087e-07, |
|
"logits/chosen": 5860.9970703125, |
|
"logits/rejected": 5171.845703125, |
|
"logps/chosen": -345.3323974609375, |
|
"logps/rejected": -396.91387939453125, |
|
"loss": 0.6405, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.2838389873504639, |
|
"rewards/margins": 0.4448428153991699, |
|
"rewards/rejected": -1.7286819219589233, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.15702695629416383, |
|
"grad_norm": 15.516769429678961, |
|
"learning_rate": 4.950560466792969e-07, |
|
"logits/chosen": 6540.11181640625, |
|
"logits/rejected": 5237.14306640625, |
|
"logps/chosen": -370.7175598144531, |
|
"logps/rejected": -381.68731689453125, |
|
"loss": 0.647, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0437076091766357, |
|
"rewards/margins": 0.41619840264320374, |
|
"rewards/rejected": -1.4599062204360962, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16226118817063595, |
|
"grad_norm": 15.23495566455289, |
|
"learning_rate": 4.941110280691619e-07, |
|
"logits/chosen": 5895.0712890625, |
|
"logits/rejected": 4663.57666015625, |
|
"logps/chosen": -328.5111999511719, |
|
"logps/rejected": -317.84136962890625, |
|
"loss": 0.6316, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.9885784983634949, |
|
"rewards/margins": 0.467812716960907, |
|
"rewards/rejected": -1.4563910961151123, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.16749542004710807, |
|
"grad_norm": 12.994410953517146, |
|
"learning_rate": 4.930844783586424e-07, |
|
"logits/chosen": 5147.50830078125, |
|
"logits/rejected": 4891.75927734375, |
|
"logps/chosen": -270.1437072753906, |
|
"logps/rejected": -316.5980529785156, |
|
"loss": 0.6442, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.0208574533462524, |
|
"rewards/margins": 0.3713577687740326, |
|
"rewards/rejected": -1.392215371131897, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.17272965192358022, |
|
"grad_norm": 25.668033482423173, |
|
"learning_rate": 4.919767404070033e-07, |
|
"logits/chosen": 6307.4296875, |
|
"logits/rejected": 5151.60400390625, |
|
"logps/chosen": -341.2019958496094, |
|
"logps/rejected": -356.7355651855469, |
|
"loss": 0.6357, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1538581848144531, |
|
"rewards/margins": 0.4713706970214844, |
|
"rewards/rejected": -1.6252288818359375, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.17796388380005235, |
|
"grad_norm": 18.566603418251706, |
|
"learning_rate": 4.907881841897216e-07, |
|
"logits/chosen": 5456.0732421875, |
|
"logits/rejected": 5621.28564453125, |
|
"logps/chosen": -366.95880126953125, |
|
"logps/rejected": -429.9764709472656, |
|
"loss": 0.6446, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.5142645835876465, |
|
"rewards/margins": 0.40540844202041626, |
|
"rewards/rejected": -1.919672966003418, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.18319811567652447, |
|
"grad_norm": 15.467065391000633, |
|
"learning_rate": 4.895192066749189e-07, |
|
"logits/chosen": 5902.5888671875, |
|
"logits/rejected": 4471.02490234375, |
|
"logps/chosen": -372.2309265136719, |
|
"logps/rejected": -398.52490234375, |
|
"loss": 0.6217, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.5484896898269653, |
|
"rewards/margins": 0.45622071623802185, |
|
"rewards/rejected": -2.0047104358673096, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.1884323475529966, |
|
"grad_norm": 15.119783236904505, |
|
"learning_rate": 4.881702316907768e-07, |
|
"logits/chosen": 6141.3212890625, |
|
"logits/rejected": 4610.8212890625, |
|
"logps/chosen": -334.36376953125, |
|
"logps/rejected": -341.06304931640625, |
|
"loss": 0.6372, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.1254819631576538, |
|
"rewards/margins": 0.5175460577011108, |
|
"rewards/rejected": -1.6430280208587646, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.19366657942946872, |
|
"grad_norm": 16.916135709316627, |
|
"learning_rate": 4.86741709783982e-07, |
|
"logits/chosen": 5536.07177734375, |
|
"logits/rejected": 4676.4970703125, |
|
"logps/chosen": -308.6365661621094, |
|
"logps/rejected": -361.42022705078125, |
|
"loss": 0.6438, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.0314075946807861, |
|
"rewards/margins": 0.6450502276420593, |
|
"rewards/rejected": -1.6764577627182007, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.19890081130594087, |
|
"grad_norm": 20.375718209590385, |
|
"learning_rate": 4.85234118069247e-07, |
|
"logits/chosen": 6313.5400390625, |
|
"logits/rejected": 5581.75537109375, |
|
"logps/chosen": -365.587646484375, |
|
"logps/rejected": -383.8091735839844, |
|
"loss": 0.6376, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.2571805715560913, |
|
"rewards/margins": 0.49333277344703674, |
|
"rewards/rejected": -1.7505133152008057, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.204135043182413, |
|
"grad_norm": 22.004393446801256, |
|
"learning_rate": 4.836479600699578e-07, |
|
"logits/chosen": 5796.1845703125, |
|
"logits/rejected": 5391.08056640625, |
|
"logps/chosen": -358.70281982421875, |
|
"logps/rejected": -422.412841796875, |
|
"loss": 0.652, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.4709709882736206, |
|
"rewards/margins": 0.5307806730270386, |
|
"rewards/rejected": -2.0017518997192383, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.2093692750588851, |
|
"grad_norm": 16.72031008823946, |
|
"learning_rate": 4.819837655500013e-07, |
|
"logits/chosen": 6321.2421875, |
|
"logits/rejected": 6179.9267578125, |
|
"logps/chosen": -391.6398620605469, |
|
"logps/rejected": -447.68701171875, |
|
"loss": 0.6263, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.5102037191390991, |
|
"rewards/margins": 0.5057711601257324, |
|
"rewards/rejected": -2.015974998474121, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21460350693535724, |
|
"grad_norm": 13.254253162407238, |
|
"learning_rate": 4.802420903368285e-07, |
|
"logits/chosen": 5838.13427734375, |
|
"logits/rejected": 4767.97265625, |
|
"logps/chosen": -323.6955871582031, |
|
"logps/rejected": -403.03204345703125, |
|
"loss": 0.6262, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3097789287567139, |
|
"rewards/margins": 0.8338877558708191, |
|
"rewards/rejected": -2.1436662673950195, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.21983773881182936, |
|
"grad_norm": 14.878076929512742, |
|
"learning_rate": 4.784235161358123e-07, |
|
"logits/chosen": 6580.14453125, |
|
"logits/rejected": 5022.2802734375, |
|
"logps/chosen": -370.36663818359375, |
|
"logps/rejected": -406.0109558105469, |
|
"loss": 0.6325, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.3005058765411377, |
|
"rewards/margins": 0.645524263381958, |
|
"rewards/rejected": -1.9460302591323853, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.22507197068830148, |
|
"grad_norm": 20.06439838050598, |
|
"learning_rate": 4.7652865033596314e-07, |
|
"logits/chosen": 6275.22607421875, |
|
"logits/rejected": 5113.31591796875, |
|
"logps/chosen": -382.3496398925781, |
|
"logps/rejected": -440.8421936035156, |
|
"loss": 0.6318, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.6066843271255493, |
|
"rewards/margins": 0.5545600652694702, |
|
"rewards/rejected": -2.1612443923950195, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.23030620256477363, |
|
"grad_norm": 22.120777825162968, |
|
"learning_rate": 4.7455812580706534e-07, |
|
"logits/chosen": 5785.953125, |
|
"logits/rejected": 4642.66162109375, |
|
"logps/chosen": -327.7315673828125, |
|
"logps/rejected": -375.60174560546875, |
|
"loss": 0.621, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1555116176605225, |
|
"rewards/margins": 0.5638757944107056, |
|
"rewards/rejected": -1.719387412071228, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.23554043444124576, |
|
"grad_norm": 13.51190093535208, |
|
"learning_rate": 4.725126006883046e-07, |
|
"logits/chosen": 5409.0078125, |
|
"logits/rejected": 5192.5322265625, |
|
"logps/chosen": -322.37652587890625, |
|
"logps/rejected": -383.2165832519531, |
|
"loss": 0.6344, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1335276365280151, |
|
"rewards/margins": 0.5543726682662964, |
|
"rewards/rejected": -1.687900185585022, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.24077466631771788, |
|
"grad_norm": 15.29005551288156, |
|
"learning_rate": 4.703927581684539e-07, |
|
"logits/chosen": 5768.34326171875, |
|
"logits/rejected": 5688.51318359375, |
|
"logps/chosen": -342.89410400390625, |
|
"logps/rejected": -355.6271667480469, |
|
"loss": 0.6524, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.247072458267212, |
|
"rewards/margins": 0.38124534487724304, |
|
"rewards/rejected": -1.6283178329467773, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.24600889819419, |
|
"grad_norm": 14.004434288132737, |
|
"learning_rate": 4.68199306257695e-07, |
|
"logits/chosen": 5412.37744140625, |
|
"logits/rejected": 4303.890625, |
|
"logps/chosen": -360.8803405761719, |
|
"logps/rejected": -420.22076416015625, |
|
"loss": 0.6139, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.478992223739624, |
|
"rewards/margins": 0.6786683797836304, |
|
"rewards/rejected": -2.157660722732544, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.2512431300706621, |
|
"grad_norm": 20.211543807599117, |
|
"learning_rate": 4.6593297755114776e-07, |
|
"logits/chosen": 6246.66943359375, |
|
"logits/rejected": 5820.33935546875, |
|
"logps/chosen": -369.6717834472656, |
|
"logps/rejected": -455.38494873046875, |
|
"loss": 0.6433, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.606078863143921, |
|
"rewards/margins": 0.5704205632209778, |
|
"rewards/rejected": -2.176499605178833, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.2564773619471343, |
|
"grad_norm": 12.654030981602599, |
|
"learning_rate": 4.635945289841902e-07, |
|
"logits/chosen": 4824.7998046875, |
|
"logits/rejected": 4868.42724609375, |
|
"logps/chosen": -301.3868713378906, |
|
"logps/rejected": -385.3939208984375, |
|
"loss": 0.6484, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.299076795578003, |
|
"rewards/margins": 0.41370564699172974, |
|
"rewards/rejected": -1.7127822637557983, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.26171159382360637, |
|
"grad_norm": 21.014153020532053, |
|
"learning_rate": 4.611847415796476e-07, |
|
"logits/chosen": 6195.263671875, |
|
"logits/rejected": 5270.9248046875, |
|
"logps/chosen": -342.86016845703125, |
|
"logps/rejected": -348.72308349609375, |
|
"loss": 0.6511, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.059452772140503, |
|
"rewards/margins": 0.3982711434364319, |
|
"rewards/rejected": -1.4577242136001587, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2669458257000785, |
|
"grad_norm": 15.629527805404802, |
|
"learning_rate": 4.5870442018693773e-07, |
|
"logits/chosen": 5918.3779296875, |
|
"logits/rejected": 5355.09912109375, |
|
"logps/chosen": -324.29803466796875, |
|
"logps/rejected": -372.2521667480469, |
|
"loss": 0.632, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0098707675933838, |
|
"rewards/margins": 0.4723685681819916, |
|
"rewards/rejected": -1.4822394847869873, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.2721800575765506, |
|
"grad_norm": 21.676809757975366, |
|
"learning_rate": 4.5615439321325735e-07, |
|
"logits/chosen": 6207.53173828125, |
|
"logits/rejected": 4946.9072265625, |
|
"logps/chosen": -332.4702453613281, |
|
"logps/rejected": -391.6280212402344, |
|
"loss": 0.6148, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.026963472366333, |
|
"rewards/margins": 0.6531401872634888, |
|
"rewards/rejected": -1.6801038980484009, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.27741428945302277, |
|
"grad_norm": 23.79952337893574, |
|
"learning_rate": 4.535355123469008e-07, |
|
"logits/chosen": 5684.533203125, |
|
"logits/rejected": 5139.0107421875, |
|
"logps/chosen": -371.2861022949219, |
|
"logps/rejected": -437.2891540527344, |
|
"loss": 0.6285, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.5409961938858032, |
|
"rewards/margins": 0.7230764627456665, |
|
"rewards/rejected": -2.2640726566314697, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.2826485213294949, |
|
"grad_norm": 18.16354981413204, |
|
"learning_rate": 4.5084865227280366e-07, |
|
"logits/chosen": 5638.453125, |
|
"logits/rejected": 5075.7314453125, |
|
"logps/chosen": -398.3193054199219, |
|
"logps/rejected": -441.16033935546875, |
|
"loss": 0.63, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.6414705514907837, |
|
"rewards/margins": 0.6848443746566772, |
|
"rewards/rejected": -2.326314687728882, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.287882753205967, |
|
"grad_norm": 26.021483127779707, |
|
"learning_rate": 4.4809471038040437e-07, |
|
"logits/chosen": 5500.9501953125, |
|
"logits/rejected": 4291.2802734375, |
|
"logps/chosen": -389.2489013671875, |
|
"logps/rejected": -409.811279296875, |
|
"loss": 0.641, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.5569204092025757, |
|
"rewards/margins": 0.7008808851242065, |
|
"rewards/rejected": -2.2578012943267822, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.29311698508243916, |
|
"grad_norm": 15.956576081472086, |
|
"learning_rate": 4.4527460646392386e-07, |
|
"logits/chosen": 5543.23193359375, |
|
"logits/rejected": 5107.40625, |
|
"logps/chosen": -328.09698486328125, |
|
"logps/rejected": -381.325439453125, |
|
"loss": 0.6394, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3435633182525635, |
|
"rewards/margins": 0.45007848739624023, |
|
"rewards/rejected": -1.7936416864395142, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.29835121695891126, |
|
"grad_norm": 13.093007587120157, |
|
"learning_rate": 4.4238928241516163e-07, |
|
"logits/chosen": 6740.7314453125, |
|
"logits/rejected": 5075.4892578125, |
|
"logps/chosen": -383.84674072265625, |
|
"logps/rejected": -408.04046630859375, |
|
"loss": 0.62, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2751758098602295, |
|
"rewards/margins": 0.8238226175308228, |
|
"rewards/rejected": -2.0989983081817627, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.3035854488353834, |
|
"grad_norm": 24.06019117727656, |
|
"learning_rate": 4.394397019089116e-07, |
|
"logits/chosen": 5973.04150390625, |
|
"logits/rejected": 4739.271484375, |
|
"logps/chosen": -371.7142028808594, |
|
"logps/rejected": -389.0022888183594, |
|
"loss": 0.626, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.3149608373641968, |
|
"rewards/margins": 0.5819457173347473, |
|
"rewards/rejected": -1.8969066143035889, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.30881968071185556, |
|
"grad_norm": 17.81896374953663, |
|
"learning_rate": 4.3642685008110246e-07, |
|
"logits/chosen": 5682.49365234375, |
|
"logits/rejected": 4360.3330078125, |
|
"logps/chosen": -321.8192138671875, |
|
"logps/rejected": -370.5431823730469, |
|
"loss": 0.6423, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.199681043624878, |
|
"rewards/margins": 0.7428802251815796, |
|
"rewards/rejected": -1.942561149597168, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.31405391258832765, |
|
"grad_norm": 16.935052692220793, |
|
"learning_rate": 4.333517331997704e-07, |
|
"logits/chosen": 6167.5615234375, |
|
"logits/rejected": 5758.603515625, |
|
"logps/chosen": -402.3914794921875, |
|
"logps/rejected": -434.56158447265625, |
|
"loss": 0.6304, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.5500683784484863, |
|
"rewards/margins": 0.46028876304626465, |
|
"rewards/rejected": -2.01035737991333, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.3192881444647998, |
|
"grad_norm": 15.773609977818438, |
|
"learning_rate": 4.302153783289736e-07, |
|
"logits/chosen": 5890.45947265625, |
|
"logits/rejected": 4988.90380859375, |
|
"logps/chosen": -399.48944091796875, |
|
"logps/rejected": -501.8160705566406, |
|
"loss": 0.5844, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.8650957345962524, |
|
"rewards/margins": 0.8637407422065735, |
|
"rewards/rejected": -2.7288365364074707, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.3245223763412719, |
|
"grad_norm": 20.438404398459674, |
|
"learning_rate": 4.2701883298576124e-07, |
|
"logits/chosen": 5650.4580078125, |
|
"logits/rejected": 5150.5224609375, |
|
"logps/chosen": -462.61883544921875, |
|
"logps/rejected": -513.2371826171875, |
|
"loss": 0.6356, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.343827724456787, |
|
"rewards/margins": 0.8286565542221069, |
|
"rewards/rejected": -3.1724846363067627, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.32975660821774405, |
|
"grad_norm": 28.24293371703605, |
|
"learning_rate": 4.237631647903115e-07, |
|
"logits/chosen": 5648.98046875, |
|
"logits/rejected": 4617.064453125, |
|
"logps/chosen": -411.988525390625, |
|
"logps/rejected": -463.56158447265625, |
|
"loss": 0.6294, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.997859239578247, |
|
"rewards/margins": 0.6983556747436523, |
|
"rewards/rejected": -2.6962146759033203, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.33499084009421615, |
|
"grad_norm": 23.577036886324247, |
|
"learning_rate": 4.204494611093548e-07, |
|
"logits/chosen": 5993.8974609375, |
|
"logits/rejected": 4195.65283203125, |
|
"logps/chosen": -419.8607482910156, |
|
"logps/rejected": -440.91717529296875, |
|
"loss": 0.6299, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.721379280090332, |
|
"rewards/margins": 0.695422887802124, |
|
"rewards/rejected": -2.416802406311035, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.3402250719706883, |
|
"grad_norm": 18.70040237006655, |
|
"learning_rate": 4.1707882869300235e-07, |
|
"logits/chosen": 6020.3857421875, |
|
"logits/rejected": 4892.1318359375, |
|
"logps/chosen": -388.27813720703125, |
|
"logps/rejected": -392.47674560546875, |
|
"loss": 0.6304, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5754492282867432, |
|
"rewards/margins": 0.5581509470939636, |
|
"rewards/rejected": -2.1335999965667725, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.34545930384716045, |
|
"grad_norm": 18.77689044696186, |
|
"learning_rate": 4.136523933051005e-07, |
|
"logits/chosen": 6190.458984375, |
|
"logits/rejected": 5476.84912109375, |
|
"logps/chosen": -394.31134033203125, |
|
"logps/rejected": -425.36248779296875, |
|
"loss": 0.6175, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.6519289016723633, |
|
"rewards/margins": 0.5381680130958557, |
|
"rewards/rejected": -2.190096616744995, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.35069353572363254, |
|
"grad_norm": 18.186712218474053, |
|
"learning_rate": 4.101712993472348e-07, |
|
"logits/chosen": 6320.23828125, |
|
"logits/rejected": 5412.2626953125, |
|
"logps/chosen": -394.0950622558594, |
|
"logps/rejected": -413.16644287109375, |
|
"loss": 0.6309, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.6630204916000366, |
|
"rewards/margins": 0.59214186668396, |
|
"rewards/rejected": -2.255162477493286, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.3559277676001047, |
|
"grad_norm": 21.426538798598312, |
|
"learning_rate": 4.066367094765091e-07, |
|
"logits/chosen": 5823.1728515625, |
|
"logits/rejected": 4670.80224609375, |
|
"logps/chosen": -417.28515625, |
|
"logps/rejected": -464.26654052734375, |
|
"loss": 0.6031, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7154357433319092, |
|
"rewards/margins": 0.9158226251602173, |
|
"rewards/rejected": -2.631258487701416, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.3611619994765768, |
|
"grad_norm": 19.144193841746027, |
|
"learning_rate": 4.0304980421722766e-07, |
|
"logits/chosen": 5696.5908203125, |
|
"logits/rejected": 5137.9638671875, |
|
"logps/chosen": -425.8158264160156, |
|
"logps/rejected": -490.96624755859375, |
|
"loss": 0.6246, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.8618491888046265, |
|
"rewards/margins": 0.8498145937919617, |
|
"rewards/rejected": -2.7116637229919434, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.36639623135304894, |
|
"grad_norm": 28.56372190962352, |
|
"learning_rate": 3.994117815666095e-07, |
|
"logits/chosen": 5727.22607421875, |
|
"logits/rejected": 4252.705078125, |
|
"logps/chosen": -492.46014404296875, |
|
"logps/rejected": -520.4065551757812, |
|
"loss": 0.6296, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.1523029804229736, |
|
"rewards/margins": 0.9564183354377747, |
|
"rewards/rejected": -3.1087214946746826, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.3716304632295211, |
|
"grad_norm": 13.063007551794367, |
|
"learning_rate": 3.957238565946671e-07, |
|
"logits/chosen": 5457.42041015625, |
|
"logits/rejected": 4502.88720703125, |
|
"logps/chosen": -379.50506591796875, |
|
"logps/rejected": -405.9420471191406, |
|
"loss": 0.655, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.719842553138733, |
|
"rewards/margins": 0.5198991894721985, |
|
"rewards/rejected": -2.239741563796997, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.3768646951059932, |
|
"grad_norm": 11.137969578259929, |
|
"learning_rate": 3.9198726103838306e-07, |
|
"logits/chosen": 5491.45947265625, |
|
"logits/rejected": 4884.5771484375, |
|
"logps/chosen": -358.10699462890625, |
|
"logps/rejected": -377.1960754394531, |
|
"loss": 0.6109, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.378875970840454, |
|
"rewards/margins": 0.5345520377159119, |
|
"rewards/rejected": -1.9134283065795898, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.38209892698246534, |
|
"grad_norm": 18.42567249890633, |
|
"learning_rate": 3.8820324289031946e-07, |
|
"logits/chosen": 5650.734375, |
|
"logits/rejected": 4883.583984375, |
|
"logps/chosen": -329.21630859375, |
|
"logps/rejected": -421.2305603027344, |
|
"loss": 0.6106, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.3386439085006714, |
|
"rewards/margins": 0.9097055196762085, |
|
"rewards/rejected": -2.248349666595459, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.38733315885893743, |
|
"grad_norm": 21.014679051728024, |
|
"learning_rate": 3.84373065981799e-07, |
|
"logits/chosen": 6379.822265625, |
|
"logits/rejected": 4723.3544921875, |
|
"logps/chosen": -400.08380126953125, |
|
"logps/rejected": -476.69720458984375, |
|
"loss": 0.6107, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.6094900369644165, |
|
"rewards/margins": 1.0389902591705322, |
|
"rewards/rejected": -2.648480176925659, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.3925673907354096, |
|
"grad_norm": 25.5783449608529, |
|
"learning_rate": 3.8049800956079545e-07, |
|
"logits/chosen": 5933.28173828125, |
|
"logits/rejected": 5049.6416015625, |
|
"logps/chosen": -450.82745361328125, |
|
"logps/rejected": -519.0262451171875, |
|
"loss": 0.6471, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.1224923133850098, |
|
"rewards/margins": 1.0625412464141846, |
|
"rewards/rejected": -3.1850337982177734, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.39780162261188173, |
|
"grad_norm": 16.150618590693583, |
|
"learning_rate": 3.7657936786467525e-07, |
|
"logits/chosen": 5189.0732421875, |
|
"logits/rejected": 4285.34912109375, |
|
"logps/chosen": -424.62255859375, |
|
"logps/rejected": -479.2969665527344, |
|
"loss": 0.6186, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.2088141441345215, |
|
"rewards/margins": 0.7376548051834106, |
|
"rewards/rejected": -2.9464688301086426, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.40303585448835383, |
|
"grad_norm": 15.760084999630747, |
|
"learning_rate": 3.7261844968793226e-07, |
|
"logits/chosen": 4326.27197265625, |
|
"logits/rejected": 4380.33544921875, |
|
"logps/chosen": -372.68756103515625, |
|
"logps/rejected": -481.65313720703125, |
|
"loss": 0.6109, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9684680700302124, |
|
"rewards/margins": 0.8767637014389038, |
|
"rewards/rejected": -2.8452320098876953, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.408270086364826, |
|
"grad_norm": 18.09652778784993, |
|
"learning_rate": 3.6861657794506187e-07, |
|
"logits/chosen": 4880.94482421875, |
|
"logits/rejected": 4508.5419921875, |
|
"logps/chosen": -407.27587890625, |
|
"logps/rejected": -466.6880798339844, |
|
"loss": 0.6446, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.0973594188690186, |
|
"rewards/margins": 0.6051468253135681, |
|
"rewards/rejected": -2.7025063037872314, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.4135043182412981, |
|
"grad_norm": 15.553054502461759, |
|
"learning_rate": 3.6457508922871777e-07, |
|
"logits/chosen": 6180.486328125, |
|
"logits/rejected": 4504.57763671875, |
|
"logps/chosen": -405.5555725097656, |
|
"logps/rejected": -487.57196044921875, |
|
"loss": 0.6097, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.869215726852417, |
|
"rewards/margins": 1.1324493885040283, |
|
"rewards/rejected": -3.0016651153564453, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.4187385501177702, |
|
"grad_norm": 52.02343099220796, |
|
"learning_rate": 3.6049533336330084e-07, |
|
"logits/chosen": 6146.11865234375, |
|
"logits/rejected": 4862.7744140625, |
|
"logps/chosen": -443.3235778808594, |
|
"logps/rejected": -514.3902587890625, |
|
"loss": 0.6423, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.093003034591675, |
|
"rewards/margins": 1.0282524824142456, |
|
"rewards/rejected": -3.12125563621521, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.4239727819942423, |
|
"grad_norm": 25.391701434361387, |
|
"learning_rate": 3.56378672954129e-07, |
|
"logits/chosen": 6351.4970703125, |
|
"logits/rejected": 4460.3125, |
|
"logps/chosen": -440.08294677734375, |
|
"logps/rejected": -489.60321044921875, |
|
"loss": 0.6175, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.8706138134002686, |
|
"rewards/margins": 1.1428322792053223, |
|
"rewards/rejected": -3.01344633102417, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.42920701387071447, |
|
"grad_norm": 17.33884318164809, |
|
"learning_rate": 3.5222648293233803e-07, |
|
"logits/chosen": 6334.86279296875, |
|
"logits/rejected": 5818.06591796875, |
|
"logps/chosen": -396.09466552734375, |
|
"logps/rejected": -470.11273193359375, |
|
"loss": 0.6092, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.6134361028671265, |
|
"rewards/margins": 0.7463122606277466, |
|
"rewards/rejected": -2.359748363494873, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.4344412457471866, |
|
"grad_norm": 21.34021081433511, |
|
"learning_rate": 3.480401500956657e-07, |
|
"logits/chosen": 5477.52587890625, |
|
"logits/rejected": 4610.40283203125, |
|
"logps/chosen": -352.7813415527344, |
|
"logps/rejected": -410.7137756347656, |
|
"loss": 0.6365, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.540126085281372, |
|
"rewards/margins": 0.4730333387851715, |
|
"rewards/rejected": -2.0131595134735107, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.4396754776236587, |
|
"grad_norm": 17.95258525844177, |
|
"learning_rate": 3.438210726452724e-07, |
|
"logits/chosen": 6387.1103515625, |
|
"logits/rejected": 5639.19580078125, |
|
"logps/chosen": -402.55999755859375, |
|
"logps/rejected": -427.85400390625, |
|
"loss": 0.6315, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4374101161956787, |
|
"rewards/margins": 0.6155884265899658, |
|
"rewards/rejected": -2.0529983043670654, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.44490970950013087, |
|
"grad_norm": 18.9222054407907, |
|
"learning_rate": 3.395706597187538e-07, |
|
"logits/chosen": 4786.2646484375, |
|
"logits/rejected": 4725.2626953125, |
|
"logps/chosen": -342.1614990234375, |
|
"logps/rejected": -403.74755859375, |
|
"loss": 0.614, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.560929536819458, |
|
"rewards/margins": 0.6686034202575684, |
|
"rewards/rejected": -2.2295329570770264, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.45014394137660296, |
|
"grad_norm": 24.87010650260379, |
|
"learning_rate": 3.3529033091949986e-07, |
|
"logits/chosen": 5798.42724609375, |
|
"logits/rejected": 5365.8623046875, |
|
"logps/chosen": -429.4087829589844, |
|
"logps/rejected": -528.0635375976562, |
|
"loss": 0.6112, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.7890077829360962, |
|
"rewards/margins": 0.9684630632400513, |
|
"rewards/rejected": -2.7574710845947266, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.4553781732530751, |
|
"grad_norm": 56.53886775450491, |
|
"learning_rate": 3.309815158425591e-07, |
|
"logits/chosen": 5630.0419921875, |
|
"logits/rejected": 5342.580078125, |
|
"logps/chosen": -417.60888671875, |
|
"logps/rejected": -509.32647705078125, |
|
"loss": 0.6257, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.7594547271728516, |
|
"rewards/margins": 1.0495405197143555, |
|
"rewards/rejected": -2.808995008468628, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.46061240512954726, |
|
"grad_norm": 24.277071765568724, |
|
"learning_rate": 3.2664565359716536e-07, |
|
"logits/chosen": 5669.77392578125, |
|
"logits/rejected": 4588.5927734375, |
|
"logps/chosen": -415.36163330078125, |
|
"logps/rejected": -488.67120361328125, |
|
"loss": 0.6156, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.9911209344863892, |
|
"rewards/margins": 1.0688735246658325, |
|
"rewards/rejected": -3.0599944591522217, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.46584663700601936, |
|
"grad_norm": 17.534117100677573, |
|
"learning_rate": 3.222841923260869e-07, |
|
"logits/chosen": 5307.109375, |
|
"logits/rejected": 4587.55029296875, |
|
"logps/chosen": -423.51629638671875, |
|
"logps/rejected": -494.17193603515625, |
|
"loss": 0.6121, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.087824821472168, |
|
"rewards/margins": 0.893652081489563, |
|
"rewards/rejected": -2.9814765453338623, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.4710808688824915, |
|
"grad_norm": 20.56698549553084, |
|
"learning_rate": 3.1789858872195887e-07, |
|
"logits/chosen": 6439.45751953125, |
|
"logits/rejected": 5222.29833984375, |
|
"logps/chosen": -458.2245178222656, |
|
"logps/rejected": -531.4591674804688, |
|
"loss": 0.6043, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.106672525405884, |
|
"rewards/margins": 0.9118589162826538, |
|
"rewards/rejected": -3.018531322479248, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.4763151007589636, |
|
"grad_norm": 15.634569986443797, |
|
"learning_rate": 3.1349030754075937e-07, |
|
"logits/chosen": 5356.185546875, |
|
"logits/rejected": 4248.3271484375, |
|
"logps/chosen": -420.09600830078125, |
|
"logps/rejected": -509.48101806640625, |
|
"loss": 0.6183, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.1424427032470703, |
|
"rewards/margins": 1.1177256107330322, |
|
"rewards/rejected": -3.2601680755615234, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.48154933263543576, |
|
"grad_norm": 17.43008538687268, |
|
"learning_rate": 3.090608211125931e-07, |
|
"logits/chosen": 5311.978515625, |
|
"logits/rejected": 4518.35693359375, |
|
"logps/chosen": -421.0234375, |
|
"logps/rejected": -501.09527587890625, |
|
"loss": 0.5957, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.1865296363830566, |
|
"rewards/margins": 0.9108685255050659, |
|
"rewards/rejected": -3.097398281097412, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.48678356451190785, |
|
"grad_norm": 23.081663273096012, |
|
"learning_rate": 3.0461160884994487e-07, |
|
"logits/chosen": 5700.06689453125, |
|
"logits/rejected": 5031.7353515625, |
|
"logps/chosen": -447.28936767578125, |
|
"logps/rejected": -512.2467651367188, |
|
"loss": 0.6257, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.2586405277252197, |
|
"rewards/margins": 0.7844768762588501, |
|
"rewards/rejected": -3.0431172847747803, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.49201779638838, |
|
"grad_norm": 18.627739282913765, |
|
"learning_rate": 3.001441567535681e-07, |
|
"logits/chosen": 6320.2421875, |
|
"logits/rejected": 5199.8828125, |
|
"logps/chosen": -429.02667236328125, |
|
"logps/rejected": -511.12457275390625, |
|
"loss": 0.6071, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9009828567504883, |
|
"rewards/margins": 1.0119611024856567, |
|
"rewards/rejected": -2.9129440784454346, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.49725202826485215, |
|
"grad_norm": 20.9694437636251, |
|
"learning_rate": 2.956599569161724e-07, |
|
"logits/chosen": 5312.28173828125, |
|
"logits/rejected": 4129.46435546875, |
|
"logps/chosen": -352.3714294433594, |
|
"logps/rejected": -402.3336486816406, |
|
"loss": 0.6166, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.611181616783142, |
|
"rewards/margins": 0.5900977849960327, |
|
"rewards/rejected": -2.2012791633605957, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.5024862601413242, |
|
"grad_norm": 16.66673110491197, |
|
"learning_rate": 2.91160507024077e-07, |
|
"logits/chosen": 5664.244140625, |
|
"logits/rejected": 4732.4833984375, |
|
"logps/chosen": -374.69970703125, |
|
"logps/rejected": -430.1102600097656, |
|
"loss": 0.6171, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.5680463314056396, |
|
"rewards/margins": 0.7437410950660706, |
|
"rewards/rejected": -2.3117871284484863, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.5077204920177963, |
|
"grad_norm": 14.965729396145859, |
|
"learning_rate": 2.866473098569953e-07, |
|
"logits/chosen": 5775.98291015625, |
|
"logits/rejected": 4830.63916015625, |
|
"logps/chosen": -399.218017578125, |
|
"logps/rejected": -450.00469970703125, |
|
"loss": 0.6236, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5897982120513916, |
|
"rewards/margins": 0.7888145446777344, |
|
"rewards/rejected": -2.378612518310547, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.5129547238942685, |
|
"grad_norm": 12.518165998557452, |
|
"learning_rate": 2.8212187278611905e-07, |
|
"logits/chosen": 5487.87646484375, |
|
"logits/rejected": 4786.9697265625, |
|
"logps/chosen": -406.44769287109375, |
|
"logps/rejected": -478.30450439453125, |
|
"loss": 0.6078, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7121471166610718, |
|
"rewards/margins": 0.8904681205749512, |
|
"rewards/rejected": -2.6026151180267334, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.5181889557707406, |
|
"grad_norm": 22.061851534247943, |
|
"learning_rate": 2.775857072706684e-07, |
|
"logits/chosen": 5991.2373046875, |
|
"logits/rejected": 4359.41357421875, |
|
"logps/chosen": -416.60516357421875, |
|
"logps/rejected": -461.73016357421875, |
|
"loss": 0.6386, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.810485601425171, |
|
"rewards/margins": 0.9933170080184937, |
|
"rewards/rejected": -2.803802967071533, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.5234231876472127, |
|
"grad_norm": 22.46913725233362, |
|
"learning_rate": 2.7304032835307667e-07, |
|
"logits/chosen": 6123.0048828125, |
|
"logits/rejected": 5400.46240234375, |
|
"logps/chosen": -433.31829833984375, |
|
"logps/rejected": -514.8015747070312, |
|
"loss": 0.6364, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.062668561935425, |
|
"rewards/margins": 0.59827721118927, |
|
"rewards/rejected": -2.6609461307525635, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.528657419523685, |
|
"grad_norm": 16.396544720613925, |
|
"learning_rate": 2.6848725415297884e-07, |
|
"logits/chosen": 5970.46044921875, |
|
"logits/rejected": 5188.1962890625, |
|
"logps/chosen": -450.0951232910156, |
|
"logps/rejected": -460.515625, |
|
"loss": 0.6228, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9533536434173584, |
|
"rewards/margins": 0.6516803503036499, |
|
"rewards/rejected": -2.6050338745117188, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.533891651400157, |
|
"grad_norm": 34.998855163224775, |
|
"learning_rate": 2.6392800536017183e-07, |
|
"logits/chosen": 5251.8818359375, |
|
"logits/rejected": 4933.35546875, |
|
"logps/chosen": -433.3590393066406, |
|
"logps/rejected": -494.32366943359375, |
|
"loss": 0.6187, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.9257965087890625, |
|
"rewards/margins": 0.7166542410850525, |
|
"rewards/rejected": -2.6424505710601807, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.5391258832766291, |
|
"grad_norm": 20.45554516626394, |
|
"learning_rate": 2.59364104726716e-07, |
|
"logits/chosen": 5809.958984375, |
|
"logits/rejected": 5054.63037109375, |
|
"logps/chosen": -413.60357666015625, |
|
"logps/rejected": -492.5873107910156, |
|
"loss": 0.6035, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.7435877323150635, |
|
"rewards/margins": 0.8188160061836243, |
|
"rewards/rejected": -2.562403678894043, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.5443601151531012, |
|
"grad_norm": 25.933977698433374, |
|
"learning_rate": 2.547970765583491e-07, |
|
"logits/chosen": 5483.72412109375, |
|
"logits/rejected": 4852.462890625, |
|
"logps/chosen": -373.3037414550781, |
|
"logps/rejected": -430.94378662109375, |
|
"loss": 0.6243, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6192424297332764, |
|
"rewards/margins": 0.8005384206771851, |
|
"rewards/rejected": -2.419780969619751, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.5495943470295734, |
|
"grad_norm": 18.23336853816008, |
|
"learning_rate": 2.502284462053799e-07, |
|
"logits/chosen": 6024.7958984375, |
|
"logits/rejected": 5882.58740234375, |
|
"logps/chosen": -410.0364685058594, |
|
"logps/rejected": -473.29779052734375, |
|
"loss": 0.6254, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.7705657482147217, |
|
"rewards/margins": 0.7812509536743164, |
|
"rewards/rejected": -2.551816940307617, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.5548285789060455, |
|
"grad_norm": 45.486266011389816, |
|
"learning_rate": 2.4565973955323374e-07, |
|
"logits/chosen": 5641.85302734375, |
|
"logits/rejected": 4873.16845703125, |
|
"logps/chosen": -415.40582275390625, |
|
"logps/rejected": -460.23077392578125, |
|
"loss": 0.6214, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.7098748683929443, |
|
"rewards/margins": 0.8872604370117188, |
|
"rewards/rejected": -2.597135305404663, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.5600628107825176, |
|
"grad_norm": 23.73611035678335, |
|
"learning_rate": 2.410924825128195e-07, |
|
"logits/chosen": 5291.748046875, |
|
"logits/rejected": 5004.06884765625, |
|
"logps/chosen": -400.042236328125, |
|
"logps/rejected": -488.37744140625, |
|
"loss": 0.599, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.792931318283081, |
|
"rewards/margins": 0.8118869662284851, |
|
"rewards/rejected": -2.604818344116211, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.5652970426589898, |
|
"grad_norm": 23.703780906245843, |
|
"learning_rate": 2.365282005108875e-07, |
|
"logits/chosen": 5615.40283203125, |
|
"logits/rejected": 4617.5302734375, |
|
"logps/chosen": -391.23028564453125, |
|
"logps/rejected": -494.76531982421875, |
|
"loss": 0.6073, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.8765054941177368, |
|
"rewards/margins": 1.003303050994873, |
|
"rewards/rejected": -2.8798086643218994, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.5705312745354619, |
|
"grad_norm": 32.00654280597893, |
|
"learning_rate": 2.319684179805491e-07, |
|
"logits/chosen": 5474.94189453125, |
|
"logits/rejected": 4257.7763671875, |
|
"logps/chosen": -418.8746032714844, |
|
"logps/rejected": -479.42205810546875, |
|
"loss": 0.6239, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.8052211999893188, |
|
"rewards/margins": 1.1022889614105225, |
|
"rewards/rejected": -2.907510280609131, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.575765506411934, |
|
"grad_norm": 15.09375460303486, |
|
"learning_rate": 2.2741465785212902e-07, |
|
"logits/chosen": 5132.87255859375, |
|
"logits/rejected": 3877.443359375, |
|
"logps/chosen": -369.39129638671875, |
|
"logps/rejected": -445.2359313964844, |
|
"loss": 0.5876, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.5681183338165283, |
|
"rewards/margins": 1.1039445400238037, |
|
"rewards/rejected": -2.672062635421753, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.5809997382884062, |
|
"grad_norm": 15.752950958144131, |
|
"learning_rate": 2.2286844104451843e-07, |
|
"logits/chosen": 5614.02734375, |
|
"logits/rejected": 4852.61962890625, |
|
"logps/chosen": -421.18035888671875, |
|
"logps/rejected": -493.23944091796875, |
|
"loss": 0.617, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8397204875946045, |
|
"rewards/margins": 0.82035893201828, |
|
"rewards/rejected": -2.6600797176361084, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.5862339701648783, |
|
"grad_norm": 20.061686761620173, |
|
"learning_rate": 2.183312859572008e-07, |
|
"logits/chosen": 6473.8583984375, |
|
"logits/rejected": 5419.43115234375, |
|
"logps/chosen": -412.7747497558594, |
|
"logps/rejected": -464.63446044921875, |
|
"loss": 0.6271, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.6665458679199219, |
|
"rewards/margins": 0.8658057451248169, |
|
"rewards/rejected": -2.53235125541687, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.5914682020413504, |
|
"grad_norm": 17.630546844566275, |
|
"learning_rate": 2.138047079631184e-07, |
|
"logits/chosen": 5279.314453125, |
|
"logits/rejected": 5356.86962890625, |
|
"logps/chosen": -409.72161865234375, |
|
"logps/rejected": -491.9193420410156, |
|
"loss": 0.6111, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.9969879388809204, |
|
"rewards/margins": 0.7077668905258179, |
|
"rewards/rejected": -2.70475435256958, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.5967024339178225, |
|
"grad_norm": 20.142582983294798, |
|
"learning_rate": 2.0929021890255068e-07, |
|
"logits/chosen": 6199.505859375, |
|
"logits/rejected": 5334.6689453125, |
|
"logps/chosen": -431.4466247558594, |
|
"logps/rejected": -511.4515075683594, |
|
"loss": 0.6176, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.7895443439483643, |
|
"rewards/margins": 0.8201072812080383, |
|
"rewards/rejected": -2.609651803970337, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.6019366657942947, |
|
"grad_norm": 19.471822868052573, |
|
"learning_rate": 2.0478932657817102e-07, |
|
"logits/chosen": 5034.8251953125, |
|
"logits/rejected": 4781.177734375, |
|
"logps/chosen": -387.94140625, |
|
"logps/rejected": -474.83636474609375, |
|
"loss": 0.6173, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.82735276222229, |
|
"rewards/margins": 0.8202959299087524, |
|
"rewards/rejected": -2.647648572921753, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.6071708976707668, |
|
"grad_norm": 45.513438143142956, |
|
"learning_rate": 2.0030353425145374e-07, |
|
"logits/chosen": 7131.70166015625, |
|
"logits/rejected": 6376.83056640625, |
|
"logps/chosen": -501.9178161621094, |
|
"logps/rejected": -538.24658203125, |
|
"loss": 0.6376, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.0238595008850098, |
|
"rewards/margins": 0.638025164604187, |
|
"rewards/rejected": -2.6618847846984863, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.6124051295472389, |
|
"grad_norm": 18.61685092469, |
|
"learning_rate": 1.9583434014059635e-07, |
|
"logits/chosen": 5769.359375, |
|
"logits/rejected": 4956.7412109375, |
|
"logps/chosen": -418.234375, |
|
"logps/rejected": -483.03814697265625, |
|
"loss": 0.6085, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.870987892150879, |
|
"rewards/margins": 0.8069852590560913, |
|
"rewards/rejected": -2.677973508834839, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.6176393614237111, |
|
"grad_norm": 19.392180606978926, |
|
"learning_rate": 1.9138323692012733e-07, |
|
"logits/chosen": 5019.05419921875, |
|
"logits/rejected": 4895.45458984375, |
|
"logps/chosen": -433.4505310058594, |
|
"logps/rejected": -480.860107421875, |
|
"loss": 0.6085, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.0441999435424805, |
|
"rewards/margins": 0.6482217311859131, |
|
"rewards/rejected": -2.6924219131469727, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.6228735933001832, |
|
"grad_norm": 50.383157244491294, |
|
"learning_rate": 1.8695171122236442e-07, |
|
"logits/chosen": 5166.943359375, |
|
"logits/rejected": 5133.3642578125, |
|
"logps/chosen": -406.5730285644531, |
|
"logps/rejected": -516.8052978515625, |
|
"loss": 0.6235, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9044840335845947, |
|
"rewards/margins": 0.8772269487380981, |
|
"rewards/rejected": -2.781710386276245, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.6281078251766553, |
|
"grad_norm": 23.25471727050923, |
|
"learning_rate": 1.8254124314089223e-07, |
|
"logits/chosen": 5613.8095703125, |
|
"logits/rejected": 5036.1220703125, |
|
"logps/chosen": -431.58013916015625, |
|
"logps/rejected": -522.5189208984375, |
|
"loss": 0.6149, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.9861242771148682, |
|
"rewards/margins": 1.0060144662857056, |
|
"rewards/rejected": -2.992138385772705, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.6333420570531274, |
|
"grad_norm": 27.427712896477214, |
|
"learning_rate": 1.7815330573622205e-07, |
|
"logits/chosen": 5823.63671875, |
|
"logits/rejected": 5659.783203125, |
|
"logps/chosen": -410.86138916015625, |
|
"logps/rejected": -526.7249755859375, |
|
"loss": 0.6205, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8027637004852295, |
|
"rewards/margins": 0.8670876622200012, |
|
"rewards/rejected": -2.669851303100586, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.6385762889295996, |
|
"grad_norm": 17.16161963024681, |
|
"learning_rate": 1.7378936454380274e-07, |
|
"logits/chosen": 5706.4755859375, |
|
"logits/rejected": 4772.328125, |
|
"logps/chosen": -412.3294982910156, |
|
"logps/rejected": -477.41192626953125, |
|
"loss": 0.601, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.9385788440704346, |
|
"rewards/margins": 0.7884070873260498, |
|
"rewards/rejected": -2.7269861698150635, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.6438105208060717, |
|
"grad_norm": 26.23316113841427, |
|
"learning_rate": 1.694508770845427e-07, |
|
"logits/chosen": 6720.44677734375, |
|
"logits/rejected": 5618.7529296875, |
|
"logps/chosen": -475.612060546875, |
|
"logps/rejected": -506.27984619140625, |
|
"loss": 0.6229, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.977423071861267, |
|
"rewards/margins": 0.6886818408966064, |
|
"rewards/rejected": -2.666104793548584, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.6490447526825438, |
|
"grad_norm": 21.8651357246224, |
|
"learning_rate": 1.651392923780105e-07, |
|
"logits/chosen": 6241.5029296875, |
|
"logits/rejected": 4998.0126953125, |
|
"logps/chosen": -414.9952697753906, |
|
"logps/rejected": -458.4529724121094, |
|
"loss": 0.6061, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.8033950328826904, |
|
"rewards/margins": 0.8357815742492676, |
|
"rewards/rejected": -2.639176845550537, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.654278984559016, |
|
"grad_norm": 19.845703065114936, |
|
"learning_rate": 1.6085605045847367e-07, |
|
"logits/chosen": 5718.64404296875, |
|
"logits/rejected": 4613.75634765625, |
|
"logps/chosen": -417.8412170410156, |
|
"logps/rejected": -497.18701171875, |
|
"loss": 0.6224, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.8895454406738281, |
|
"rewards/margins": 0.7920354604721069, |
|
"rewards/rejected": -2.6815807819366455, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.6595132164354881, |
|
"grad_norm": 18.36104314119822, |
|
"learning_rate": 1.5660258189393944e-07, |
|
"logits/chosen": 5908.99951171875, |
|
"logits/rejected": 4583.3828125, |
|
"logps/chosen": -426.84161376953125, |
|
"logps/rejected": -481.43865966796875, |
|
"loss": 0.6198, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8158848285675049, |
|
"rewards/margins": 0.921142578125, |
|
"rewards/rejected": -2.737027406692505, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.6647474483119602, |
|
"grad_norm": 27.47339811147932, |
|
"learning_rate": 1.5238030730835577e-07, |
|
"logits/chosen": 5228.90576171875, |
|
"logits/rejected": 5379.51708984375, |
|
"logps/chosen": -355.2702941894531, |
|
"logps/rejected": -476.2916564941406, |
|
"loss": 0.6088, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5809125900268555, |
|
"rewards/margins": 1.1285021305084229, |
|
"rewards/rejected": -2.7094149589538574, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.6699816801884323, |
|
"grad_norm": 21.733099164416224, |
|
"learning_rate": 1.4819063690713564e-07, |
|
"logits/chosen": 5919.9453125, |
|
"logits/rejected": 4732.36865234375, |
|
"logps/chosen": -406.5284118652344, |
|
"logps/rejected": -480.59552001953125, |
|
"loss": 0.6132, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.690146803855896, |
|
"rewards/margins": 0.9789739847183228, |
|
"rewards/rejected": -2.669121026992798, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.6752159120649045, |
|
"grad_norm": 27.01797105501278, |
|
"learning_rate": 1.4403497000615883e-07, |
|
"logits/chosen": 5621.28515625, |
|
"logits/rejected": 4914.8369140625, |
|
"logps/chosen": -453.36248779296875, |
|
"logps/rejected": -479.4039611816406, |
|
"loss": 0.6216, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8445937633514404, |
|
"rewards/margins": 0.883182168006897, |
|
"rewards/rejected": -2.727776050567627, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.6804501439413766, |
|
"grad_norm": 29.582455929961025, |
|
"learning_rate": 1.3991469456441272e-07, |
|
"logits/chosen": 5492.75341796875, |
|
"logits/rejected": 5214.58740234375, |
|
"logps/chosen": -382.15350341796875, |
|
"logps/rejected": -472.4346618652344, |
|
"loss": 0.6141, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.4912300109863281, |
|
"rewards/margins": 0.8933757543563843, |
|
"rewards/rejected": -2.384605646133423, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.6856843758178487, |
|
"grad_norm": 16.98125254775057, |
|
"learning_rate": 1.358311867204244e-07, |
|
"logits/chosen": 4601.31982421875, |
|
"logits/rejected": 4569.09765625, |
|
"logps/chosen": -333.4889831542969, |
|
"logps/rejected": -421.4237365722656, |
|
"loss": 0.6107, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.4790998697280884, |
|
"rewards/margins": 0.8222945928573608, |
|
"rewards/rejected": -2.30139422416687, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.6909186076943209, |
|
"grad_norm": 18.693048899733224, |
|
"learning_rate": 1.3178581033264216e-07, |
|
"logits/chosen": 6154.45166015625, |
|
"logits/rejected": 5227.0224609375, |
|
"logps/chosen": -430.81890869140625, |
|
"logps/rejected": -505.7598571777344, |
|
"loss": 0.6233, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.8447399139404297, |
|
"rewards/margins": 0.7838276624679565, |
|
"rewards/rejected": -2.628567695617676, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.696152839570793, |
|
"grad_norm": 23.20538962752919, |
|
"learning_rate": 1.2777991652391757e-07, |
|
"logits/chosen": 5333.5048828125, |
|
"logits/rejected": 3960.68212890625, |
|
"logps/chosen": -402.9344177246094, |
|
"logps/rejected": -442.1331481933594, |
|
"loss": 0.6293, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.700577735900879, |
|
"rewards/margins": 0.9019187688827515, |
|
"rewards/rejected": -2.60249662399292, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.7013870714472651, |
|
"grad_norm": 19.43268415725634, |
|
"learning_rate": 1.2381484323024178e-07, |
|
"logits/chosen": 6016.5185546875, |
|
"logits/rejected": 5181.9228515625, |
|
"logps/chosen": -408.1551818847656, |
|
"logps/rejected": -457.7464904785156, |
|
"loss": 0.6094, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.6999537944793701, |
|
"rewards/margins": 0.7662817239761353, |
|
"rewards/rejected": -2.466235637664795, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.7066213033237373, |
|
"grad_norm": 22.3621515216726, |
|
"learning_rate": 1.1989191475388516e-07, |
|
"logits/chosen": 4984.4111328125, |
|
"logits/rejected": 4563.0322265625, |
|
"logps/chosen": -346.7846374511719, |
|
"logps/rejected": -447.44586181640625, |
|
"loss": 0.621, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.6289829015731812, |
|
"rewards/margins": 0.8670762181282043, |
|
"rewards/rejected": -2.496058940887451, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.7118555352002094, |
|
"grad_norm": 16.827916345332202, |
|
"learning_rate": 1.1601244132109179e-07, |
|
"logits/chosen": 4982.31103515625, |
|
"logits/rejected": 4440.9169921875, |
|
"logps/chosen": -379.25128173828125, |
|
"logps/rejected": -465.8182067871094, |
|
"loss": 0.6101, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8500652313232422, |
|
"rewards/margins": 0.7695325016975403, |
|
"rewards/rejected": -2.619597911834717, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.7170897670766815, |
|
"grad_norm": 19.10478789750096, |
|
"learning_rate": 1.1217771864447395e-07, |
|
"logits/chosen": 5696.0634765625, |
|
"logits/rejected": 4793.515625, |
|
"logps/chosen": -422.21905517578125, |
|
"logps/rejected": -524.974609375, |
|
"loss": 0.6266, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.7729225158691406, |
|
"rewards/margins": 0.9866235852241516, |
|
"rewards/rejected": -2.7595460414886475, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.7223239989531536, |
|
"grad_norm": 19.571481210859417, |
|
"learning_rate": 1.0838902749025499e-07, |
|
"logits/chosen": 6979.7353515625, |
|
"logits/rejected": 5534.80615234375, |
|
"logps/chosen": -437.5282287597656, |
|
"logps/rejected": -475.3587341308594, |
|
"loss": 0.6206, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6889175176620483, |
|
"rewards/margins": 0.7310249209403992, |
|
"rewards/rejected": -2.4199423789978027, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.7275582308296258, |
|
"grad_norm": 23.479770735886802, |
|
"learning_rate": 1.0464763325050358e-07, |
|
"logits/chosen": 5203.9345703125, |
|
"logits/rejected": 4617.71630859375, |
|
"logps/chosen": -415.99737548828125, |
|
"logps/rejected": -473.8778381347656, |
|
"loss": 0.608, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.8423852920532227, |
|
"rewards/margins": 0.7913864850997925, |
|
"rewards/rejected": -2.6337718963623047, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.7327924627060979, |
|
"grad_norm": 28.458417507814094, |
|
"learning_rate": 1.0095478552050346e-07, |
|
"logits/chosen": 6179.98046875, |
|
"logits/rejected": 4097.23828125, |
|
"logps/chosen": -432.69146728515625, |
|
"logps/rejected": -464.85992431640625, |
|
"loss": 0.6005, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.6340433359146118, |
|
"rewards/margins": 0.9390060305595398, |
|
"rewards/rejected": -2.573049306869507, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.73802669458257, |
|
"grad_norm": 22.061291739222355, |
|
"learning_rate": 9.731171768139806e-08, |
|
"logits/chosen": 5738.4248046875, |
|
"logits/rejected": 4614.5322265625, |
|
"logps/chosen": -385.05133056640625, |
|
"logps/rejected": -455.3321838378906, |
|
"loss": 0.626, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.6064504384994507, |
|
"rewards/margins": 0.9582611322402954, |
|
"rewards/rejected": -2.564711570739746, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.7432609264590422, |
|
"grad_norm": 25.413288039384696, |
|
"learning_rate": 9.37196464882522e-08, |
|
"logits/chosen": 5494.5439453125, |
|
"logits/rejected": 4928.0751953125, |
|
"logps/chosen": -385.5731201171875, |
|
"logps/rejected": -464.8663024902344, |
|
"loss": 0.6289, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.7576345205307007, |
|
"rewards/margins": 0.8199461698532104, |
|
"rewards/rejected": -2.577580690383911, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.7484951583355143, |
|
"grad_norm": 22.45781701506148, |
|
"learning_rate": 9.017977166366444e-08, |
|
"logits/chosen": 5656.9072265625, |
|
"logits/rejected": 4975.0439453125, |
|
"logps/chosen": -404.0146789550781, |
|
"logps/rejected": -485.17022705078125, |
|
"loss": 0.623, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.6433677673339844, |
|
"rewards/margins": 0.8800700306892395, |
|
"rewards/rejected": -2.523437976837158, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.7537293902119864, |
|
"grad_norm": 13.945507178550827, |
|
"learning_rate": 8.669327549707095e-08, |
|
"logits/chosen": 5781.94189453125, |
|
"logits/rejected": 4841.93994140625, |
|
"logps/chosen": -427.2398376464844, |
|
"logps/rejected": -485.5018615722656, |
|
"loss": 0.6082, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.718334436416626, |
|
"rewards/margins": 0.9542592763900757, |
|
"rewards/rejected": -2.672593593597412, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.7589636220884585, |
|
"grad_norm": 17.966049413367486, |
|
"learning_rate": 8.326132244986931e-08, |
|
"logits/chosen": 5145.71875, |
|
"logits/rejected": 4337.2958984375, |
|
"logps/chosen": -398.82135009765625, |
|
"logps/rejected": -474.75933837890625, |
|
"loss": 0.6032, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.7171170711517334, |
|
"rewards/margins": 1.0231225490570068, |
|
"rewards/rejected": -2.7402396202087402, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.7641978539649307, |
|
"grad_norm": 20.586662671394684, |
|
"learning_rate": 7.988505876649862e-08, |
|
"logits/chosen": 5346.1103515625, |
|
"logits/rejected": 4014.310546875, |
|
"logps/chosen": -407.9379577636719, |
|
"logps/rejected": -500.1922302246094, |
|
"loss": 0.6257, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.785790205001831, |
|
"rewards/margins": 1.0074379444122314, |
|
"rewards/rejected": -2.7932276725769043, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.7694320858414028, |
|
"grad_norm": 17.218488686000693, |
|
"learning_rate": 7.656561209160248e-08, |
|
"logits/chosen": 5829.01416015625, |
|
"logits/rejected": 4944.89208984375, |
|
"logps/chosen": -427.6463928222656, |
|
"logps/rejected": -475.11236572265625, |
|
"loss": 0.596, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6583614349365234, |
|
"rewards/margins": 0.945914626121521, |
|
"rewards/rejected": -2.604275941848755, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.7746663177178749, |
|
"grad_norm": 27.7313611604028, |
|
"learning_rate": 7.330409109340562e-08, |
|
"logits/chosen": 5904.09912109375, |
|
"logits/rejected": 5181.5791015625, |
|
"logps/chosen": -440.94451904296875, |
|
"logps/rejected": -501.65545654296875, |
|
"loss": 0.5985, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.647769570350647, |
|
"rewards/margins": 0.962969183921814, |
|
"rewards/rejected": -2.610738754272461, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.7799005495943471, |
|
"grad_norm": 28.768549723017788, |
|
"learning_rate": 7.010158509342681e-08, |
|
"logits/chosen": 6550.0625, |
|
"logits/rejected": 4658.27978515625, |
|
"logps/chosen": -417.83758544921875, |
|
"logps/rejected": -465.58209228515625, |
|
"loss": 0.5979, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.625791311264038, |
|
"rewards/margins": 1.0529232025146484, |
|
"rewards/rejected": -2.6787142753601074, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.7851347814708192, |
|
"grad_norm": 26.074328942084968, |
|
"learning_rate": 6.695916370265527e-08, |
|
"logits/chosen": 5247.5302734375, |
|
"logits/rejected": 4586.5869140625, |
|
"logps/chosen": -395.1465148925781, |
|
"logps/rejected": -413.99884033203125, |
|
"loss": 0.6356, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.7414640188217163, |
|
"rewards/margins": 0.6474174857139587, |
|
"rewards/rejected": -2.3888819217681885, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.7903690133472913, |
|
"grad_norm": 21.80364567121782, |
|
"learning_rate": 6.387787646430853e-08, |
|
"logits/chosen": 6516.0478515625, |
|
"logits/rejected": 5851.53369140625, |
|
"logps/chosen": -426.70318603515625, |
|
"logps/rejected": -492.4895935058594, |
|
"loss": 0.6294, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.723693609237671, |
|
"rewards/margins": 0.7622456550598145, |
|
"rewards/rejected": -2.4859395027160645, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.7956032452237635, |
|
"grad_norm": 23.451371826789497, |
|
"learning_rate": 6.0858752503294e-08, |
|
"logits/chosen": 5100.3837890625, |
|
"logits/rejected": 4843.9755859375, |
|
"logps/chosen": -410.7384338378906, |
|
"logps/rejected": -452.9171447753906, |
|
"loss": 0.6065, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.6687591075897217, |
|
"rewards/margins": 0.6757498383522034, |
|
"rewards/rejected": -2.344508647918701, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.8008374771002356, |
|
"grad_norm": 18.4137285906291, |
|
"learning_rate": 5.7902800182489385e-08, |
|
"logits/chosen": 5347.9619140625, |
|
"logits/rejected": 5055.91455078125, |
|
"logps/chosen": -371.74029541015625, |
|
"logps/rejected": -444.6211853027344, |
|
"loss": 0.6062, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.6094753742218018, |
|
"rewards/margins": 0.9651139974594116, |
|
"rewards/rejected": -2.574589252471924, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.8060717089767077, |
|
"grad_norm": 18.372297005488328, |
|
"learning_rate": 5.5011006765957604e-08, |
|
"logits/chosen": 6517.6826171875, |
|
"logits/rejected": 5801.03955078125, |
|
"logps/chosen": -430.2518615722656, |
|
"logps/rejected": -544.8726806640625, |
|
"loss": 0.6076, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.744037389755249, |
|
"rewards/margins": 0.9321613311767578, |
|
"rewards/rejected": -2.676198720932007, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.8113059408531798, |
|
"grad_norm": 24.974440327502748, |
|
"learning_rate": 5.218433808920883e-08, |
|
"logits/chosen": 5668.3994140625, |
|
"logits/rejected": 5112.5869140625, |
|
"logps/chosen": -416.13336181640625, |
|
"logps/rejected": -498.39453125, |
|
"loss": 0.6025, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.7290430068969727, |
|
"rewards/margins": 0.8825391530990601, |
|
"rewards/rejected": -2.611582040786743, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.816540172729652, |
|
"grad_norm": 24.66708515929543, |
|
"learning_rate": 4.942373823661927e-08, |
|
"logits/chosen": 6769.8955078125, |
|
"logits/rejected": 5016.2587890625, |
|
"logps/chosen": -447.3492736816406, |
|
"logps/rejected": -503.0823669433594, |
|
"loss": 0.6096, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7596700191497803, |
|
"rewards/margins": 1.0560283660888672, |
|
"rewards/rejected": -2.8156983852386475, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.821774404606124, |
|
"grad_norm": 15.540461473239736, |
|
"learning_rate": 4.6730129226114354e-08, |
|
"logits/chosen": 5088.92236328125, |
|
"logits/rejected": 4692.33349609375, |
|
"logps/chosen": -409.94024658203125, |
|
"logps/rejected": -442.9159240722656, |
|
"loss": 0.61, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.9526259899139404, |
|
"rewards/margins": 0.727096676826477, |
|
"rewards/rejected": -2.679722547531128, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.8270086364825961, |
|
"grad_norm": 22.30927140417861, |
|
"learning_rate": 4.41044107012227e-08, |
|
"logits/chosen": 6509.494140625, |
|
"logits/rejected": 5121.66162109375, |
|
"logps/chosen": -454.4883728027344, |
|
"logps/rejected": -491.09814453125, |
|
"loss": 0.6164, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6590086221694946, |
|
"rewards/margins": 0.8761310577392578, |
|
"rewards/rejected": -2.535139560699463, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.8322428683590684, |
|
"grad_norm": 47.249244932789814, |
|
"learning_rate": 4.1547459630601966e-08, |
|
"logits/chosen": 5681.8876953125, |
|
"logits/rejected": 5076.9794921875, |
|
"logps/chosen": -435.9734802246094, |
|
"logps/rejected": -483.70458984375, |
|
"loss": 0.6239, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.8887542486190796, |
|
"rewards/margins": 0.6841882467269897, |
|
"rewards/rejected": -2.5729424953460693, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.8374771002355405, |
|
"grad_norm": 19.509237361503633, |
|
"learning_rate": 3.9060130015138857e-08, |
|
"logits/chosen": 5260.7138671875, |
|
"logits/rejected": 4629.92578125, |
|
"logps/chosen": -414.8975524902344, |
|
"logps/rejected": -494.1025390625, |
|
"loss": 0.6117, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.839999794960022, |
|
"rewards/margins": 1.0193700790405273, |
|
"rewards/rejected": -2.8593695163726807, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.8427113321120125, |
|
"grad_norm": 15.758769361501436, |
|
"learning_rate": 3.664325260271953e-08, |
|
"logits/chosen": 6010.47119140625, |
|
"logits/rejected": 5069.5751953125, |
|
"logps/chosen": -467.64404296875, |
|
"logps/rejected": -507.5274963378906, |
|
"loss": 0.6071, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.0030617713928223, |
|
"rewards/margins": 0.7443469166755676, |
|
"rewards/rejected": -2.747408390045166, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.8479455639884846, |
|
"grad_norm": 21.123986793744674, |
|
"learning_rate": 3.429763461076676e-08, |
|
"logits/chosen": 5870.20068359375, |
|
"logits/rejected": 5074.16357421875, |
|
"logps/chosen": -405.6874084472656, |
|
"logps/rejected": -476.35211181640625, |
|
"loss": 0.6096, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7369863986968994, |
|
"rewards/margins": 0.9186028242111206, |
|
"rewards/rejected": -2.6555895805358887, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.8531797958649568, |
|
"grad_norm": 19.05302083047077, |
|
"learning_rate": 3.202405945663555e-08, |
|
"logits/chosen": 5784.2412109375, |
|
"logits/rejected": 3889.80126953125, |
|
"logps/chosen": -427.1604919433594, |
|
"logps/rejected": -439.701904296875, |
|
"loss": 0.6078, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.9810470342636108, |
|
"rewards/margins": 0.740452766418457, |
|
"rewards/rejected": -2.7214999198913574, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.8584140277414289, |
|
"grad_norm": 29.86452301634578, |
|
"learning_rate": 2.9823286495958556e-08, |
|
"logits/chosen": 4778.2958984375, |
|
"logits/rejected": 5450.62451171875, |
|
"logps/chosen": -398.36407470703125, |
|
"logps/rejected": -521.3021240234375, |
|
"loss": 0.6096, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.9749752283096313, |
|
"rewards/margins": 0.7352627515792847, |
|
"rewards/rejected": -2.710237979888916, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.863648259617901, |
|
"grad_norm": 18.974661489747966, |
|
"learning_rate": 2.769605076902695e-08, |
|
"logits/chosen": 6121.0751953125, |
|
"logits/rejected": 5588.75439453125, |
|
"logps/chosen": -424.2884826660156, |
|
"logps/rejected": -515.7366943359375, |
|
"loss": 0.609, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8259862661361694, |
|
"rewards/margins": 0.7989758253097534, |
|
"rewards/rejected": -2.624962329864502, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.8688824914943732, |
|
"grad_norm": 20.830223854892928, |
|
"learning_rate": 2.5643062755293403e-08, |
|
"logits/chosen": 5408.017578125, |
|
"logits/rejected": 4577.1982421875, |
|
"logps/chosen": -427.53997802734375, |
|
"logps/rejected": -462.0577087402344, |
|
"loss": 0.6127, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8826709985733032, |
|
"rewards/margins": 0.7450687885284424, |
|
"rewards/rejected": -2.627739906311035, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.8741167233708453, |
|
"grad_norm": 30.839808557441238, |
|
"learning_rate": 2.366500813607733e-08, |
|
"logits/chosen": 6019.47412109375, |
|
"logits/rejected": 4637.82763671875, |
|
"logps/chosen": -409.47406005859375, |
|
"logps/rejected": -507.8202209472656, |
|
"loss": 0.6124, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.7381088733673096, |
|
"rewards/margins": 1.1539865732192993, |
|
"rewards/rejected": -2.8920950889587402, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.8793509552473174, |
|
"grad_norm": 22.32621549985474, |
|
"learning_rate": 2.176254756555329e-08, |
|
"logits/chosen": 6369.30859375, |
|
"logits/rejected": 5620.3662109375, |
|
"logps/chosen": -467.0570373535156, |
|
"logps/rejected": -547.2705078125, |
|
"loss": 0.5994, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.8894094228744507, |
|
"rewards/margins": 1.0848562717437744, |
|
"rewards/rejected": -2.9742655754089355, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.8845851871237895, |
|
"grad_norm": 20.301098233070547, |
|
"learning_rate": 1.9936316450097468e-08, |
|
"logits/chosen": 5071.96142578125, |
|
"logits/rejected": 4552.37353515625, |
|
"logps/chosen": -400.34100341796875, |
|
"logps/rejected": -446.0146484375, |
|
"loss": 0.61, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.8436905145645142, |
|
"rewards/margins": 0.716572105884552, |
|
"rewards/rejected": -2.560262680053711, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.8898194190002617, |
|
"grad_norm": 23.493546384450056, |
|
"learning_rate": 1.8186924736067477e-08, |
|
"logits/chosen": 5736.19921875, |
|
"logits/rejected": 4311.3408203125, |
|
"logps/chosen": -420.8236389160156, |
|
"logps/rejected": -512.0423583984375, |
|
"loss": 0.6042, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.7245066165924072, |
|
"rewards/margins": 1.156449317932129, |
|
"rewards/rejected": -2.880955219268799, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.8950536508767338, |
|
"grad_norm": 18.623486803085754, |
|
"learning_rate": 1.651495670608488e-08, |
|
"logits/chosen": 6630.7412109375, |
|
"logits/rejected": 5112.56396484375, |
|
"logps/chosen": -430.5503845214844, |
|
"logps/rejected": -508.31304931640625, |
|
"loss": 0.5846, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7643120288848877, |
|
"rewards/margins": 1.1240522861480713, |
|
"rewards/rejected": -2.888363838195801, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.9002878827532059, |
|
"grad_norm": 21.977526068073495, |
|
"learning_rate": 1.4920970783889737e-08, |
|
"logits/chosen": 6202.2060546875, |
|
"logits/rejected": 4598.1708984375, |
|
"logps/chosen": -452.6166076660156, |
|
"logps/rejected": -524.5369262695312, |
|
"loss": 0.5982, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9582983255386353, |
|
"rewards/margins": 0.9155516624450684, |
|
"rewards/rejected": -2.873849868774414, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.9055221146296781, |
|
"grad_norm": 12.755570308165497, |
|
"learning_rate": 1.340549934783164e-08, |
|
"logits/chosen": 5910.86328125, |
|
"logits/rejected": 5579.3876953125, |
|
"logps/chosen": -443.11163330078125, |
|
"logps/rejected": -530.6002197265625, |
|
"loss": 0.5984, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8624699115753174, |
|
"rewards/margins": 0.8643971681594849, |
|
"rewards/rejected": -2.726867198944092, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.9107563465061502, |
|
"grad_norm": 23.464328832306045, |
|
"learning_rate": 1.1969048553059608e-08, |
|
"logits/chosen": 5595.259765625, |
|
"logits/rejected": 4795.32080078125, |
|
"logps/chosen": -382.4716796875, |
|
"logps/rejected": -451.7056579589844, |
|
"loss": 0.621, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.7278823852539062, |
|
"rewards/margins": 0.8011847734451294, |
|
"rewards/rejected": -2.529067277908325, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.9159905783826223, |
|
"grad_norm": 22.662637254674035, |
|
"learning_rate": 1.06120981624703e-08, |
|
"logits/chosen": 5303.560546875, |
|
"logits/rejected": 5642.16650390625, |
|
"logps/chosen": -418.61309814453125, |
|
"logps/rejected": -528.3426513671875, |
|
"loss": 0.6137, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.819700837135315, |
|
"rewards/margins": 0.8951080441474915, |
|
"rewards/rejected": -2.714808702468872, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.9212248102590945, |
|
"grad_norm": 23.37220649579407, |
|
"learning_rate": 9.335101386471284e-09, |
|
"logits/chosen": 6105.37158203125, |
|
"logits/rejected": 5412.89892578125, |
|
"logps/chosen": -447.61993408203125, |
|
"logps/rejected": -507.3324279785156, |
|
"loss": 0.6005, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.9051244258880615, |
|
"rewards/margins": 0.883420467376709, |
|
"rewards/rejected": -2.7885448932647705, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.9264590421355666, |
|
"grad_norm": 31.57553761420153, |
|
"learning_rate": 8.138484731612273e-09, |
|
"logits/chosen": 5806.66064453125, |
|
"logits/rejected": 4830.857421875, |
|
"logps/chosen": -429.99420166015625, |
|
"logps/rejected": -527.69140625, |
|
"loss": 0.6107, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8986709117889404, |
|
"rewards/margins": 0.999901294708252, |
|
"rewards/rejected": -2.8985724449157715, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.9316932740120387, |
|
"grad_norm": 24.672880887648823, |
|
"learning_rate": 7.0226478581355e-09, |
|
"logits/chosen": 5885.85205078125, |
|
"logits/rejected": 5139.58203125, |
|
"logps/chosen": -445.98675537109375, |
|
"logps/rejected": -503.46337890625, |
|
"loss": 0.6272, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.0940308570861816, |
|
"rewards/margins": 0.7923761606216431, |
|
"rewards/rejected": -2.886406660079956, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.9369275058885108, |
|
"grad_norm": 18.080254178645642, |
|
"learning_rate": 5.987963446492383e-09, |
|
"logits/chosen": 5920.791015625, |
|
"logits/rejected": 5237.79833984375, |
|
"logps/chosen": -406.27386474609375, |
|
"logps/rejected": -479.7198181152344, |
|
"loss": 0.5786, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7285455465316772, |
|
"rewards/margins": 0.9587591886520386, |
|
"rewards/rejected": -2.687304735183716, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.942161737764983, |
|
"grad_norm": 35.07844691929086, |
|
"learning_rate": 5.0347770728713935e-09, |
|
"logits/chosen": 5880.59228515625, |
|
"logits/rejected": 4549.359375, |
|
"logps/chosen": -462.1459045410156, |
|
"logps/rejected": -468.9349670410156, |
|
"loss": 0.6162, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.7892353534698486, |
|
"rewards/margins": 0.8386019468307495, |
|
"rewards/rejected": -2.6278374195098877, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.9473959696414551, |
|
"grad_norm": 19.945059521235283, |
|
"learning_rate": 4.1634070937782424e-09, |
|
"logits/chosen": 5899.3720703125, |
|
"logits/rejected": 5313.3671875, |
|
"logps/chosen": -451.93212890625, |
|
"logps/rejected": -543.2415771484375, |
|
"loss": 0.6142, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.9903990030288696, |
|
"rewards/margins": 0.9100092649459839, |
|
"rewards/rejected": -2.9004082679748535, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.9526302015179272, |
|
"grad_norm": 24.094584349575342, |
|
"learning_rate": 3.3741445397075797e-09, |
|
"logits/chosen": 6125.74267578125, |
|
"logits/rejected": 5158.01171875, |
|
"logps/chosen": -463.64044189453125, |
|
"logps/rejected": -555.1447143554688, |
|
"loss": 0.6252, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9464343786239624, |
|
"rewards/margins": 1.044654130935669, |
|
"rewards/rejected": -2.9910888671875, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.9578644333943994, |
|
"grad_norm": 25.276279664246026, |
|
"learning_rate": 2.667253017941018e-09, |
|
"logits/chosen": 6131.8310546875, |
|
"logits/rejected": 4804.04150390625, |
|
"logps/chosen": -452.3642578125, |
|
"logps/rejected": -507.6914978027344, |
|
"loss": 0.5973, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.9133832454681396, |
|
"rewards/margins": 0.8848444223403931, |
|
"rewards/rejected": -2.798227548599243, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.9630986652708715, |
|
"grad_norm": 22.802704931718225, |
|
"learning_rate": 2.0429686245045097e-09, |
|
"logits/chosen": 5988.15625, |
|
"logits/rejected": 4626.0927734375, |
|
"logps/chosen": -486.51708984375, |
|
"logps/rejected": -504.944091796875, |
|
"loss": 0.6291, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9659137725830078, |
|
"rewards/margins": 0.8604008555412292, |
|
"rewards/rejected": -2.826314687728882, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.9683328971473436, |
|
"grad_norm": 26.969071687122177, |
|
"learning_rate": 1.5014998653141708e-09, |
|
"logits/chosen": 5640.72021484375, |
|
"logits/rejected": 4785.45068359375, |
|
"logps/chosen": -440.749267578125, |
|
"logps/rejected": -500.2676696777344, |
|
"loss": 0.6259, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8496116399765015, |
|
"rewards/margins": 1.0721490383148193, |
|
"rewards/rejected": -2.9217605590820312, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.9735671290238157, |
|
"grad_norm": 22.885075554568353, |
|
"learning_rate": 1.0430275865371263e-09, |
|
"logits/chosen": 5859.7861328125, |
|
"logits/rejected": 4826.97119140625, |
|
"logps/chosen": -409.632568359375, |
|
"logps/rejected": -510.0669860839844, |
|
"loss": 0.6016, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.9688892364501953, |
|
"rewards/margins": 1.0203845500946045, |
|
"rewards/rejected": -2.9892735481262207, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.9788013609002879, |
|
"grad_norm": 25.424962808525937, |
|
"learning_rate": 6.677049141901314e-10, |
|
"logits/chosen": 4790.49072265625, |
|
"logits/rejected": 4639.8623046875, |
|
"logps/chosen": -394.59674072265625, |
|
"logps/rejected": -495.4620666503906, |
|
"loss": 0.6084, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.8832927942276, |
|
"rewards/margins": 0.9284135103225708, |
|
"rewards/rejected": -2.811706066131592, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.98403559277676, |
|
"grad_norm": 16.314513060865362, |
|
"learning_rate": 3.7565720299687077e-10, |
|
"logits/chosen": 6143.9091796875, |
|
"logits/rejected": 5207.35400390625, |
|
"logps/chosen": -465.2191467285156, |
|
"logps/rejected": -504.1424865722656, |
|
"loss": 0.5934, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.917109727859497, |
|
"rewards/margins": 0.8995591998100281, |
|
"rewards/rejected": -2.81666898727417, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.9892698246532321, |
|
"grad_norm": 26.393655113815115, |
|
"learning_rate": 1.6698199452053197e-10, |
|
"logits/chosen": 4443.6845703125, |
|
"logits/rejected": 4451.62548828125, |
|
"logps/chosen": -400.55633544921875, |
|
"logps/rejected": -473.33331298828125, |
|
"loss": 0.6138, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.8365901708602905, |
|
"rewards/margins": 0.7946940064430237, |
|
"rewards/rejected": -2.631284236907959, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.9945040565297043, |
|
"grad_norm": 28.937103875297968, |
|
"learning_rate": 4.174898458556009e-11, |
|
"logits/chosen": 6005.9638671875, |
|
"logits/rejected": 4214.5224609375, |
|
"logps/chosen": -429.625, |
|
"logps/rejected": -486.3451232910156, |
|
"loss": 0.6063, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9642302989959717, |
|
"rewards/margins": 0.9053429365158081, |
|
"rewards/rejected": -2.8695731163024902, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.9997382884061764, |
|
"grad_norm": 57.71415226213478, |
|
"learning_rate": 0.0, |
|
"logits/chosen": 6091.05859375, |
|
"logits/rejected": 4940.8408203125, |
|
"logps/chosen": -462.4815979003906, |
|
"logps/rejected": -539.9644165039062, |
|
"loss": 0.6206, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.0189812183380127, |
|
"rewards/margins": 0.8955272436141968, |
|
"rewards/rejected": -2.91450834274292, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.9997382884061764, |
|
"step": 1910, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6271847719921492, |
|
"train_runtime": 17433.9091, |
|
"train_samples_per_second": 3.507, |
|
"train_steps_per_second": 0.11 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1910, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|