|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997056226081837, |
|
"eval_steps": 500, |
|
"global_step": 1698, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0005887547836326171, |
|
"grad_norm": 5408.07341365347, |
|
"learning_rate": 2.941176470588235e-09, |
|
"logits/chosen": 6646.15966796875, |
|
"logits/rejected": 3119.63818359375, |
|
"logps/chosen": -368.2507019042969, |
|
"logps/rejected": -168.0050048828125, |
|
"loss": 515.6205, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0058875478363261706, |
|
"grad_norm": 4902.986980984481, |
|
"learning_rate": 2.941176470588235e-08, |
|
"logits/chosen": 4868.25439453125, |
|
"logits/rejected": 4348.16943359375, |
|
"logps/chosen": -285.6579895019531, |
|
"logps/rejected": -243.44537353515625, |
|
"loss": 530.529, |
|
"rewards/accuracies": 0.4722222089767456, |
|
"rewards/chosen": 0.015141813084483147, |
|
"rewards/margins": -0.0011655373964458704, |
|
"rewards/rejected": 0.016307353973388672, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.011775095672652341, |
|
"grad_norm": 4763.6399184264565, |
|
"learning_rate": 5.88235294117647e-08, |
|
"logits/chosen": 5758.640625, |
|
"logits/rejected": 5330.70556640625, |
|
"logps/chosen": -287.11346435546875, |
|
"logps/rejected": -279.5744323730469, |
|
"loss": 576.8336, |
|
"rewards/accuracies": 0.5083333253860474, |
|
"rewards/chosen": 0.2108406275510788, |
|
"rewards/margins": 0.08644243329763412, |
|
"rewards/rejected": 0.12439820915460587, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01766264350897851, |
|
"grad_norm": 4716.064892413704, |
|
"learning_rate": 8.823529411764706e-08, |
|
"logits/chosen": 6295.79150390625, |
|
"logits/rejected": 5043.77001953125, |
|
"logps/chosen": -334.138916015625, |
|
"logps/rejected": -277.32574462890625, |
|
"loss": 562.3636, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 1.4467318058013916, |
|
"rewards/margins": 0.04832734167575836, |
|
"rewards/rejected": 1.3984044790267944, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.023550191345304682, |
|
"grad_norm": 4755.394333837509, |
|
"learning_rate": 1.176470588235294e-07, |
|
"logits/chosen": 5310.7216796875, |
|
"logits/rejected": 4463.6806640625, |
|
"logps/chosen": -257.0677185058594, |
|
"logps/rejected": -237.11923217773438, |
|
"loss": 526.118, |
|
"rewards/accuracies": 0.5416666865348816, |
|
"rewards/chosen": 4.58669376373291, |
|
"rewards/margins": 0.1042458787560463, |
|
"rewards/rejected": 4.482447624206543, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02943773918163085, |
|
"grad_norm": 4087.670323308074, |
|
"learning_rate": 1.4705882352941175e-07, |
|
"logits/chosen": 5372.1640625, |
|
"logits/rejected": 4366.4892578125, |
|
"logps/chosen": -257.9158935546875, |
|
"logps/rejected": -243.09597778320312, |
|
"loss": 525.4981, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 9.666664123535156, |
|
"rewards/margins": 0.8064844012260437, |
|
"rewards/rejected": 8.860177993774414, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03532528701795702, |
|
"grad_norm": 3523.9425676434603, |
|
"learning_rate": 1.764705882352941e-07, |
|
"logits/chosen": 5514.87255859375, |
|
"logits/rejected": 4266.23291015625, |
|
"logps/chosen": -282.6498107910156, |
|
"logps/rejected": -233.466552734375, |
|
"loss": 506.6934, |
|
"rewards/accuracies": 0.5166667103767395, |
|
"rewards/chosen": 14.391616821289062, |
|
"rewards/margins": 0.46752357482910156, |
|
"rewards/rejected": 13.924093246459961, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04121283485428319, |
|
"grad_norm": 2850.4471378562707, |
|
"learning_rate": 2.0588235294117645e-07, |
|
"logits/chosen": 5830.0673828125, |
|
"logits/rejected": 5022.267578125, |
|
"logps/chosen": -273.00885009765625, |
|
"logps/rejected": -273.3937072753906, |
|
"loss": 522.7681, |
|
"rewards/accuracies": 0.5333333611488342, |
|
"rewards/chosen": 19.875988006591797, |
|
"rewards/margins": 2.0218400955200195, |
|
"rewards/rejected": 17.85414695739746, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.047100382690609364, |
|
"grad_norm": 3096.4559467407053, |
|
"learning_rate": 2.352941176470588e-07, |
|
"logits/chosen": 5268.9208984375, |
|
"logits/rejected": 4395.24169921875, |
|
"logps/chosen": -250.491455078125, |
|
"logps/rejected": -238.2079620361328, |
|
"loss": 512.6993, |
|
"rewards/accuracies": 0.5916666388511658, |
|
"rewards/chosen": 22.815217971801758, |
|
"rewards/margins": 1.410942554473877, |
|
"rewards/rejected": 21.404273986816406, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05298793052693553, |
|
"grad_norm": 2723.779795541685, |
|
"learning_rate": 2.6470588235294114e-07, |
|
"logits/chosen": 4833.4150390625, |
|
"logits/rejected": 4357.6396484375, |
|
"logps/chosen": -252.0074005126953, |
|
"logps/rejected": -247.14529418945312, |
|
"loss": 462.3297, |
|
"rewards/accuracies": 0.533333420753479, |
|
"rewards/chosen": 25.25003433227539, |
|
"rewards/margins": 0.849066436290741, |
|
"rewards/rejected": 24.400968551635742, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.0588754783632617, |
|
"grad_norm": 2708.243095974589, |
|
"learning_rate": 2.941176470588235e-07, |
|
"logits/chosen": 5773.37646484375, |
|
"logits/rejected": 5027.79248046875, |
|
"logps/chosen": -262.8667907714844, |
|
"logps/rejected": -230.8015594482422, |
|
"loss": 479.7382, |
|
"rewards/accuracies": 0.491666704416275, |
|
"rewards/chosen": 27.713184356689453, |
|
"rewards/margins": 0.3247580826282501, |
|
"rewards/rejected": 27.388423919677734, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06476302619958788, |
|
"grad_norm": 2732.3097350474873, |
|
"learning_rate": 3.2352941176470586e-07, |
|
"logits/chosen": 5362.26171875, |
|
"logits/rejected": 4178.3798828125, |
|
"logps/chosen": -232.09896850585938, |
|
"logps/rejected": -222.7689208984375, |
|
"loss": 461.3743, |
|
"rewards/accuracies": 0.5166667103767395, |
|
"rewards/chosen": 28.291431427001953, |
|
"rewards/margins": -0.7825387120246887, |
|
"rewards/rejected": 29.073970794677734, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.07065057403591404, |
|
"grad_norm": 3009.873907616259, |
|
"learning_rate": 3.529411764705882e-07, |
|
"logits/chosen": 5400.1884765625, |
|
"logits/rejected": 5186.6875, |
|
"logps/chosen": -260.42864990234375, |
|
"logps/rejected": -224.1237030029297, |
|
"loss": 507.8262, |
|
"rewards/accuracies": 0.5083333253860474, |
|
"rewards/chosen": 29.294321060180664, |
|
"rewards/margins": -0.6264778971672058, |
|
"rewards/rejected": 29.920801162719727, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.07653812187224021, |
|
"grad_norm": 3086.941224435285, |
|
"learning_rate": 3.8235294117647053e-07, |
|
"logits/chosen": 5547.7021484375, |
|
"logits/rejected": 4927.2275390625, |
|
"logps/chosen": -256.7834167480469, |
|
"logps/rejected": -220.9555206298828, |
|
"loss": 503.0002, |
|
"rewards/accuracies": 0.4583333432674408, |
|
"rewards/chosen": 33.36304473876953, |
|
"rewards/margins": -3.1545910835266113, |
|
"rewards/rejected": 36.51763153076172, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.08242566970856638, |
|
"grad_norm": 2711.0260281027327, |
|
"learning_rate": 4.117647058823529e-07, |
|
"logits/chosen": 5286.3466796875, |
|
"logits/rejected": 4290.73681640625, |
|
"logps/chosen": -239.8748779296875, |
|
"logps/rejected": -204.95193481445312, |
|
"loss": 494.5457, |
|
"rewards/accuracies": 0.5583333373069763, |
|
"rewards/chosen": 34.8304443359375, |
|
"rewards/margins": 0.9059675335884094, |
|
"rewards/rejected": 33.92447280883789, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.08831321754489255, |
|
"grad_norm": 3265.015457879219, |
|
"learning_rate": 4.4117647058823526e-07, |
|
"logits/chosen": 5884.11962890625, |
|
"logits/rejected": 5200.5625, |
|
"logps/chosen": -247.31649780273438, |
|
"logps/rejected": -240.7677001953125, |
|
"loss": 469.0736, |
|
"rewards/accuracies": 0.5166666507720947, |
|
"rewards/chosen": 33.2108154296875, |
|
"rewards/margins": -0.9206498861312866, |
|
"rewards/rejected": 34.1314697265625, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.09420076538121873, |
|
"grad_norm": 2961.354597919322, |
|
"learning_rate": 4.705882352941176e-07, |
|
"logits/chosen": 5862.7861328125, |
|
"logits/rejected": 4168.9775390625, |
|
"logps/chosen": -273.9336853027344, |
|
"logps/rejected": -230.8885498046875, |
|
"loss": 503.9816, |
|
"rewards/accuracies": 0.5166666507720947, |
|
"rewards/chosen": 36.73265075683594, |
|
"rewards/margins": -0.693622887134552, |
|
"rewards/rejected": 37.42627716064453, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.1000883132175449, |
|
"grad_norm": 8765.049216620555, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 5769.8330078125, |
|
"logits/rejected": 4647.8681640625, |
|
"logps/chosen": -261.44683837890625, |
|
"logps/rejected": -218.4137725830078, |
|
"loss": 464.5215, |
|
"rewards/accuracies": 0.5083333253860474, |
|
"rewards/chosen": 34.85304641723633, |
|
"rewards/margins": 1.9801647663116455, |
|
"rewards/rejected": 32.87287521362305, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.10597586105387106, |
|
"grad_norm": 3053.638703416719, |
|
"learning_rate": 4.999471618320338e-07, |
|
"logits/chosen": 5154.2353515625, |
|
"logits/rejected": 5318.90771484375, |
|
"logps/chosen": -236.9143829345703, |
|
"logps/rejected": -244.16336059570312, |
|
"loss": 477.8162, |
|
"rewards/accuracies": 0.5083333253860474, |
|
"rewards/chosen": 36.37485885620117, |
|
"rewards/margins": 2.0390517711639404, |
|
"rewards/rejected": 34.33580780029297, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.11186340889019723, |
|
"grad_norm": 2672.8458181578076, |
|
"learning_rate": 4.997886696631114e-07, |
|
"logits/chosen": 5255.52392578125, |
|
"logits/rejected": 5058.0771484375, |
|
"logps/chosen": -255.951171875, |
|
"logps/rejected": -250.9419403076172, |
|
"loss": 495.5926, |
|
"rewards/accuracies": 0.5583333969116211, |
|
"rewards/chosen": 37.5173454284668, |
|
"rewards/margins": 3.499251127243042, |
|
"rewards/rejected": 34.018096923828125, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.1177509567265234, |
|
"grad_norm": 3420.636866295782, |
|
"learning_rate": 4.995245904887195e-07, |
|
"logits/chosen": 5120.73779296875, |
|
"logits/rejected": 4294.0244140625, |
|
"logps/chosen": -234.7885284423828, |
|
"logps/rejected": -197.4206085205078, |
|
"loss": 437.2042, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 35.10551071166992, |
|
"rewards/margins": -1.353625774383545, |
|
"rewards/rejected": 36.459136962890625, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.12363850456284957, |
|
"grad_norm": 5077.60876689098, |
|
"learning_rate": 4.991550359365359e-07, |
|
"logits/chosen": 5475.88623046875, |
|
"logits/rejected": 4547.7431640625, |
|
"logps/chosen": -241.307373046875, |
|
"logps/rejected": -231.92361450195312, |
|
"loss": 467.975, |
|
"rewards/accuracies": 0.5833333730697632, |
|
"rewards/chosen": 40.85314178466797, |
|
"rewards/margins": 5.875087261199951, |
|
"rewards/rejected": 34.978065490722656, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.12952605239917575, |
|
"grad_norm": 3020.4929051704753, |
|
"learning_rate": 4.986801622192453e-07, |
|
"logits/chosen": 5030.41748046875, |
|
"logits/rejected": 4329.310546875, |
|
"logps/chosen": -230.5275421142578, |
|
"logps/rejected": -212.1551513671875, |
|
"loss": 443.3945, |
|
"rewards/accuracies": 0.5666666626930237, |
|
"rewards/chosen": 36.29066848754883, |
|
"rewards/margins": -0.13148090243339539, |
|
"rewards/rejected": 36.422149658203125, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.13541360023550192, |
|
"grad_norm": 2899.0447768694994, |
|
"learning_rate": 4.98100170068505e-07, |
|
"logits/chosen": 5328.3818359375, |
|
"logits/rejected": 4376.3935546875, |
|
"logps/chosen": -238.4833984375, |
|
"logps/rejected": -229.43783569335938, |
|
"loss": 473.2981, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 37.569854736328125, |
|
"rewards/margins": 4.89124059677124, |
|
"rewards/rejected": 32.678611755371094, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.1413011480718281, |
|
"grad_norm": 2663.071444573461, |
|
"learning_rate": 4.974153046500967e-07, |
|
"logits/chosen": 4790.46044921875, |
|
"logits/rejected": 3993.68017578125, |
|
"logps/chosen": -219.05197143554688, |
|
"logps/rejected": -215.5940399169922, |
|
"loss": 459.7012, |
|
"rewards/accuracies": 0.5916666984558105, |
|
"rewards/chosen": 38.187828063964844, |
|
"rewards/margins": -2.219409227371216, |
|
"rewards/rejected": 40.4072380065918, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.14718869590815425, |
|
"grad_norm": 2983.4293165585104, |
|
"learning_rate": 4.966258554602924e-07, |
|
"logits/chosen": 5798.11181640625, |
|
"logits/rejected": 4796.2119140625, |
|
"logps/chosen": -287.9047546386719, |
|
"logps/rejected": -235.2962188720703, |
|
"loss": 463.4111, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 48.266197204589844, |
|
"rewards/margins": 11.696329116821289, |
|
"rewards/rejected": 36.56986618041992, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.15307624374448042, |
|
"grad_norm": 3316.6681927395334, |
|
"learning_rate": 4.957321562034833e-07, |
|
"logits/chosen": 5329.43408203125, |
|
"logits/rejected": 4054.041015625, |
|
"logps/chosen": -220.0008087158203, |
|
"logps/rejected": -181.2195587158203, |
|
"loss": 469.6311, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 38.309364318847656, |
|
"rewards/margins": 4.661072254180908, |
|
"rewards/rejected": 33.648292541503906, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.1589637915808066, |
|
"grad_norm": 3010.7792525814666, |
|
"learning_rate": 4.94734584651121e-07, |
|
"logits/chosen": 5138.2724609375, |
|
"logits/rejected": 4814.1396484375, |
|
"logps/chosen": -246.031005859375, |
|
"logps/rejected": -220.73782348632812, |
|
"loss": 495.8528, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 46.25344467163086, |
|
"rewards/margins": -3.6372578144073486, |
|
"rewards/rejected": 49.89070510864258, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.16485133941713276, |
|
"grad_norm": 2682.596823641572, |
|
"learning_rate": 4.936335624820313e-07, |
|
"logits/chosen": 5137.6005859375, |
|
"logits/rejected": 4301.2041015625, |
|
"logps/chosen": -215.6089630126953, |
|
"logps/rejected": -189.99356079101562, |
|
"loss": 427.9188, |
|
"rewards/accuracies": 0.5250000357627869, |
|
"rewards/chosen": 34.9045295715332, |
|
"rewards/margins": -3.837693452835083, |
|
"rewards/rejected": 38.742218017578125, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.17073888725345893, |
|
"grad_norm": 2706.0873254037065, |
|
"learning_rate": 4.924295551041687e-07, |
|
"logits/chosen": 5884.3798828125, |
|
"logits/rejected": 4964.26611328125, |
|
"logps/chosen": -255.06588745117188, |
|
"logps/rejected": -220.08206176757812, |
|
"loss": 452.4157, |
|
"rewards/accuracies": 0.5750000476837158, |
|
"rewards/chosen": 42.134403228759766, |
|
"rewards/margins": 0.6829560995101929, |
|
"rewards/rejected": 41.45145034790039, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.1766264350897851, |
|
"grad_norm": 2703.151210592447, |
|
"learning_rate": 4.911230714578858e-07, |
|
"logits/chosen": 5192.1044921875, |
|
"logits/rejected": 4181.39599609375, |
|
"logps/chosen": -228.7882080078125, |
|
"logps/rejected": -204.47088623046875, |
|
"loss": 458.6937, |
|
"rewards/accuracies": 0.6166666150093079, |
|
"rewards/chosen": 39.62172317504883, |
|
"rewards/margins": 2.2139618396759033, |
|
"rewards/rejected": 37.40776062011719, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.18251398292611126, |
|
"grad_norm": 2902.6849576623777, |
|
"learning_rate": 4.897146638008011e-07, |
|
"logits/chosen": 4957.9501953125, |
|
"logits/rejected": 4260.34423828125, |
|
"logps/chosen": -221.3501434326172, |
|
"logps/rejected": -205.3225860595703, |
|
"loss": 466.1617, |
|
"rewards/accuracies": 0.5833333730697632, |
|
"rewards/chosen": 42.29875564575195, |
|
"rewards/margins": 6.2964630126953125, |
|
"rewards/rejected": 36.00229263305664, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.18840153076243746, |
|
"grad_norm": 2631.6984767702515, |
|
"learning_rate": 4.882049274743577e-07, |
|
"logits/chosen": 5455.85986328125, |
|
"logits/rejected": 4963.1669921875, |
|
"logps/chosen": -277.1952819824219, |
|
"logps/rejected": -246.1964874267578, |
|
"loss": 467.9413, |
|
"rewards/accuracies": 0.5583333969116211, |
|
"rewards/chosen": 41.07880783081055, |
|
"rewards/margins": 2.369145154953003, |
|
"rewards/rejected": 38.70966339111328, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.19428907859876363, |
|
"grad_norm": 2594.7782737752327, |
|
"learning_rate": 4.865945006521683e-07, |
|
"logits/chosen": 4765.1201171875, |
|
"logits/rejected": 4544.0693359375, |
|
"logps/chosen": -196.0832977294922, |
|
"logps/rejected": -211.2700958251953, |
|
"loss": 422.8422, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 37.63249588012695, |
|
"rewards/margins": -2.908146381378174, |
|
"rewards/rejected": 40.5406379699707, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.2001766264350898, |
|
"grad_norm": 2317.1385525955225, |
|
"learning_rate": 4.848840640702564e-07, |
|
"logits/chosen": 4887.91748046875, |
|
"logits/rejected": 4849.06982421875, |
|
"logps/chosen": -214.12826538085938, |
|
"logps/rejected": -232.1658172607422, |
|
"loss": 473.8225, |
|
"rewards/accuracies": 0.6083333492279053, |
|
"rewards/chosen": 41.86516571044922, |
|
"rewards/margins": 2.8288767337799072, |
|
"rewards/rejected": 39.03628158569336, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.20606417427141596, |
|
"grad_norm": 2802.6550557671103, |
|
"learning_rate": 4.83074340739305e-07, |
|
"logits/chosen": 5358.7685546875, |
|
"logits/rejected": 4593.4169921875, |
|
"logps/chosen": -254.9344940185547, |
|
"logps/rejected": -211.921875, |
|
"loss": 473.8788, |
|
"rewards/accuracies": 0.5833333730697632, |
|
"rewards/chosen": 44.27037811279297, |
|
"rewards/margins": 5.671798229217529, |
|
"rewards/rejected": 38.59857940673828, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.21195172210774213, |
|
"grad_norm": 2184.313945380869, |
|
"learning_rate": 4.811660956390372e-07, |
|
"logits/chosen": 4931.3828125, |
|
"logits/rejected": 4783.0810546875, |
|
"logps/chosen": -215.4976348876953, |
|
"logps/rejected": -207.6850128173828, |
|
"loss": 439.7563, |
|
"rewards/accuracies": 0.5666667222976685, |
|
"rewards/chosen": 38.79738235473633, |
|
"rewards/margins": -7.209702491760254, |
|
"rewards/rejected": 46.007080078125, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.2178392699440683, |
|
"grad_norm": 2653.3204513683368, |
|
"learning_rate": 4.791601353948536e-07, |
|
"logits/chosen": 6008.3359375, |
|
"logits/rejected": 5451.3759765625, |
|
"logps/chosen": -249.03750610351562, |
|
"logps/rejected": -227.90884399414062, |
|
"loss": 427.3288, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 40.34531021118164, |
|
"rewards/margins": 4.3100738525390625, |
|
"rewards/rejected": 36.03523635864258, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.22372681778039447, |
|
"grad_norm": 3077.404166225713, |
|
"learning_rate": 4.77057307936869e-07, |
|
"logits/chosen": 6118.6044921875, |
|
"logits/rejected": 5277.0595703125, |
|
"logps/chosen": -251.12112426757812, |
|
"logps/rejected": -247.95870971679688, |
|
"loss": 502.4088, |
|
"rewards/accuracies": 0.5083333253860474, |
|
"rewards/chosen": 44.55669403076172, |
|
"rewards/margins": 4.328840255737305, |
|
"rewards/rejected": 40.22785186767578, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.22961436561672063, |
|
"grad_norm": 2533.1551132660197, |
|
"learning_rate": 4.748585021414868e-07, |
|
"logits/chosen": 5564.0263671875, |
|
"logits/rejected": 4396.427734375, |
|
"logps/chosen": -245.1484375, |
|
"logps/rejected": -219.17623901367188, |
|
"loss": 478.5578, |
|
"rewards/accuracies": 0.5583333969116211, |
|
"rewards/chosen": 38.58305358886719, |
|
"rewards/margins": 3.148397445678711, |
|
"rewards/rejected": 35.434654235839844, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.2355019134530468, |
|
"grad_norm": 2512.4648549451554, |
|
"learning_rate": 4.7256464745566647e-07, |
|
"logits/chosen": 4976.19189453125, |
|
"logits/rejected": 4289.302734375, |
|
"logps/chosen": -223.78713989257812, |
|
"logps/rejected": -202.24887084960938, |
|
"loss": 473.5983, |
|
"rewards/accuracies": 0.5583333373069763, |
|
"rewards/chosen": 39.46695327758789, |
|
"rewards/margins": -2.2979044914245605, |
|
"rewards/rejected": 41.764854431152344, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.24138946128937297, |
|
"grad_norm": 2733.908890696179, |
|
"learning_rate": 4.7017671350404144e-07, |
|
"logits/chosen": 5178.62890625, |
|
"logits/rejected": 4491.7177734375, |
|
"logps/chosen": -238.9555206298828, |
|
"logps/rejected": -209.7996063232422, |
|
"loss": 460.1955, |
|
"rewards/accuracies": 0.5833333730697632, |
|
"rewards/chosen": 44.30680847167969, |
|
"rewards/margins": 7.162436485290527, |
|
"rewards/rejected": 37.14437484741211, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.24727700912569914, |
|
"grad_norm": 2844.9689261331014, |
|
"learning_rate": 4.676957096790536e-07, |
|
"logits/chosen": 4867.9912109375, |
|
"logits/rejected": 4110.99658203125, |
|
"logps/chosen": -222.1897430419922, |
|
"logps/rejected": -214.99496459960938, |
|
"loss": 447.1411, |
|
"rewards/accuracies": 0.6333333253860474, |
|
"rewards/chosen": 43.50336456298828, |
|
"rewards/margins": 8.540742874145508, |
|
"rewards/rejected": 34.96261978149414, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.25316455696202533, |
|
"grad_norm": 2563.6479429015913, |
|
"learning_rate": 4.651226847142774e-07, |
|
"logits/chosen": 5018.8154296875, |
|
"logits/rejected": 3969.06298828125, |
|
"logps/chosen": -221.9290771484375, |
|
"logps/rejected": -184.1993865966797, |
|
"loss": 464.5543, |
|
"rewards/accuracies": 0.5916666388511658, |
|
"rewards/chosen": 48.811912536621094, |
|
"rewards/margins": 7.742800712585449, |
|
"rewards/rejected": 41.06911087036133, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.2590521047983515, |
|
"grad_norm": 2122.0109615432093, |
|
"learning_rate": 4.6245872624111524e-07, |
|
"logits/chosen": 5818.7001953125, |
|
"logits/rejected": 4908.8974609375, |
|
"logps/chosen": -259.19195556640625, |
|
"logps/rejected": -225.0427703857422, |
|
"loss": 474.2507, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 41.42036056518555, |
|
"rewards/margins": -0.13602538406848907, |
|
"rewards/rejected": 41.55638885498047, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.26493965263467767, |
|
"grad_norm": 2624.9452504951946, |
|
"learning_rate": 4.59704960329049e-07, |
|
"logits/chosen": 4902.2177734375, |
|
"logits/rejected": 4126.1279296875, |
|
"logps/chosen": -221.35159301757812, |
|
"logps/rejected": -205.9199676513672, |
|
"loss": 447.7888, |
|
"rewards/accuracies": 0.5166667103767395, |
|
"rewards/chosen": 38.81290817260742, |
|
"rewards/margins": 0.12556418776512146, |
|
"rewards/rejected": 38.687339782714844, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.27082720047100384, |
|
"grad_norm": 3346.54251454803, |
|
"learning_rate": 4.5686255100964534e-07, |
|
"logits/chosen": 5134.34228515625, |
|
"logits/rejected": 4823.8193359375, |
|
"logps/chosen": -239.41934204101562, |
|
"logps/rejected": -235.3642578125, |
|
"loss": 485.9119, |
|
"rewards/accuracies": 0.5666667222976685, |
|
"rewards/chosen": 43.27761459350586, |
|
"rewards/margins": 1.6956627368927002, |
|
"rewards/rejected": 41.58195114135742, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.27671474830733, |
|
"grad_norm": 2976.593415018288, |
|
"learning_rate": 4.539326997845123e-07, |
|
"logits/chosen": 6470.69140625, |
|
"logits/rejected": 5451.03955078125, |
|
"logps/chosen": -285.16925048828125, |
|
"logps/rejected": -258.68975830078125, |
|
"loss": 488.3334, |
|
"rewards/accuracies": 0.5750000476837158, |
|
"rewards/chosen": 42.999271392822266, |
|
"rewards/margins": 1.3085330724716187, |
|
"rewards/rejected": 41.690738677978516, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.2826022961436562, |
|
"grad_norm": 2939.989789108279, |
|
"learning_rate": 4.509166451174194e-07, |
|
"logits/chosen": 5950.25048828125, |
|
"logits/rejected": 5111.1640625, |
|
"logps/chosen": -259.44683837890625, |
|
"logps/rejected": -235.40414428710938, |
|
"loss": 487.8371, |
|
"rewards/accuracies": 0.5083333253860474, |
|
"rewards/chosen": 37.737037658691406, |
|
"rewards/margins": -5.529684543609619, |
|
"rewards/rejected": 43.266719818115234, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.28848984397998234, |
|
"grad_norm": 2953.1970852829686, |
|
"learning_rate": 4.4781566191079116e-07, |
|
"logits/chosen": 4588.8037109375, |
|
"logits/rejected": 4577.138671875, |
|
"logps/chosen": -239.47250366210938, |
|
"logps/rejected": -204.98898315429688, |
|
"loss": 456.105, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 37.66023635864258, |
|
"rewards/margins": -3.8221187591552734, |
|
"rewards/rejected": 41.48235321044922, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.2943773918163085, |
|
"grad_norm": 2736.224570719795, |
|
"learning_rate": 4.446310609668e-07, |
|
"logits/chosen": 5530.6298828125, |
|
"logits/rejected": 4615.0556640625, |
|
"logps/chosen": -249.34030151367188, |
|
"logps/rejected": -225.1228790283203, |
|
"loss": 459.8451, |
|
"rewards/accuracies": 0.5583333969116211, |
|
"rewards/chosen": 43.233238220214844, |
|
"rewards/margins": -1.8535034656524658, |
|
"rewards/rejected": 45.08674240112305, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3002649396526347, |
|
"grad_norm": 2615.8589721258495, |
|
"learning_rate": 4.413641884332824e-07, |
|
"logits/chosen": 5124.1259765625, |
|
"logits/rejected": 4400.78515625, |
|
"logps/chosen": -232.1068115234375, |
|
"logps/rejected": -212.48538208007812, |
|
"loss": 463.0996, |
|
"rewards/accuracies": 0.5833333730697632, |
|
"rewards/chosen": 45.848487854003906, |
|
"rewards/margins": 1.3726880550384521, |
|
"rewards/rejected": 44.475799560546875, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.30615248748896084, |
|
"grad_norm": 2756.553889805137, |
|
"learning_rate": 4.3801642523471585e-07, |
|
"logits/chosen": 5130.49609375, |
|
"logits/rejected": 4525.7822265625, |
|
"logps/chosen": -227.8628692626953, |
|
"logps/rejected": -220.9073028564453, |
|
"loss": 443.5771, |
|
"rewards/accuracies": 0.5833333730697632, |
|
"rewards/chosen": 41.145599365234375, |
|
"rewards/margins": 3.313695192337036, |
|
"rewards/rejected": 37.83190155029297, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.312040035325287, |
|
"grad_norm": 2864.6376168471875, |
|
"learning_rate": 4.3458918648849363e-07, |
|
"logits/chosen": 5254.4072265625, |
|
"logits/rejected": 4547.3408203125, |
|
"logps/chosen": -233.91171264648438, |
|
"logps/rejected": -220.1038055419922, |
|
"loss": 437.4953, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 43.519020080566406, |
|
"rewards/margins": -5.650818824768066, |
|
"rewards/rejected": 49.169837951660156, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.3179275831616132, |
|
"grad_norm": 2599.9218788148514, |
|
"learning_rate": 4.3108392090674813e-07, |
|
"logits/chosen": 6104.6904296875, |
|
"logits/rejected": 5016.74658203125, |
|
"logps/chosen": -281.6202087402344, |
|
"logps/rejected": -253.33285522460938, |
|
"loss": 482.9291, |
|
"rewards/accuracies": 0.5666666626930237, |
|
"rewards/chosen": 47.8477897644043, |
|
"rewards/margins": 8.502013206481934, |
|
"rewards/rejected": 39.34577941894531, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.32381513099793935, |
|
"grad_norm": 3199.922596928601, |
|
"learning_rate": 4.2750211018397197e-07, |
|
"logits/chosen": 5082.48046875, |
|
"logits/rejected": 4866.55859375, |
|
"logps/chosen": -250.6102752685547, |
|
"logps/rejected": -231.0320281982422, |
|
"loss": 472.6031, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 42.02797317504883, |
|
"rewards/margins": -2.5889058113098145, |
|
"rewards/rejected": 44.61688232421875, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.3297026788342655, |
|
"grad_norm": 2817.3733503855756, |
|
"learning_rate": 4.2384526837069784e-07, |
|
"logits/chosen": 4878.5634765625, |
|
"logits/rejected": 3915.231201171875, |
|
"logps/chosen": -200.32920837402344, |
|
"logps/rejected": -187.05667114257812, |
|
"loss": 467.9761, |
|
"rewards/accuracies": 0.5833333730697632, |
|
"rewards/chosen": 44.21455383300781, |
|
"rewards/margins": 1.7118313312530518, |
|
"rewards/rejected": 42.50272750854492, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.3355902266705917, |
|
"grad_norm": 2821.591086826825, |
|
"learning_rate": 4.2011494123350146e-07, |
|
"logits/chosen": 5307.7412109375, |
|
"logits/rejected": 4781.70361328125, |
|
"logps/chosen": -228.97842407226562, |
|
"logps/rejected": -248.53994750976562, |
|
"loss": 465.2356, |
|
"rewards/accuracies": 0.5833333134651184, |
|
"rewards/chosen": 46.05214309692383, |
|
"rewards/margins": 3.274325132369995, |
|
"rewards/rejected": 42.77781295776367, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.34147777450691785, |
|
"grad_norm": 2609.2990914642696, |
|
"learning_rate": 4.1631270560159744e-07, |
|
"logits/chosen": 5694.88623046875, |
|
"logits/rejected": 4307.6611328125, |
|
"logps/chosen": -249.42398071289062, |
|
"logps/rejected": -207.21530151367188, |
|
"loss": 466.227, |
|
"rewards/accuracies": 0.5833333730697632, |
|
"rewards/chosen": 41.66138458251953, |
|
"rewards/margins": 3.035097122192383, |
|
"rewards/rejected": 38.626285552978516, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.347365322343244, |
|
"grad_norm": 2794.6706694766654, |
|
"learning_rate": 4.1244016870030565e-07, |
|
"logits/chosen": 4977.28955078125, |
|
"logits/rejected": 4125.67822265625, |
|
"logps/chosen": -245.0275421142578, |
|
"logps/rejected": -179.28305053710938, |
|
"loss": 438.7925, |
|
"rewards/accuracies": 0.5583333969116211, |
|
"rewards/chosen": 39.37556076049805, |
|
"rewards/margins": -1.698758840560913, |
|
"rewards/rejected": 41.074317932128906, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.3532528701795702, |
|
"grad_norm": 2607.2196348404323, |
|
"learning_rate": 4.084989674716679e-07, |
|
"logits/chosen": 5479.78369140625, |
|
"logits/rejected": 4455.615234375, |
|
"logps/chosen": -242.08688354492188, |
|
"logps/rejected": -193.136474609375, |
|
"loss": 458.9058, |
|
"rewards/accuracies": 0.6416667103767395, |
|
"rewards/chosen": 42.43415069580078, |
|
"rewards/margins": 2.274672031402588, |
|
"rewards/rejected": 40.159481048583984, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.35914041801589636, |
|
"grad_norm": 2718.3257214827568, |
|
"learning_rate": 4.0449076788250443e-07, |
|
"logits/chosen": 5217.408203125, |
|
"logits/rejected": 4447.7919921875, |
|
"logps/chosen": -236.44717407226562, |
|
"logps/rejected": -184.24591064453125, |
|
"loss": 464.5568, |
|
"rewards/accuracies": 0.5000000596046448, |
|
"rewards/chosen": 39.97568893432617, |
|
"rewards/margins": 0.8229917287826538, |
|
"rewards/rejected": 39.1526985168457, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.3650279658522225, |
|
"grad_norm": 2786.272576235552, |
|
"learning_rate": 4.0041726422020015e-07, |
|
"logits/chosen": 5721.35107421875, |
|
"logits/rejected": 4428.142578125, |
|
"logps/chosen": -246.13412475585938, |
|
"logps/rejected": -206.4453887939453, |
|
"loss": 501.951, |
|
"rewards/accuracies": 0.6333333253860474, |
|
"rewards/chosen": 44.817237854003906, |
|
"rewards/margins": 7.289865970611572, |
|
"rewards/rejected": 37.527374267578125, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.3709155136885487, |
|
"grad_norm": 2768.798117142065, |
|
"learning_rate": 3.962801783765209e-07, |
|
"logits/chosen": 6276.0185546875, |
|
"logits/rejected": 5337.60986328125, |
|
"logps/chosen": -271.36322021484375, |
|
"logps/rejected": -255.06948852539062, |
|
"loss": 456.5855, |
|
"rewards/accuracies": 0.491666704416275, |
|
"rewards/chosen": 46.203147888183594, |
|
"rewards/margins": 0.8436892628669739, |
|
"rewards/rejected": 45.359458923339844, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.3768030615248749, |
|
"grad_norm": 2697.761608609681, |
|
"learning_rate": 3.920812591197603e-07, |
|
"logits/chosen": 5017.4580078125, |
|
"logits/rejected": 4359.14208984375, |
|
"logps/chosen": -211.0869598388672, |
|
"logps/rejected": -190.4265594482422, |
|
"loss": 423.3793, |
|
"rewards/accuracies": 0.5166666507720947, |
|
"rewards/chosen": 38.125648498535156, |
|
"rewards/margins": -3.213513135910034, |
|
"rewards/rejected": 41.33915710449219, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.3826906093612011, |
|
"grad_norm": 2574.762524301188, |
|
"learning_rate": 3.878222813555261e-07, |
|
"logits/chosen": 5633.08447265625, |
|
"logits/rejected": 5116.6396484375, |
|
"logps/chosen": -233.45870971679688, |
|
"logps/rejected": -225.9169921875, |
|
"loss": 455.4143, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 44.39647674560547, |
|
"rewards/margins": -3.767230987548828, |
|
"rewards/rejected": 48.1637077331543, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.38857815719752725, |
|
"grad_norm": 2594.9637687624113, |
|
"learning_rate": 3.8350504537647787e-07, |
|
"logits/chosen": 4977.634765625, |
|
"logits/rejected": 4156.2880859375, |
|
"logps/chosen": -228.33230590820312, |
|
"logps/rejected": -204.38168334960938, |
|
"loss": 465.9269, |
|
"rewards/accuracies": 0.5666667222976685, |
|
"rewards/chosen": 42.252296447753906, |
|
"rewards/margins": -1.8904374837875366, |
|
"rewards/rejected": 44.142738342285156, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.3944657050338534, |
|
"grad_norm": 2712.9964855523226, |
|
"learning_rate": 3.7913137610133425e-07, |
|
"logits/chosen": 5011.603515625, |
|
"logits/rejected": 4176.4658203125, |
|
"logps/chosen": -205.63040161132812, |
|
"logps/rejected": -193.8191680908203, |
|
"loss": 461.3146, |
|
"rewards/accuracies": 0.5250000357627869, |
|
"rewards/chosen": 43.163169860839844, |
|
"rewards/margins": 1.5009454488754272, |
|
"rewards/rejected": 41.66222381591797, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.4003532528701796, |
|
"grad_norm": 2700.0766282434847, |
|
"learning_rate": 3.747031223034695e-07, |
|
"logits/chosen": 5478.8486328125, |
|
"logits/rejected": 4809.7490234375, |
|
"logps/chosen": -226.5070037841797, |
|
"logps/rejected": -205.4853515625, |
|
"loss": 433.533, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 41.19525146484375, |
|
"rewards/margins": -1.973081350326538, |
|
"rewards/rejected": 43.1683349609375, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.40624080070650576, |
|
"grad_norm": 2734.175956036057, |
|
"learning_rate": 3.7022215582942734e-07, |
|
"logits/chosen": 4257.45654296875, |
|
"logits/rejected": 3948.198486328125, |
|
"logps/chosen": -223.28622436523438, |
|
"logps/rejected": -202.68695068359375, |
|
"loss": 455.9671, |
|
"rewards/accuracies": 0.47499996423721313, |
|
"rewards/chosen": 41.13948059082031, |
|
"rewards/margins": -1.048543930053711, |
|
"rewards/rejected": 42.188026428222656, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.4121283485428319, |
|
"grad_norm": 2667.6389753883586, |
|
"learning_rate": 3.656903708076815e-07, |
|
"logits/chosen": 5091.544921875, |
|
"logits/rejected": 4615.90576171875, |
|
"logps/chosen": -216.5720672607422, |
|
"logps/rejected": -212.180908203125, |
|
"loss": 457.6879, |
|
"rewards/accuracies": 0.5416666865348816, |
|
"rewards/chosen": 41.81556701660156, |
|
"rewards/margins": -1.126306176185608, |
|
"rewards/rejected": 42.94187545776367, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.4180158963791581, |
|
"grad_norm": 3140.337616775453, |
|
"learning_rate": 3.611096828479773e-07, |
|
"logits/chosen": 5586.228515625, |
|
"logits/rejected": 4623.2958984375, |
|
"logps/chosen": -257.47991943359375, |
|
"logps/rejected": -214.2526092529297, |
|
"loss": 487.5771, |
|
"rewards/accuracies": 0.5416666865348816, |
|
"rewards/chosen": 42.64170455932617, |
|
"rewards/margins": 0.10084114223718643, |
|
"rewards/rejected": 42.540863037109375, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.42390344421548426, |
|
"grad_norm": 2603.7761578281047, |
|
"learning_rate": 3.564820282315931e-07, |
|
"logits/chosen": 5537.7119140625, |
|
"logits/rejected": 4168.7373046875, |
|
"logps/chosen": -262.38360595703125, |
|
"logps/rejected": -204.13397216796875, |
|
"loss": 449.7505, |
|
"rewards/accuracies": 0.6250000596046448, |
|
"rewards/chosen": 45.0696907043457, |
|
"rewards/margins": 5.273316383361816, |
|
"rewards/rejected": 39.79636764526367, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.4297909920518104, |
|
"grad_norm": 2866.6876808933243, |
|
"learning_rate": 3.518093630928644e-07, |
|
"logits/chosen": 5515.57275390625, |
|
"logits/rejected": 4701.93994140625, |
|
"logps/chosen": -248.15963745117188, |
|
"logps/rejected": -229.8058624267578, |
|
"loss": 476.8922, |
|
"rewards/accuracies": 0.5833333134651184, |
|
"rewards/chosen": 46.25033187866211, |
|
"rewards/margins": 8.252941131591797, |
|
"rewards/rejected": 37.99739456176758, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.4356785398881366, |
|
"grad_norm": 3058.8100866920245, |
|
"learning_rate": 3.4709366259231464e-07, |
|
"logits/chosen": 5491.2119140625, |
|
"logits/rejected": 5104.46337890625, |
|
"logps/chosen": -250.91342163085938, |
|
"logps/rejected": -231.6546173095703, |
|
"loss": 469.3656, |
|
"rewards/accuracies": 0.5666667222976685, |
|
"rewards/chosen": 44.03254699707031, |
|
"rewards/margins": -2.906583309173584, |
|
"rewards/rejected": 46.93913269042969, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.44156608772446276, |
|
"grad_norm": 3017.47651137017, |
|
"learning_rate": 3.423369200817449e-07, |
|
"logits/chosen": 5202.6728515625, |
|
"logits/rejected": 4578.47265625, |
|
"logps/chosen": -249.1278839111328, |
|
"logps/rejected": -213.2310333251953, |
|
"loss": 462.8963, |
|
"rewards/accuracies": 0.5666667222976685, |
|
"rewards/chosen": 44.32584762573242, |
|
"rewards/margins": -3.8503451347351074, |
|
"rewards/rejected": 48.17619705200195, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.44745363556078893, |
|
"grad_norm": 2656.5967747745717, |
|
"learning_rate": 3.3754114626163314e-07, |
|
"logits/chosen": 5392.62646484375, |
|
"logits/rejected": 5032.01953125, |
|
"logps/chosen": -269.23895263671875, |
|
"logps/rejected": -242.6970672607422, |
|
"loss": 461.1163, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 46.39586639404297, |
|
"rewards/margins": 0.5340154767036438, |
|
"rewards/rejected": 45.861846923828125, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.4533411833971151, |
|
"grad_norm": 2855.313253763874, |
|
"learning_rate": 3.327083683312004e-07, |
|
"logits/chosen": 5523.498046875, |
|
"logits/rejected": 5086.7783203125, |
|
"logps/chosen": -261.9441223144531, |
|
"logps/rejected": -227.2518768310547, |
|
"loss": 481.2854, |
|
"rewards/accuracies": 0.5083333849906921, |
|
"rewards/chosen": 45.44671630859375, |
|
"rewards/margins": 0.4767987132072449, |
|
"rewards/rejected": 44.96991729736328, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.45922873123344127, |
|
"grad_norm": 2722.639941986274, |
|
"learning_rate": 3.2784062913150293e-07, |
|
"logits/chosen": 5799.98828125, |
|
"logits/rejected": 4591.291015625, |
|
"logps/chosen": -287.0743408203125, |
|
"logps/rejected": -208.4379425048828, |
|
"loss": 483.4277, |
|
"rewards/accuracies": 0.5916666388511658, |
|
"rewards/chosen": 42.68096923828125, |
|
"rewards/margins": -0.6198533773422241, |
|
"rewards/rejected": 43.30082321166992, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.46511627906976744, |
|
"grad_norm": 2318.299759399404, |
|
"learning_rate": 3.229399862819124e-07, |
|
"logits/chosen": 5658.1552734375, |
|
"logits/rejected": 4616.27490234375, |
|
"logps/chosen": -259.0816955566406, |
|
"logps/rejected": -202.31143188476562, |
|
"loss": 453.9431, |
|
"rewards/accuracies": 0.5583332777023315, |
|
"rewards/chosen": 45.26428985595703, |
|
"rewards/margins": 2.1579227447509766, |
|
"rewards/rejected": 43.106361389160156, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.4710038269060936, |
|
"grad_norm": 3232.511560323512, |
|
"learning_rate": 3.18008511310349e-07, |
|
"logits/chosen": 5601.29150390625, |
|
"logits/rejected": 4995.228515625, |
|
"logps/chosen": -233.19369506835938, |
|
"logps/rejected": -235.1713409423828, |
|
"loss": 457.5126, |
|
"rewards/accuracies": 0.5333333611488342, |
|
"rewards/chosen": 48.8902702331543, |
|
"rewards/margins": 3.462005615234375, |
|
"rewards/rejected": 45.42826461791992, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.47689137474241977, |
|
"grad_norm": 2903.24125926808, |
|
"learning_rate": 3.1304828877763564e-07, |
|
"logits/chosen": 5097.60693359375, |
|
"logits/rejected": 4684.20068359375, |
|
"logps/chosen": -222.4737091064453, |
|
"logps/rejected": -205.9313201904297, |
|
"loss": 449.5271, |
|
"rewards/accuracies": 0.5083333253860474, |
|
"rewards/chosen": 43.99319839477539, |
|
"rewards/margins": -6.208525657653809, |
|
"rewards/rejected": 50.20172882080078, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.48277892257874594, |
|
"grad_norm": 2895.193360278281, |
|
"learning_rate": 3.080614153963429e-07, |
|
"logits/chosen": 5215.91552734375, |
|
"logits/rejected": 4266.2978515625, |
|
"logps/chosen": -210.92129516601562, |
|
"logps/rejected": -204.11961364746094, |
|
"loss": 443.6631, |
|
"rewards/accuracies": 0.5750000476837158, |
|
"rewards/chosen": 45.588417053222656, |
|
"rewards/margins": 4.789834022521973, |
|
"rewards/rejected": 40.798583984375, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.4886664704150721, |
|
"grad_norm": 2695.9508775371423, |
|
"learning_rate": 3.030499991444977e-07, |
|
"logits/chosen": 5659.1708984375, |
|
"logits/rejected": 4685.5791015625, |
|
"logps/chosen": -240.7261962890625, |
|
"logps/rejected": -228.7866973876953, |
|
"loss": 464.7709, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 45.628719329833984, |
|
"rewards/margins": 2.6973090171813965, |
|
"rewards/rejected": 42.931419372558594, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.4945540182513983, |
|
"grad_norm": 2836.335274661169, |
|
"learning_rate": 2.980161583745294e-07, |
|
"logits/chosen": 5090.1796875, |
|
"logits/rejected": 4844.7646484375, |
|
"logps/chosen": -241.19583129882812, |
|
"logps/rejected": -240.20263671875, |
|
"loss": 456.9758, |
|
"rewards/accuracies": 0.5250000357627869, |
|
"rewards/chosen": 43.398475646972656, |
|
"rewards/margins": -6.495508670806885, |
|
"rewards/rejected": 49.893985748291016, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.5004415660877245, |
|
"grad_norm": 2558.4226398723663, |
|
"learning_rate": 2.929620209178307e-07, |
|
"logits/chosen": 5289.33203125, |
|
"logits/rejected": 4530.0595703125, |
|
"logps/chosen": -245.92941284179688, |
|
"logps/rejected": -206.4706573486328, |
|
"loss": 453.3887, |
|
"rewards/accuracies": 0.6916667222976685, |
|
"rewards/chosen": 45.08998489379883, |
|
"rewards/margins": 5.154721260070801, |
|
"rewards/rejected": 39.935264587402344, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.5063291139240507, |
|
"grad_norm": 2717.829404938614, |
|
"learning_rate": 2.8788972318531267e-07, |
|
"logits/chosen": 5749.87890625, |
|
"logits/rejected": 4572.9990234375, |
|
"logps/chosen": -237.6320037841797, |
|
"logps/rejected": -211.4043426513672, |
|
"loss": 450.152, |
|
"rewards/accuracies": 0.5166667103767395, |
|
"rewards/chosen": 46.66907501220703, |
|
"rewards/margins": 4.650607109069824, |
|
"rewards/rejected": 42.01846694946289, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.5122166617603768, |
|
"grad_norm": 2861.2377752356074, |
|
"learning_rate": 2.8280140926433187e-07, |
|
"logits/chosen": 5393.77392578125, |
|
"logits/rejected": 5325.40380859375, |
|
"logps/chosen": -242.5797882080078, |
|
"logps/rejected": -222.5009765625, |
|
"loss": 481.2984, |
|
"rewards/accuracies": 0.5333333611488342, |
|
"rewards/chosen": 43.943992614746094, |
|
"rewards/margins": -2.422114133834839, |
|
"rewards/rejected": 46.36610412597656, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.518104209596703, |
|
"grad_norm": 2659.2035494791103, |
|
"learning_rate": 2.7769923001237316e-07, |
|
"logits/chosen": 5558.83935546875, |
|
"logits/rejected": 4822.5126953125, |
|
"logps/chosen": -220.2300262451172, |
|
"logps/rejected": -225.2122802734375, |
|
"loss": 449.2192, |
|
"rewards/accuracies": 0.5833333730697632, |
|
"rewards/chosen": 44.88111114501953, |
|
"rewards/margins": 4.092176914215088, |
|
"rewards/rejected": 40.78893280029297, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.5239917574330292, |
|
"grad_norm": 2617.742422402943, |
|
"learning_rate": 2.7258534214787107e-07, |
|
"logits/chosen": 5243.423828125, |
|
"logits/rejected": 4836.48046875, |
|
"logps/chosen": -228.9081268310547, |
|
"logps/rejected": -223.1492156982422, |
|
"loss": 452.9875, |
|
"rewards/accuracies": 0.60833340883255, |
|
"rewards/chosen": 44.30738067626953, |
|
"rewards/margins": 4.894416809082031, |
|
"rewards/rejected": 39.412967681884766, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.5298793052693553, |
|
"grad_norm": 2262.030014981174, |
|
"learning_rate": 2.6746190733855306e-07, |
|
"logits/chosen": 5679.38623046875, |
|
"logits/rejected": 4615.3876953125, |
|
"logps/chosen": -269.0588073730469, |
|
"logps/rejected": -209.2182159423828, |
|
"loss": 445.8765, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 45.74579620361328, |
|
"rewards/margins": 4.458695411682129, |
|
"rewards/rejected": 41.2870979309082, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.5357668531056815, |
|
"grad_norm": 2839.0904187282845, |
|
"learning_rate": 2.6233109128769133e-07, |
|
"logits/chosen": 5576.9326171875, |
|
"logits/rejected": 4600.5908203125, |
|
"logps/chosen": -252.0655975341797, |
|
"logps/rejected": -201.52325439453125, |
|
"loss": 472.7852, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 43.91907501220703, |
|
"rewards/margins": 0.6527482867240906, |
|
"rewards/rejected": 43.266319274902344, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.5416544009420077, |
|
"grad_norm": 2527.8677929651662, |
|
"learning_rate": 2.571950628186483e-07, |
|
"logits/chosen": 5142.1513671875, |
|
"logits/rejected": 4847.85888671875, |
|
"logps/chosen": -233.6755828857422, |
|
"logps/rejected": -223.87100219726562, |
|
"loss": 457.4038, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 49.908050537109375, |
|
"rewards/margins": 8.85273265838623, |
|
"rewards/rejected": 41.0553092956543, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.5475419487783338, |
|
"grad_norm": 2434.5446784038063, |
|
"learning_rate": 2.520559929581034e-07, |
|
"logits/chosen": 5560.0634765625, |
|
"logits/rejected": 5184.27685546875, |
|
"logps/chosen": -219.471923828125, |
|
"logps/rejected": -230.39956665039062, |
|
"loss": 445.1723, |
|
"rewards/accuracies": 0.5166666507720947, |
|
"rewards/chosen": 45.55453872680664, |
|
"rewards/margins": 3.9521491527557373, |
|
"rewards/rejected": 41.602394104003906, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.55342949661466, |
|
"grad_norm": 2623.9403337631743, |
|
"learning_rate": 2.469160540183484e-07, |
|
"logits/chosen": 4994.5146484375, |
|
"logits/rejected": 4167.3115234375, |
|
"logps/chosen": -216.19406127929688, |
|
"logps/rejected": -202.72320556640625, |
|
"loss": 437.8886, |
|
"rewards/accuracies": 0.5333333015441895, |
|
"rewards/chosen": 45.913734436035156, |
|
"rewards/margins": 5.58524227142334, |
|
"rewards/rejected": 40.328495025634766, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.5593170444509862, |
|
"grad_norm": 2507.348851307183, |
|
"learning_rate": 2.417774186790396e-07, |
|
"logits/chosen": 5547.2744140625, |
|
"logits/rejected": 4807.9375, |
|
"logps/chosen": -230.7436981201172, |
|
"logps/rejected": -207.02481079101562, |
|
"loss": 452.8866, |
|
"rewards/accuracies": 0.5500000715255737, |
|
"rewards/chosen": 47.687828063964844, |
|
"rewards/margins": 4.640194416046143, |
|
"rewards/rejected": 43.047645568847656, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.5652045922873123, |
|
"grad_norm": 3119.519288688434, |
|
"learning_rate": 2.366422590687945e-07, |
|
"logits/chosen": 5354.3486328125, |
|
"logits/rejected": 4346.8291015625, |
|
"logps/chosen": -213.7137908935547, |
|
"logps/rejected": -186.5005645751953, |
|
"loss": 458.5086, |
|
"rewards/accuracies": 0.5416666269302368, |
|
"rewards/chosen": 45.78757858276367, |
|
"rewards/margins": -1.3455828428268433, |
|
"rewards/rejected": 47.13316345214844, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.5710921401236385, |
|
"grad_norm": 2711.9281079875923, |
|
"learning_rate": 2.3151274584702116e-07, |
|
"logits/chosen": 5499.513671875, |
|
"logits/rejected": 4687.3408203125, |
|
"logps/chosen": -237.8612823486328, |
|
"logps/rejected": -205.35922241210938, |
|
"loss": 463.1298, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 48.72194290161133, |
|
"rewards/margins": 4.278817176818848, |
|
"rewards/rejected": 44.4431266784668, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.5769796879599647, |
|
"grad_norm": 2630.306237104176, |
|
"learning_rate": 2.2639104728636912e-07, |
|
"logits/chosen": 5086.7822265625, |
|
"logits/rejected": 4054.61962890625, |
|
"logps/chosen": -220.7304229736328, |
|
"logps/rejected": -206.5825958251953, |
|
"loss": 440.4308, |
|
"rewards/accuracies": 0.5583333969116211, |
|
"rewards/chosen": 43.90907669067383, |
|
"rewards/margins": 2.613150119781494, |
|
"rewards/rejected": 41.29592514038086, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.5828672357962909, |
|
"grad_norm": 2710.6636823106182, |
|
"learning_rate": 2.2127932835618895e-07, |
|
"logits/chosen": 5302.921875, |
|
"logits/rejected": 4638.28955078125, |
|
"logps/chosen": -234.52285766601562, |
|
"logps/rejected": -225.45364379882812, |
|
"loss": 427.9638, |
|
"rewards/accuracies": 0.5583332777023315, |
|
"rewards/chosen": 42.980873107910156, |
|
"rewards/margins": 4.057049751281738, |
|
"rewards/rejected": 38.923824310302734, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.588754783632617, |
|
"grad_norm": 2870.711490574312, |
|
"learning_rate": 2.1617974980738814e-07, |
|
"logits/chosen": 5824.0576171875, |
|
"logits/rejected": 5266.1982421875, |
|
"logps/chosen": -258.0989685058594, |
|
"logps/rejected": -251.48828125, |
|
"loss": 482.1707, |
|
"rewards/accuracies": 0.5583333373069763, |
|
"rewards/chosen": 45.20808792114258, |
|
"rewards/margins": -5.770603656768799, |
|
"rewards/rejected": 50.97869110107422, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5946423314689432, |
|
"grad_norm": 2447.752518434989, |
|
"learning_rate": 2.1109446725907e-07, |
|
"logits/chosen": 5894.18603515625, |
|
"logits/rejected": 4373.1640625, |
|
"logps/chosen": -254.66024780273438, |
|
"logps/rejected": -196.70751953125, |
|
"loss": 450.6618, |
|
"rewards/accuracies": 0.6500000357627869, |
|
"rewards/chosen": 45.06776428222656, |
|
"rewards/margins": 5.682547569274902, |
|
"rewards/rejected": 39.385215759277344, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.6005298793052694, |
|
"grad_norm": 3026.8393568831943, |
|
"learning_rate": 2.060256302873421e-07, |
|
"logits/chosen": 4870.3251953125, |
|
"logits/rejected": 4591.98876953125, |
|
"logps/chosen": -234.90676879882812, |
|
"logps/rejected": -216.5286865234375, |
|
"loss": 443.9214, |
|
"rewards/accuracies": 0.6333333849906921, |
|
"rewards/chosen": 50.04943084716797, |
|
"rewards/margins": 7.063135623931885, |
|
"rewards/rejected": 42.98630142211914, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.6064174271415955, |
|
"grad_norm": 2795.814283569102, |
|
"learning_rate": 2.0097538151667884e-07, |
|
"logits/chosen": 6684.375, |
|
"logits/rejected": 5485.0751953125, |
|
"logps/chosen": -276.08843994140625, |
|
"logps/rejected": -246.08056640625, |
|
"loss": 496.8952, |
|
"rewards/accuracies": 0.60833340883255, |
|
"rewards/chosen": 54.97471237182617, |
|
"rewards/margins": 7.3708295822143555, |
|
"rewards/rejected": 47.6038818359375, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.6123049749779217, |
|
"grad_norm": 2455.354784189058, |
|
"learning_rate": 1.9594585571422276e-07, |
|
"logits/chosen": 4663.0888671875, |
|
"logits/rejected": 4298.4326171875, |
|
"logps/chosen": -186.22332763671875, |
|
"logps/rejected": -195.13868713378906, |
|
"loss": 447.868, |
|
"rewards/accuracies": 0.5166666507720947, |
|
"rewards/chosen": 37.82415008544922, |
|
"rewards/margins": -4.475025177001953, |
|
"rewards/rejected": 42.29917907714844, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.6181925228142479, |
|
"grad_norm": 3037.059899209514, |
|
"learning_rate": 1.9093917888740688e-07, |
|
"logits/chosen": 5092.6142578125, |
|
"logits/rejected": 4403.24267578125, |
|
"logps/chosen": -212.946533203125, |
|
"logps/rejected": -192.6261749267578, |
|
"loss": 438.1319, |
|
"rewards/accuracies": 0.4583333432674408, |
|
"rewards/chosen": 40.9649772644043, |
|
"rewards/margins": -2.0067009925842285, |
|
"rewards/rejected": 42.9716796875, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.624080070650574, |
|
"grad_norm": 2583.707994006676, |
|
"learning_rate": 1.8595746738528043e-07, |
|
"logits/chosen": 5215.65625, |
|
"logits/rejected": 4705.4794921875, |
|
"logps/chosen": -240.82656860351562, |
|
"logps/rejected": -210.69119262695312, |
|
"loss": 481.238, |
|
"rewards/accuracies": 0.5666667222976685, |
|
"rewards/chosen": 42.98539352416992, |
|
"rewards/margins": 1.0407254695892334, |
|
"rewards/rejected": 41.94466781616211, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.6299676184869002, |
|
"grad_norm": 2888.281846249585, |
|
"learning_rate": 1.8100282700391615e-07, |
|
"logits/chosen": 5474.8203125, |
|
"logits/rejected": 5022.8955078125, |
|
"logps/chosen": -238.95706176757812, |
|
"logps/rejected": -246.790283203125, |
|
"loss": 473.3039, |
|
"rewards/accuracies": 0.5333333611488342, |
|
"rewards/chosen": 45.00455093383789, |
|
"rewards/margins": -1.4118093252182007, |
|
"rewards/rejected": 46.416358947753906, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.6358551663232264, |
|
"grad_norm": 2919.6730132023235, |
|
"learning_rate": 1.7607735209627948e-07, |
|
"logits/chosen": 6066.033203125, |
|
"logits/rejected": 5340.1376953125, |
|
"logps/chosen": -255.93374633789062, |
|
"logps/rejected": -219.3169708251953, |
|
"loss": 442.0081, |
|
"rewards/accuracies": 0.5666666626930237, |
|
"rewards/chosen": 59.42621994018555, |
|
"rewards/margins": 10.818987846374512, |
|
"rewards/rejected": 48.60723114013672, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.6417427141595525, |
|
"grad_norm": 3061.31389303788, |
|
"learning_rate": 1.7118312468693435e-07, |
|
"logits/chosen": 5364.8857421875, |
|
"logits/rejected": 4527.67236328125, |
|
"logps/chosen": -265.9931945800781, |
|
"logps/rejected": -206.59375, |
|
"loss": 456.8328, |
|
"rewards/accuracies": 0.5583333373069763, |
|
"rewards/chosen": 45.197242736816406, |
|
"rewards/margins": 4.963152885437012, |
|
"rewards/rejected": 40.23408889770508, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.6476302619958787, |
|
"grad_norm": 2675.5740514632685, |
|
"learning_rate": 1.6632221359196007e-07, |
|
"logits/chosen": 5273.853515625, |
|
"logits/rejected": 4304.39306640625, |
|
"logps/chosen": -215.75680541992188, |
|
"logps/rejected": -205.96499633789062, |
|
"loss": 450.398, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 47.502655029296875, |
|
"rewards/margins": -0.780138373374939, |
|
"rewards/rejected": 48.28279113769531, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.6535178098322049, |
|
"grad_norm": 2656.567276511366, |
|
"learning_rate": 1.614966735444519e-07, |
|
"logits/chosen": 5670.87890625, |
|
"logits/rejected": 4776.35400390625, |
|
"logps/chosen": -263.34454345703125, |
|
"logps/rejected": -211.91018676757812, |
|
"loss": 443.2492, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 47.74077224731445, |
|
"rewards/margins": 4.260870933532715, |
|
"rewards/rejected": 43.47990036010742, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.659405357668531, |
|
"grad_norm": 2727.588533971273, |
|
"learning_rate": 1.567085443259743e-07, |
|
"logits/chosen": 5641.5869140625, |
|
"logits/rejected": 4489.1552734375, |
|
"logps/chosen": -251.7860870361328, |
|
"logps/rejected": -210.0601348876953, |
|
"loss": 473.6551, |
|
"rewards/accuracies": 0.6083333492279053, |
|
"rewards/chosen": 48.41345977783203, |
|
"rewards/margins": 0.43558159470558167, |
|
"rewards/rejected": 47.977882385253906, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.6652929055048572, |
|
"grad_norm": 2815.3853893624587, |
|
"learning_rate": 1.5195984990433436e-07, |
|
"logits/chosen": 5388.6455078125, |
|
"logits/rejected": 5199.6357421875, |
|
"logps/chosen": -247.1171875, |
|
"logps/rejected": -233.29345703125, |
|
"loss": 436.6213, |
|
"rewards/accuracies": 0.5583333373069763, |
|
"rewards/chosen": 45.90008544921875, |
|
"rewards/margins": 3.9734444618225098, |
|
"rewards/rejected": 41.926639556884766, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.6711804533411834, |
|
"grad_norm": 2726.3125711774583, |
|
"learning_rate": 1.4725259757803982e-07, |
|
"logits/chosen": 6462.54296875, |
|
"logits/rejected": 5156.8828125, |
|
"logps/chosen": -273.95050048828125, |
|
"logps/rejected": -233.63723754882812, |
|
"loss": 469.2894, |
|
"rewards/accuracies": 0.5666666626930237, |
|
"rewards/chosen": 49.0769157409668, |
|
"rewards/margins": 1.6615930795669556, |
|
"rewards/rejected": 47.41532516479492, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.6770680011775095, |
|
"grad_norm": 2654.672736980306, |
|
"learning_rate": 1.4258877712780331e-07, |
|
"logits/chosen": 5129.0439453125, |
|
"logits/rejected": 3892.001953125, |
|
"logps/chosen": -231.34765625, |
|
"logps/rejected": -182.238037109375, |
|
"loss": 432.7688, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 45.699745178222656, |
|
"rewards/margins": 3.295746326446533, |
|
"rewards/rejected": 42.40399932861328, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.6829555490138357, |
|
"grad_norm": 2482.74159412, |
|
"learning_rate": 1.3797035997545142e-07, |
|
"logits/chosen": 4968.6845703125, |
|
"logits/rejected": 4853.14501953125, |
|
"logps/chosen": -205.07754516601562, |
|
"logps/rejected": -228.11337280273438, |
|
"loss": 420.7995, |
|
"rewards/accuracies": 0.5166667103767395, |
|
"rewards/chosen": 44.83478546142578, |
|
"rewards/margins": -1.369265079498291, |
|
"rewards/rejected": 46.20404815673828, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.6888430968501619, |
|
"grad_norm": 2542.0640885944986, |
|
"learning_rate": 1.333992983505939e-07, |
|
"logits/chosen": 5638.7607421875, |
|
"logits/rejected": 5253.87451171875, |
|
"logps/chosen": -258.0821228027344, |
|
"logps/rejected": -242.6870880126953, |
|
"loss": 496.0406, |
|
"rewards/accuracies": 0.5333333611488342, |
|
"rewards/chosen": 49.918190002441406, |
|
"rewards/margins": 5.044798851013184, |
|
"rewards/rejected": 44.873390197753906, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.694730644686488, |
|
"grad_norm": 2840.4883093419694, |
|
"learning_rate": 1.2887752446540616e-07, |
|
"logits/chosen": 5160.63232421875, |
|
"logits/rejected": 4702.83837890625, |
|
"logps/chosen": -234.97921752929688, |
|
"logps/rejected": -211.44931030273438, |
|
"loss": 454.1117, |
|
"rewards/accuracies": 0.42500004172325134, |
|
"rewards/chosen": 39.74894332885742, |
|
"rewards/margins": -7.0438995361328125, |
|
"rewards/rejected": 46.792850494384766, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.7006181925228142, |
|
"grad_norm": 2534.3268190191707, |
|
"learning_rate": 1.244069496978726e-07, |
|
"logits/chosen": 5515.9306640625, |
|
"logits/rejected": 5091.92578125, |
|
"logps/chosen": -253.8005828857422, |
|
"logps/rejected": -215.34494018554688, |
|
"loss": 455.0434, |
|
"rewards/accuracies": 0.5416666269302368, |
|
"rewards/chosen": 41.3018684387207, |
|
"rewards/margins": -2.4822511672973633, |
|
"rewards/rejected": 43.78411865234375, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.7065057403591404, |
|
"grad_norm": 2216.18831688337, |
|
"learning_rate": 1.1998946378383697e-07, |
|
"logits/chosen": 4838.0625, |
|
"logits/rejected": 4621.58837890625, |
|
"logps/chosen": -195.82528686523438, |
|
"logps/rejected": -200.3052520751953, |
|
"loss": 406.3944, |
|
"rewards/accuracies": 0.5333333611488342, |
|
"rewards/chosen": 49.4088134765625, |
|
"rewards/margins": 6.762750148773193, |
|
"rewards/rejected": 42.64606857299805, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.7123932881954665, |
|
"grad_norm": 2925.190203960196, |
|
"learning_rate": 1.1562693401820092e-07, |
|
"logits/chosen": 5704.892578125, |
|
"logits/rejected": 4735.81982421875, |
|
"logps/chosen": -250.4612274169922, |
|
"logps/rejected": -221.1834716796875, |
|
"loss": 475.8371, |
|
"rewards/accuracies": 0.5416666865348816, |
|
"rewards/chosen": 49.47187423706055, |
|
"rewards/margins": 1.9925483465194702, |
|
"rewards/rejected": 47.47932815551758, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.7182808360317927, |
|
"grad_norm": 2740.5903742398273, |
|
"learning_rate": 1.113212044656087e-07, |
|
"logits/chosen": 5618.9365234375, |
|
"logits/rejected": 4587.16064453125, |
|
"logps/chosen": -237.82760620117188, |
|
"logps/rejected": -221.23837280273438, |
|
"loss": 487.2423, |
|
"rewards/accuracies": 0.6333333253860474, |
|
"rewards/chosen": 57.44464874267578, |
|
"rewards/margins": 8.748071670532227, |
|
"rewards/rejected": 48.696571350097656, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.7241683838681189, |
|
"grad_norm": 2501.9000780368424, |
|
"learning_rate": 1.0707409518095079e-07, |
|
"logits/chosen": 5306.55322265625, |
|
"logits/rejected": 4526.52197265625, |
|
"logps/chosen": -240.0721893310547, |
|
"logps/rejected": -203.85665893554688, |
|
"loss": 449.9376, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 51.15095901489258, |
|
"rewards/margins": 12.58459758758545, |
|
"rewards/rejected": 38.56635665893555, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.730055931704445, |
|
"grad_norm": 2940.399273853818, |
|
"learning_rate": 1.028874014400172e-07, |
|
"logits/chosen": 5354.55908203125, |
|
"logits/rejected": 4197.5888671875, |
|
"logps/chosen": -245.00277709960938, |
|
"logps/rejected": -194.13980102539062, |
|
"loss": 468.1962, |
|
"rewards/accuracies": 0.5250000357627869, |
|
"rewards/chosen": 46.04726791381836, |
|
"rewards/margins": -0.03993086889386177, |
|
"rewards/rejected": 46.08720016479492, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.7359434795407712, |
|
"grad_norm": 3023.1812955166797, |
|
"learning_rate": 9.876289298062476e-08, |
|
"logits/chosen": 5727.68603515625, |
|
"logits/rejected": 4590.14453125, |
|
"logps/chosen": -241.8575897216797, |
|
"logps/rejected": -218.59616088867188, |
|
"loss": 456.2886, |
|
"rewards/accuracies": 0.5166667103767395, |
|
"rewards/chosen": 47.04845428466797, |
|
"rewards/margins": 0.41756492853164673, |
|
"rewards/rejected": 46.630889892578125, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.7418310273770974, |
|
"grad_norm": 2636.2027748224828, |
|
"learning_rate": 9.470231325453956e-08, |
|
"logits/chosen": 5583.41552734375, |
|
"logits/rejected": 4478.337890625, |
|
"logps/chosen": -221.7116241455078, |
|
"logps/rejected": -211.3160858154297, |
|
"loss": 433.6968, |
|
"rewards/accuracies": 0.5333333015441895, |
|
"rewards/chosen": 45.1245002746582, |
|
"rewards/margins": -2.6019275188446045, |
|
"rewards/rejected": 47.7264289855957, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.7477185752134237, |
|
"grad_norm": 2662.2777753772584, |
|
"learning_rate": 9.070737869051043e-08, |
|
"logits/chosen": 5027.3095703125, |
|
"logits/rejected": 4796.9765625, |
|
"logps/chosen": -228.14697265625, |
|
"logps/rejected": -206.49560546875, |
|
"loss": 417.4956, |
|
"rewards/accuracies": 0.5083333849906921, |
|
"rewards/chosen": 46.9410285949707, |
|
"rewards/margins": 2.322603464126587, |
|
"rewards/rejected": 44.61842727661133, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.7536061230497498, |
|
"grad_norm": 2745.160316396736, |
|
"learning_rate": 8.67797779687254e-08, |
|
"logits/chosen": 5756.96875, |
|
"logits/rejected": 4928.97900390625, |
|
"logps/chosen": -264.42279052734375, |
|
"logps/rejected": -216.61502075195312, |
|
"loss": 462.2225, |
|
"rewards/accuracies": 0.6416667103767395, |
|
"rewards/chosen": 49.881675720214844, |
|
"rewards/margins": 8.946965217590332, |
|
"rewards/rejected": 40.93471145629883, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.759493670886076, |
|
"grad_norm": 2914.979019476111, |
|
"learning_rate": 8.292117130699766e-08, |
|
"logits/chosen": 5330.68212890625, |
|
"logits/rejected": 4864.1455078125, |
|
"logps/chosen": -260.3185119628906, |
|
"logps/rejected": -222.41830444335938, |
|
"loss": 459.8443, |
|
"rewards/accuracies": 0.6250000596046448, |
|
"rewards/chosen": 46.853370666503906, |
|
"rewards/margins": 5.266936302185059, |
|
"rewards/rejected": 41.58643341064453, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.7653812187224022, |
|
"grad_norm": 2681.0728179420385, |
|
"learning_rate": 7.913318975898237e-08, |
|
"logits/chosen": 5588.1552734375, |
|
"logits/rejected": 3932.55126953125, |
|
"logps/chosen": -226.26220703125, |
|
"logps/rejected": -192.8434600830078, |
|
"loss": 444.3821, |
|
"rewards/accuracies": 0.6416667103767395, |
|
"rewards/chosen": 50.132713317871094, |
|
"rewards/margins": 10.740438461303711, |
|
"rewards/rejected": 39.392276763916016, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.7712687665587283, |
|
"grad_norm": 2644.708915509074, |
|
"learning_rate": 7.541743452472193e-08, |
|
"logits/chosen": 5802.92822265625, |
|
"logits/rejected": 5005.15478515625, |
|
"logps/chosen": -258.23095703125, |
|
"logps/rejected": -227.35104370117188, |
|
"loss": 470.9071, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 44.41248321533203, |
|
"rewards/margins": 1.734694242477417, |
|
"rewards/rejected": 42.67778778076172, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.7771563143950545, |
|
"grad_norm": 2760.1370388445303, |
|
"learning_rate": 7.177547627380987e-08, |
|
"logits/chosen": 5510.3876953125, |
|
"logits/rejected": 4903.322265625, |
|
"logps/chosen": -229.8451385498047, |
|
"logps/rejected": -202.1870574951172, |
|
"loss": 435.5435, |
|
"rewards/accuracies": 0.5833333730697632, |
|
"rewards/chosen": 44.152503967285156, |
|
"rewards/margins": 1.354308843612671, |
|
"rewards/rejected": 42.79819869995117, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.7830438622313807, |
|
"grad_norm": 2614.2018106928704, |
|
"learning_rate": 6.820885448146041e-08, |
|
"logits/chosen": 6023.060546875, |
|
"logits/rejected": 5090.50537109375, |
|
"logps/chosen": -282.25433349609375, |
|
"logps/rejected": -261.2765197753906, |
|
"loss": 499.2624, |
|
"rewards/accuracies": 0.5583333373069763, |
|
"rewards/chosen": 52.391563415527344, |
|
"rewards/margins": 6.237324237823486, |
|
"rewards/rejected": 46.15424346923828, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.7889314100677068, |
|
"grad_norm": 2597.7263605767535, |
|
"learning_rate": 6.471907677776426e-08, |
|
"logits/chosen": 5775.0126953125, |
|
"logits/rejected": 4832.48876953125, |
|
"logps/chosen": -239.5498809814453, |
|
"logps/rejected": -222.05859375, |
|
"loss": 468.5632, |
|
"rewards/accuracies": 0.5083333253860474, |
|
"rewards/chosen": 46.0155143737793, |
|
"rewards/margins": -3.5307693481445312, |
|
"rewards/rejected": 49.54628372192383, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.794818957904033, |
|
"grad_norm": 2640.8038388162845, |
|
"learning_rate": 6.13076183104052e-08, |
|
"logits/chosen": 4657.62841796875, |
|
"logits/rejected": 4134.35107421875, |
|
"logps/chosen": -207.19058227539062, |
|
"logps/rejected": -190.02688598632812, |
|
"loss": 449.3236, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 42.933380126953125, |
|
"rewards/margins": -1.6155602931976318, |
|
"rewards/rejected": 44.54894256591797, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.8007065057403592, |
|
"grad_norm": 2360.3636823333177, |
|
"learning_rate": 5.797592112110733e-08, |
|
"logits/chosen": 5882.80078125, |
|
"logits/rejected": 4898.4326171875, |
|
"logps/chosen": -250.59024047851562, |
|
"logps/rejected": -225.48971557617188, |
|
"loss": 450.8052, |
|
"rewards/accuracies": 0.5083333253860474, |
|
"rewards/chosen": 56.6795768737793, |
|
"rewards/margins": 4.181834697723389, |
|
"rewards/rejected": 52.49774169921875, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.8065940535766853, |
|
"grad_norm": 2871.53542970714, |
|
"learning_rate": 5.4725393536076106e-08, |
|
"logits/chosen": 5734.0810546875, |
|
"logits/rejected": 5196.62255859375, |
|
"logps/chosen": -246.6741485595703, |
|
"logps/rejected": -223.33871459960938, |
|
"loss": 467.8894, |
|
"rewards/accuracies": 0.5833333730697632, |
|
"rewards/chosen": 46.92644119262695, |
|
"rewards/margins": 2.722069501876831, |
|
"rewards/rejected": 44.204368591308594, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.8124816014130115, |
|
"grad_norm": 2506.469471261304, |
|
"learning_rate": 5.1557409570691854e-08, |
|
"logits/chosen": 5755.55615234375, |
|
"logits/rejected": 4602.2119140625, |
|
"logps/chosen": -258.3332214355469, |
|
"logps/rejected": -208.54736328125, |
|
"loss": 459.4171, |
|
"rewards/accuracies": 0.6000000834465027, |
|
"rewards/chosen": 61.886474609375, |
|
"rewards/margins": 9.517420768737793, |
|
"rewards/rejected": 52.369056701660156, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.8183691492493377, |
|
"grad_norm": 2795.549014141379, |
|
"learning_rate": 4.84733083487055e-08, |
|
"logits/chosen": 5162.1298828125, |
|
"logits/rejected": 4927.0302734375, |
|
"logps/chosen": -224.22134399414062, |
|
"logps/rejected": -214.3109130859375, |
|
"loss": 451.7745, |
|
"rewards/accuracies": 0.5333333611488342, |
|
"rewards/chosen": 50.72800064086914, |
|
"rewards/margins": 5.281952857971191, |
|
"rewards/rejected": 45.446044921875, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.8242566970856638, |
|
"grad_norm": 3099.872127662192, |
|
"learning_rate": 4.547439353618421e-08, |
|
"logits/chosen": 5943.78369140625, |
|
"logits/rejected": 4675.40234375, |
|
"logps/chosen": -254.889892578125, |
|
"logps/rejected": -221.0095977783203, |
|
"loss": 476.1811, |
|
"rewards/accuracies": 0.5416666269302368, |
|
"rewards/chosen": 47.358482360839844, |
|
"rewards/margins": 1.6082299947738647, |
|
"rewards/rejected": 45.75025177001953, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.83014424492199, |
|
"grad_norm": 2768.108043645052, |
|
"learning_rate": 4.2561932790444594e-08, |
|
"logits/chosen": 5309.0048828125, |
|
"logits/rejected": 4402.3564453125, |
|
"logps/chosen": -220.2199249267578, |
|
"logps/rejected": -178.3611602783203, |
|
"loss": 446.4285, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 43.55487823486328, |
|
"rewards/margins": -0.8950740694999695, |
|
"rewards/rejected": 44.449951171875, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.8360317927583162, |
|
"grad_norm": 2416.6350844671583, |
|
"learning_rate": 3.973715722420726e-08, |
|
"logits/chosen": 5374.509765625, |
|
"logits/rejected": 5026.10693359375, |
|
"logps/chosen": -238.96817016601562, |
|
"logps/rejected": -227.57754516601562, |
|
"loss": 473.0174, |
|
"rewards/accuracies": 0.5333333611488342, |
|
"rewards/chosen": 46.53357696533203, |
|
"rewards/margins": -1.398461103439331, |
|
"rewards/rejected": 47.932037353515625, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.8419193405946424, |
|
"grad_norm": 2571.15473257983, |
|
"learning_rate": 3.700126088519892e-08, |
|
"logits/chosen": 5649.8388671875, |
|
"logits/rejected": 5096.2265625, |
|
"logps/chosen": -219.5747833251953, |
|
"logps/rejected": -230.01559448242188, |
|
"loss": 464.0558, |
|
"rewards/accuracies": 0.5916666984558105, |
|
"rewards/chosen": 55.169036865234375, |
|
"rewards/margins": 8.111885070800781, |
|
"rewards/rejected": 47.05714416503906, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.8478068884309685, |
|
"grad_norm": 2500.922610681479, |
|
"learning_rate": 3.435540025142197e-08, |
|
"logits/chosen": 5119.20068359375, |
|
"logits/rejected": 4506.8935546875, |
|
"logps/chosen": -232.45101928710938, |
|
"logps/rejected": -196.38507080078125, |
|
"loss": 460.6399, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 42.9712028503418, |
|
"rewards/margins": -2.353074312210083, |
|
"rewards/rejected": 45.324283599853516, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.8536944362672947, |
|
"grad_norm": 3072.586081400429, |
|
"learning_rate": 3.1800693742305065e-08, |
|
"logits/chosen": 5674.4140625, |
|
"logits/rejected": 4546.13818359375, |
|
"logps/chosen": -232.2156524658203, |
|
"logps/rejected": -209.8252716064453, |
|
"loss": 439.2195, |
|
"rewards/accuracies": 0.5916666388511658, |
|
"rewards/chosen": 45.56594467163086, |
|
"rewards/margins": 1.6826508045196533, |
|
"rewards/rejected": 43.88329315185547, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.8595819841036209, |
|
"grad_norm": 2555.6929882018603, |
|
"learning_rate": 2.9338221245941236e-08, |
|
"logits/chosen": 6251.90380859375, |
|
"logits/rejected": 5486.14990234375, |
|
"logps/chosen": -257.6957092285156, |
|
"logps/rejected": -247.49734497070312, |
|
"loss": 445.6683, |
|
"rewards/accuracies": 0.5833333134651184, |
|
"rewards/chosen": 53.01552200317383, |
|
"rewards/margins": 5.817216396331787, |
|
"rewards/rejected": 47.198307037353516, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.865469531939947, |
|
"grad_norm": 2526.9193833585073, |
|
"learning_rate": 2.6969023662613472e-08, |
|
"logits/chosen": 5537.1455078125, |
|
"logits/rejected": 5597.6640625, |
|
"logps/chosen": -249.64730834960938, |
|
"logps/rejected": -238.49862670898438, |
|
"loss": 453.4951, |
|
"rewards/accuracies": 0.5416666269302368, |
|
"rewards/chosen": 49.131866455078125, |
|
"rewards/margins": -3.0847103595733643, |
|
"rewards/rejected": 52.216575622558594, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.8713570797762732, |
|
"grad_norm": 2814.258565256423, |
|
"learning_rate": 2.4694102464800663e-08, |
|
"logits/chosen": 5417.5751953125, |
|
"logits/rejected": 4861.5048828125, |
|
"logps/chosen": -246.10733032226562, |
|
"logps/rejected": -221.135986328125, |
|
"loss": 476.8938, |
|
"rewards/accuracies": 0.49166664481163025, |
|
"rewards/chosen": 44.802181243896484, |
|
"rewards/margins": -0.2635299563407898, |
|
"rewards/rejected": 45.065711975097656, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.8772446276125994, |
|
"grad_norm": 2719.1332660328358, |
|
"learning_rate": 2.2514419273849673e-08, |
|
"logits/chosen": 5584.01953125, |
|
"logits/rejected": 4642.46337890625, |
|
"logps/chosen": -224.6255340576172, |
|
"logps/rejected": -216.149658203125, |
|
"loss": 454.43, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 45.871826171875, |
|
"rewards/margins": 5.6382737159729, |
|
"rewards/rejected": 40.23354721069336, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.8831321754489255, |
|
"grad_norm": 2562.1914029050754, |
|
"learning_rate": 2.0430895453492942e-08, |
|
"logits/chosen": 5416.74951171875, |
|
"logits/rejected": 4298.4677734375, |
|
"logps/chosen": -233.50082397460938, |
|
"logps/rejected": -192.23582458496094, |
|
"loss": 446.0934, |
|
"rewards/accuracies": 0.5916666984558105, |
|
"rewards/chosen": 47.021453857421875, |
|
"rewards/margins": 1.942139983177185, |
|
"rewards/rejected": 45.07931900024414, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.8890197232852517, |
|
"grad_norm": 2849.2417749033407, |
|
"learning_rate": 1.8444411720383107e-08, |
|
"logits/chosen": 5647.05517578125, |
|
"logits/rejected": 4601.66015625, |
|
"logps/chosen": -243.07870483398438, |
|
"logps/rejected": -205.91433715820312, |
|
"loss": 444.2926, |
|
"rewards/accuracies": 0.5916666388511658, |
|
"rewards/chosen": 53.23212432861328, |
|
"rewards/margins": 7.5799431800842285, |
|
"rewards/rejected": 45.652183532714844, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.8949072711215779, |
|
"grad_norm": 2609.783375316123, |
|
"learning_rate": 1.655580777180937e-08, |
|
"logits/chosen": 4866.36865234375, |
|
"logits/rejected": 4648.73046875, |
|
"logps/chosen": -211.3268280029297, |
|
"logps/rejected": -216.62088012695312, |
|
"loss": 431.3077, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 45.19532012939453, |
|
"rewards/margins": 3.8858094215393066, |
|
"rewards/rejected": 41.309513092041016, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.900794818957904, |
|
"grad_norm": 2858.325700718927, |
|
"learning_rate": 1.4765881930752982e-08, |
|
"logits/chosen": 5512.6796875, |
|
"logits/rejected": 5062.978515625, |
|
"logps/chosen": -251.55654907226562, |
|
"logps/rejected": -247.4755401611328, |
|
"loss": 475.6445, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 47.72459030151367, |
|
"rewards/margins": -0.3327750265598297, |
|
"rewards/rejected": 48.05736541748047, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.9066823667942302, |
|
"grad_norm": 2696.9143449356766, |
|
"learning_rate": 1.3075390808431897e-08, |
|
"logits/chosen": 4964.9306640625, |
|
"logits/rejected": 4029.17822265625, |
|
"logps/chosen": -210.2575225830078, |
|
"logps/rejected": -181.75967407226562, |
|
"loss": 427.3603, |
|
"rewards/accuracies": 0.5916666984558105, |
|
"rewards/chosen": 46.60421371459961, |
|
"rewards/margins": 6.900620460510254, |
|
"rewards/rejected": 39.70359420776367, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.9125699146305564, |
|
"grad_norm": 2909.043218826057, |
|
"learning_rate": 1.1485048984476997e-08, |
|
"logits/chosen": 6264.9814453125, |
|
"logits/rejected": 4926.421875, |
|
"logps/chosen": -247.24398803710938, |
|
"logps/rejected": -222.6241455078125, |
|
"loss": 457.0212, |
|
"rewards/accuracies": 0.491666704416275, |
|
"rewards/chosen": 49.02656936645508, |
|
"rewards/margins": 6.186914920806885, |
|
"rewards/rejected": 42.83965301513672, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.9184574624668825, |
|
"grad_norm": 2774.8159405504452, |
|
"learning_rate": 9.995528704875633e-09, |
|
"logits/chosen": 6105.7099609375, |
|
"logits/rejected": 4611.01708984375, |
|
"logps/chosen": -251.1758270263672, |
|
"logps/rejected": -221.3173065185547, |
|
"loss": 449.7213, |
|
"rewards/accuracies": 0.6250000596046448, |
|
"rewards/chosen": 50.6319465637207, |
|
"rewards/margins": -3.3480277061462402, |
|
"rewards/rejected": 53.9799690246582, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.9243450103032087, |
|
"grad_norm": 2694.402825577869, |
|
"learning_rate": 8.607459597809563e-09, |
|
"logits/chosen": 5613.7509765625, |
|
"logits/rejected": 4740.57275390625, |
|
"logps/chosen": -254.7351837158203, |
|
"logps/rejected": -211.7694549560547, |
|
"loss": 451.4933, |
|
"rewards/accuracies": 0.5666667222976685, |
|
"rewards/chosen": 47.686885833740234, |
|
"rewards/margins": 2.6942806243896484, |
|
"rewards/rejected": 44.992610931396484, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.9302325581395349, |
|
"grad_norm": 2701.2806646124445, |
|
"learning_rate": 7.321428407507879e-09, |
|
"logits/chosen": 5281.224609375, |
|
"logits/rejected": 4749.42236328125, |
|
"logps/chosen": -232.33102416992188, |
|
"logps/rejected": -226.73635864257812, |
|
"loss": 433.525, |
|
"rewards/accuracies": 0.46666663885116577, |
|
"rewards/chosen": 43.77975082397461, |
|
"rewards/margins": -7.4785027503967285, |
|
"rewards/rejected": 51.25825881958008, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.936120105975861, |
|
"grad_norm": 2723.54992738993, |
|
"learning_rate": 6.137978746226846e-09, |
|
"logits/chosen": 5657.05322265625, |
|
"logits/rejected": 4991.169921875, |
|
"logps/chosen": -242.9114227294922, |
|
"logps/rejected": -208.00692749023438, |
|
"loss": 448.1441, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 47.34257888793945, |
|
"rewards/margins": 0.23659400641918182, |
|
"rewards/rejected": 47.105987548828125, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.9420076538121872, |
|
"grad_norm": 2634.7142241290358, |
|
"learning_rate": 5.057610864462353e-09, |
|
"logits/chosen": 5467.34619140625, |
|
"logits/rejected": 4431.1748046875, |
|
"logps/chosen": -226.5965576171875, |
|
"logps/rejected": -206.7377166748047, |
|
"loss": 449.6918, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 53.428863525390625, |
|
"rewards/margins": 8.42198371887207, |
|
"rewards/rejected": 45.00688552856445, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.9478952016485134, |
|
"grad_norm": 3525.9973883937264, |
|
"learning_rate": 4.080781439491199e-09, |
|
"logits/chosen": 5627.1240234375, |
|
"logits/rejected": 5125.11328125, |
|
"logps/chosen": -238.8662109375, |
|
"logps/rejected": -252.44711303710938, |
|
"loss": 494.8671, |
|
"rewards/accuracies": 0.4833333492279053, |
|
"rewards/chosen": 46.18857955932617, |
|
"rewards/margins": -9.739079475402832, |
|
"rewards/rejected": 55.92766189575195, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.9537827494848395, |
|
"grad_norm": 3049.8501688183246, |
|
"learning_rate": 3.207903382331262e-09, |
|
"logits/chosen": 5722.05078125, |
|
"logits/rejected": 4703.1142578125, |
|
"logps/chosen": -247.2072296142578, |
|
"logps/rejected": -230.0430450439453, |
|
"loss": 493.1144, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 53.449073791503906, |
|
"rewards/margins": 6.262479305267334, |
|
"rewards/rejected": 47.186588287353516, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.9596702973211657, |
|
"grad_norm": 2622.2573449101515, |
|
"learning_rate": 2.4393456632016972e-09, |
|
"logits/chosen": 5326.9599609375, |
|
"logits/rejected": 4888.16845703125, |
|
"logps/chosen": -240.6510009765625, |
|
"logps/rejected": -242.0840301513672, |
|
"loss": 461.7448, |
|
"rewards/accuracies": 0.5666667222976685, |
|
"rewards/chosen": 45.62113571166992, |
|
"rewards/margins": -0.24436044692993164, |
|
"rewards/rejected": 45.86549758911133, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.9655578451574919, |
|
"grad_norm": 2788.090741451844, |
|
"learning_rate": 1.7754331555573653e-09, |
|
"logits/chosen": 5645.78515625, |
|
"logits/rejected": 4626.64208984375, |
|
"logps/chosen": -252.6411895751953, |
|
"logps/rejected": -214.1852264404297, |
|
"loss": 458.1211, |
|
"rewards/accuracies": 0.5500000715255737, |
|
"rewards/chosen": 48.367576599121094, |
|
"rewards/margins": 1.717292070388794, |
|
"rewards/rejected": 46.65028762817383, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.971445392993818, |
|
"grad_norm": 2566.0122939348357, |
|
"learning_rate": 1.216446498763013e-09, |
|
"logits/chosen": 4668.9580078125, |
|
"logits/rejected": 3859.90478515625, |
|
"logps/chosen": -201.18142700195312, |
|
"logps/rejected": -183.87576293945312, |
|
"loss": 451.7181, |
|
"rewards/accuracies": 0.6083333492279053, |
|
"rewards/chosen": 44.933570861816406, |
|
"rewards/margins": 4.844240665435791, |
|
"rewards/rejected": 40.089332580566406, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.9773329408301442, |
|
"grad_norm": 2484.1333346476968, |
|
"learning_rate": 7.626219794655553e-10, |
|
"logits/chosen": 5885.1611328125, |
|
"logits/rejected": 4983.9033203125, |
|
"logps/chosen": -217.813720703125, |
|
"logps/rejected": -211.51522827148438, |
|
"loss": 455.2822, |
|
"rewards/accuracies": 0.5583333373069763, |
|
"rewards/chosen": 47.337215423583984, |
|
"rewards/margins": 4.239817142486572, |
|
"rewards/rejected": 43.09739303588867, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.9832204886664704, |
|
"grad_norm": 2771.044324224387, |
|
"learning_rate": 4.1415143171436017e-10, |
|
"logits/chosen": 5878.18359375, |
|
"logits/rejected": 5294.08154296875, |
|
"logps/chosen": -278.19500732421875, |
|
"logps/rejected": -239.20901489257812, |
|
"loss": 478.5493, |
|
"rewards/accuracies": 0.5500000715255737, |
|
"rewards/chosen": 50.47035598754883, |
|
"rewards/margins": 2.3384971618652344, |
|
"rewards/rejected": 48.13185501098633, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.9891080365027966, |
|
"grad_norm": 2385.4573040406094, |
|
"learning_rate": 1.7118215587214047e-10, |
|
"logits/chosen": 5687.02392578125, |
|
"logits/rejected": 4925.9482421875, |
|
"logps/chosen": -242.67739868164062, |
|
"logps/rejected": -220.5124969482422, |
|
"loss": 452.0058, |
|
"rewards/accuracies": 0.533333420753479, |
|
"rewards/chosen": 46.42241668701172, |
|
"rewards/margins": -4.088390350341797, |
|
"rewards/rejected": 50.51081085205078, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.9949955843391227, |
|
"grad_norm": 3470.232158570286, |
|
"learning_rate": 3.3816856350177284e-11, |
|
"logits/chosen": 5894.1396484375, |
|
"logits/rejected": 4498.638671875, |
|
"logps/chosen": -261.60467529296875, |
|
"logps/rejected": -218.1473388671875, |
|
"loss": 473.7954, |
|
"rewards/accuracies": 0.5750000476837158, |
|
"rewards/chosen": 50.67768478393555, |
|
"rewards/margins": 9.646495819091797, |
|
"rewards/rejected": 41.03118896484375, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.9997056226081837, |
|
"step": 1698, |
|
"total_flos": 0.0, |
|
"train_loss": 463.5738731716772, |
|
"train_runtime": 22506.5567, |
|
"train_samples_per_second": 2.716, |
|
"train_steps_per_second": 0.075 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1698, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 6, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|