|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997382884061764, |
|
"eval_steps": 100, |
|
"global_step": 1910, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.617801047120419e-09, |
|
"logits/chosen": -1.1073582172393799, |
|
"logits/rejected": -1.147336483001709, |
|
"logps/chosen": -555.1083374023438, |
|
"logps/rejected": -319.5801086425781, |
|
"loss": 0.6931, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1, |
|
"use_label": 3.0 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.6178010471204188e-08, |
|
"logits/chosen": -1.2926875352859497, |
|
"logits/rejected": -1.3704100847244263, |
|
"logps/chosen": -381.8235168457031, |
|
"logps/rejected": -391.3815002441406, |
|
"loss": 0.6938, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.5277777910232544, |
|
"rewards/chosen": 0.011978763155639172, |
|
"rewards/margins": 0.01244138553738594, |
|
"rewards/rejected": -0.00046262191608548164, |
|
"step": 10, |
|
"use_label": 23.0 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.2356020942408376e-08, |
|
"logits/chosen": -1.3483327627182007, |
|
"logits/rejected": -1.3216352462768555, |
|
"logps/chosen": -363.7161560058594, |
|
"logps/rejected": -315.9764709472656, |
|
"loss": 0.6975, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.32499998807907104, |
|
"rewards/chosen": -0.008383020758628845, |
|
"rewards/margins": -0.02416672185063362, |
|
"rewards/rejected": 0.015783701092004776, |
|
"step": 20, |
|
"use_label": 61.0 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.853403141361257e-08, |
|
"logits/chosen": -1.3223068714141846, |
|
"logits/rejected": -1.3222100734710693, |
|
"logps/chosen": -280.5625915527344, |
|
"logps/rejected": -258.6246643066406, |
|
"loss": 0.6945, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.035853710025548935, |
|
"rewards/margins": 0.010982252657413483, |
|
"rewards/rejected": 0.024871457368135452, |
|
"step": 30, |
|
"use_label": 101.0 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0471204188481675e-07, |
|
"logits/chosen": -1.235068678855896, |
|
"logits/rejected": -1.1716922521591187, |
|
"logps/chosen": -295.0016174316406, |
|
"logps/rejected": -334.9220275878906, |
|
"loss": 0.6876, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.05318360775709152, |
|
"rewards/margins": 0.013463905081152916, |
|
"rewards/rejected": 0.03971971571445465, |
|
"step": 40, |
|
"use_label": 141.0 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.3089005235602092e-07, |
|
"logits/chosen": -1.2642734050750732, |
|
"logits/rejected": -1.1366677284240723, |
|
"logps/chosen": -365.61114501953125, |
|
"logps/rejected": -289.4996032714844, |
|
"loss": 0.6816, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.07207981497049332, |
|
"rewards/margins": 0.05135401338338852, |
|
"rewards/rejected": 0.020725790411233902, |
|
"step": 50, |
|
"use_label": 181.0 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5706806282722514e-07, |
|
"logits/chosen": -1.2177165746688843, |
|
"logits/rejected": -1.230103611946106, |
|
"logps/chosen": -320.33563232421875, |
|
"logps/rejected": -297.27508544921875, |
|
"loss": 0.6684, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.10490015894174576, |
|
"rewards/margins": 0.06382963806390762, |
|
"rewards/rejected": 0.041070520877838135, |
|
"step": 60, |
|
"use_label": 221.0 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.8324607329842932e-07, |
|
"logits/chosen": -1.3265694379806519, |
|
"logits/rejected": -1.228366732597351, |
|
"logps/chosen": -353.4063720703125, |
|
"logps/rejected": -329.4417724609375, |
|
"loss": 0.6629, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.29588761925697327, |
|
"rewards/margins": 0.1510847806930542, |
|
"rewards/rejected": 0.14480280876159668, |
|
"step": 70, |
|
"use_label": 261.0 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.094240837696335e-07, |
|
"logits/chosen": -1.1528517007827759, |
|
"logits/rejected": -1.1881415843963623, |
|
"logps/chosen": -337.8947448730469, |
|
"logps/rejected": -304.3246765136719, |
|
"loss": 0.6595, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.24385130405426025, |
|
"rewards/margins": 0.10451836884021759, |
|
"rewards/rejected": 0.13933296501636505, |
|
"step": 80, |
|
"use_label": 301.0 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.356020942408377e-07, |
|
"logits/chosen": -1.2590320110321045, |
|
"logits/rejected": -1.3250510692596436, |
|
"logps/chosen": -300.1564025878906, |
|
"logps/rejected": -253.16357421875, |
|
"loss": 0.6333, |
|
"pred_label": 0.4000000059604645, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.25413432717323303, |
|
"rewards/margins": 0.1232389584183693, |
|
"rewards/rejected": 0.13089534640312195, |
|
"step": 90, |
|
"use_label": 340.6000061035156 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.6178010471204185e-07, |
|
"logits/chosen": -1.2287836074829102, |
|
"logits/rejected": -1.2516543865203857, |
|
"logps/chosen": -284.37042236328125, |
|
"logps/rejected": -261.1955261230469, |
|
"loss": 0.6389, |
|
"pred_label": 1.0, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.3382905125617981, |
|
"rewards/margins": 0.1669512689113617, |
|
"rewards/rejected": 0.17133919894695282, |
|
"step": 100, |
|
"use_label": 380.0 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.879581151832461e-07, |
|
"logits/chosen": -1.3341495990753174, |
|
"logits/rejected": -1.3016605377197266, |
|
"logps/chosen": -292.0628662109375, |
|
"logps/rejected": -291.2762756347656, |
|
"loss": 0.6436, |
|
"pred_label": 1.0, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.34865301847457886, |
|
"rewards/margins": 0.18116387724876404, |
|
"rewards/rejected": 0.16748914122581482, |
|
"step": 110, |
|
"use_label": 420.0 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1413612565445027e-07, |
|
"logits/chosen": -1.2554535865783691, |
|
"logits/rejected": -1.2652862071990967, |
|
"logps/chosen": -344.93731689453125, |
|
"logps/rejected": -276.16986083984375, |
|
"loss": 0.6143, |
|
"pred_label": 1.2000000476837158, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.5013664364814758, |
|
"rewards/margins": 0.28251418471336365, |
|
"rewards/rejected": 0.21885231137275696, |
|
"step": 120, |
|
"use_label": 459.79998779296875 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.4031413612565446e-07, |
|
"logits/chosen": -1.3496097326278687, |
|
"logits/rejected": -1.2879693508148193, |
|
"logps/chosen": -386.6773376464844, |
|
"logps/rejected": -273.1239013671875, |
|
"loss": 0.5905, |
|
"pred_label": 2.75, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.6433510184288025, |
|
"rewards/margins": 0.37278619408607483, |
|
"rewards/rejected": 0.27056482434272766, |
|
"step": 130, |
|
"use_label": 498.25 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.6649214659685864e-07, |
|
"logits/chosen": -1.3663859367370605, |
|
"logits/rejected": -1.3222754001617432, |
|
"logps/chosen": -272.4532775878906, |
|
"logps/rejected": -275.90032958984375, |
|
"loss": 0.5982, |
|
"pred_label": 4.849999904632568, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.5143588185310364, |
|
"rewards/margins": 0.23399925231933594, |
|
"rewards/rejected": 0.28035953640937805, |
|
"step": 140, |
|
"use_label": 536.1500244140625 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.926701570680628e-07, |
|
"logits/chosen": -1.3226642608642578, |
|
"logits/rejected": -1.2861449718475342, |
|
"logps/chosen": -351.81878662109375, |
|
"logps/rejected": -261.70208740234375, |
|
"loss": 0.5397, |
|
"pred_label": 10.800000190734863, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.6599913835525513, |
|
"rewards/margins": 0.4658470153808594, |
|
"rewards/rejected": 0.1941443383693695, |
|
"step": 150, |
|
"use_label": 570.2000122070312 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.18848167539267e-07, |
|
"logits/chosen": -1.1659371852874756, |
|
"logits/rejected": -1.1296656131744385, |
|
"logps/chosen": -412.0987243652344, |
|
"logps/rejected": -323.6183776855469, |
|
"loss": 0.5415, |
|
"pred_label": 16.200000762939453, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.8539795875549316, |
|
"rewards/margins": 0.553295373916626, |
|
"rewards/rejected": 0.3006841838359833, |
|
"step": 160, |
|
"use_label": 604.7999877929688 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.450261780104712e-07, |
|
"logits/chosen": -1.2072548866271973, |
|
"logits/rejected": -1.2403260469436646, |
|
"logps/chosen": -385.46820068359375, |
|
"logps/rejected": -320.39019775390625, |
|
"loss": 0.5418, |
|
"pred_label": 25.149999618530273, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.9536998867988586, |
|
"rewards/margins": 0.5978282690048218, |
|
"rewards/rejected": 0.35587143898010254, |
|
"step": 170, |
|
"use_label": 635.8499755859375 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.712041884816754e-07, |
|
"logits/chosen": -1.2297383546829224, |
|
"logits/rejected": -1.258941888809204, |
|
"logps/chosen": -341.0736999511719, |
|
"logps/rejected": -295.0050964355469, |
|
"loss": 0.5171, |
|
"pred_label": 38.04999923706055, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 1.2062633037567139, |
|
"rewards/margins": 0.8106303215026855, |
|
"rewards/rejected": 0.3956330120563507, |
|
"step": 180, |
|
"use_label": 662.9500122070312 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.973821989528796e-07, |
|
"logits/chosen": -1.339400053024292, |
|
"logits/rejected": -1.315095067024231, |
|
"logps/chosen": -286.23583984375, |
|
"logps/rejected": -274.06976318359375, |
|
"loss": 0.5214, |
|
"pred_label": 47.400001525878906, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 1.115768313407898, |
|
"rewards/margins": 0.642764687538147, |
|
"rewards/rejected": 0.4730035662651062, |
|
"step": 190, |
|
"use_label": 693.5999755859375 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.973821989528796e-07, |
|
"logits/chosen": -1.2894158363342285, |
|
"logits/rejected": -1.3884027004241943, |
|
"logps/chosen": -376.9474182128906, |
|
"logps/rejected": -412.8877868652344, |
|
"loss": 0.537, |
|
"pred_label": 55.599998474121094, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 1.0884490013122559, |
|
"rewards/margins": 0.6527363061904907, |
|
"rewards/rejected": 0.4357127249240875, |
|
"step": 200, |
|
"use_label": 725.4000244140625 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.944735311227458e-07, |
|
"logits/chosen": -1.2643864154815674, |
|
"logits/rejected": -1.2973284721374512, |
|
"logps/chosen": -277.7148132324219, |
|
"logps/rejected": -278.7284240722656, |
|
"loss": 0.5029, |
|
"pred_label": 67.3499984741211, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 1.057620644569397, |
|
"rewards/margins": 0.7939804792404175, |
|
"rewards/rejected": 0.26364022493362427, |
|
"step": 210, |
|
"use_label": 753.6500244140625 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.91564863292612e-07, |
|
"logits/chosen": -1.2208797931671143, |
|
"logits/rejected": -1.216244101524353, |
|
"logps/chosen": -309.74603271484375, |
|
"logps/rejected": -312.56842041015625, |
|
"loss": 0.4718, |
|
"pred_label": 77.94999694824219, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 1.0285298824310303, |
|
"rewards/margins": 0.5832255482673645, |
|
"rewards/rejected": 0.4453042447566986, |
|
"step": 220, |
|
"use_label": 783.0499877929688 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.886561954624782e-07, |
|
"logits/chosen": -1.2997448444366455, |
|
"logits/rejected": -1.270655870437622, |
|
"logps/chosen": -440.33111572265625, |
|
"logps/rejected": -398.54632568359375, |
|
"loss": 0.4994, |
|
"pred_label": 90.4000015258789, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 1.3546545505523682, |
|
"rewards/margins": 1.087874174118042, |
|
"rewards/rejected": 0.2667803168296814, |
|
"step": 230, |
|
"use_label": 810.5999755859375 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.857475276323444e-07, |
|
"logits/chosen": -1.338390588760376, |
|
"logits/rejected": -1.2634680271148682, |
|
"logps/chosen": -327.8422546386719, |
|
"logps/rejected": -232.891845703125, |
|
"loss": 0.4924, |
|
"pred_label": 103.1500015258789, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 1.0152827501296997, |
|
"rewards/margins": 1.013619303703308, |
|
"rewards/rejected": 0.0016636044019833207, |
|
"step": 240, |
|
"use_label": 837.8499755859375 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.828388598022106e-07, |
|
"logits/chosen": -1.3745036125183105, |
|
"logits/rejected": -1.362729787826538, |
|
"logps/chosen": -346.9915771484375, |
|
"logps/rejected": -278.84124755859375, |
|
"loss": 0.4861, |
|
"pred_label": 118.25, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 1.2634034156799316, |
|
"rewards/margins": 0.8081910014152527, |
|
"rewards/rejected": 0.4552123546600342, |
|
"step": 250, |
|
"use_label": 862.75 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.799301919720768e-07, |
|
"logits/chosen": -1.2410266399383545, |
|
"logits/rejected": -1.1942863464355469, |
|
"logps/chosen": -369.42156982421875, |
|
"logps/rejected": -283.9178771972656, |
|
"loss": 0.4563, |
|
"pred_label": 129.89999389648438, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 1.2563164234161377, |
|
"rewards/margins": 0.9190329313278198, |
|
"rewards/rejected": 0.3372834324836731, |
|
"step": 260, |
|
"use_label": 891.0999755859375 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.77021524141943e-07, |
|
"logits/chosen": -1.3895528316497803, |
|
"logits/rejected": -1.3751407861709595, |
|
"logps/chosen": -268.8004455566406, |
|
"logps/rejected": -284.19122314453125, |
|
"loss": 0.4572, |
|
"pred_label": 143.9499969482422, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 1.2117127180099487, |
|
"rewards/margins": 0.6257116794586182, |
|
"rewards/rejected": 0.5860010385513306, |
|
"step": 270, |
|
"use_label": 917.0499877929688 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7411285631180917e-07, |
|
"logits/chosen": -1.3496041297912598, |
|
"logits/rejected": -1.374954104423523, |
|
"logps/chosen": -324.27777099609375, |
|
"logps/rejected": -279.5069580078125, |
|
"loss": 0.4693, |
|
"pred_label": 153.35000610351562, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 1.1455862522125244, |
|
"rewards/margins": 0.4547230303287506, |
|
"rewards/rejected": 0.6908631920814514, |
|
"step": 280, |
|
"use_label": 947.6500244140625 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.712041884816754e-07, |
|
"logits/chosen": -1.3121898174285889, |
|
"logits/rejected": -1.33925199508667, |
|
"logps/chosen": -279.45233154296875, |
|
"logps/rejected": -333.3996276855469, |
|
"loss": 0.4481, |
|
"pred_label": 166.14999389648438, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.7546733617782593, |
|
"rewards/margins": 0.7626081705093384, |
|
"rewards/rejected": -0.007934845983982086, |
|
"step": 290, |
|
"use_label": 974.8499755859375 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.682955206515416e-07, |
|
"logits/chosen": -1.2102586030960083, |
|
"logits/rejected": -1.2808181047439575, |
|
"logps/chosen": -326.1608581542969, |
|
"logps/rejected": -304.8093566894531, |
|
"loss": 0.4718, |
|
"pred_label": 179.10000610351562, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.6026958227157593, |
|
"rewards/margins": 0.38519367575645447, |
|
"rewards/rejected": 0.217502161860466, |
|
"step": 300, |
|
"use_label": 1001.9000244140625 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.653868528214078e-07, |
|
"logits/chosen": -1.1727502346038818, |
|
"logits/rejected": -1.2067267894744873, |
|
"logps/chosen": -349.1585693359375, |
|
"logps/rejected": -368.10772705078125, |
|
"loss": 0.4232, |
|
"pred_label": 194.14999389648438, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 1.496382474899292, |
|
"rewards/margins": 1.2464112043380737, |
|
"rewards/rejected": 0.24997124075889587, |
|
"step": 310, |
|
"use_label": 1026.8499755859375 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.62478184991274e-07, |
|
"logits/chosen": -1.2799582481384277, |
|
"logits/rejected": -1.3597850799560547, |
|
"logps/chosen": -314.4355773925781, |
|
"logps/rejected": -300.247802734375, |
|
"loss": 0.421, |
|
"pred_label": 210.4499969482422, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 1.3173748254776, |
|
"rewards/margins": 1.0775160789489746, |
|
"rewards/rejected": 0.2398585081100464, |
|
"step": 320, |
|
"use_label": 1050.550048828125 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.5956951716114016e-07, |
|
"logits/chosen": -1.229280948638916, |
|
"logits/rejected": -1.2974430322647095, |
|
"logps/chosen": -345.85198974609375, |
|
"logps/rejected": -309.1730651855469, |
|
"loss": 0.4067, |
|
"pred_label": 227.8000030517578, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 1.1288254261016846, |
|
"rewards/margins": 0.8339816331863403, |
|
"rewards/rejected": 0.2948438823223114, |
|
"step": 330, |
|
"use_label": 1073.199951171875 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.566608493310064e-07, |
|
"logits/chosen": -1.3431947231292725, |
|
"logits/rejected": -1.3393170833587646, |
|
"logps/chosen": -356.05810546875, |
|
"logps/rejected": -269.2801818847656, |
|
"loss": 0.3981, |
|
"pred_label": 247.6999969482422, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 1.18732750415802, |
|
"rewards/margins": 1.1738572120666504, |
|
"rewards/rejected": 0.01347041130065918, |
|
"step": 340, |
|
"use_label": 1093.300048828125 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.5375218150087257e-07, |
|
"logits/chosen": -1.2831729650497437, |
|
"logits/rejected": -1.2885137796401978, |
|
"logps/chosen": -262.3739013671875, |
|
"logps/rejected": -313.76385498046875, |
|
"loss": 0.404, |
|
"pred_label": 265.5, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 1.1653120517730713, |
|
"rewards/margins": 0.48939722776412964, |
|
"rewards/rejected": 0.6759146451950073, |
|
"step": 350, |
|
"use_label": 1115.5 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.5084351367073883e-07, |
|
"logits/chosen": -1.352308988571167, |
|
"logits/rejected": -1.3754525184631348, |
|
"logps/chosen": -239.6806182861328, |
|
"logps/rejected": -248.5498046875, |
|
"loss": 0.3967, |
|
"pred_label": 285.3999938964844, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 1.0487511157989502, |
|
"rewards/margins": 1.0042428970336914, |
|
"rewards/rejected": 0.044508084654808044, |
|
"step": 360, |
|
"use_label": 1135.5999755859375 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.47934845840605e-07, |
|
"logits/chosen": -1.3563268184661865, |
|
"logits/rejected": -1.3786267042160034, |
|
"logps/chosen": -308.45672607421875, |
|
"logps/rejected": -270.8168029785156, |
|
"loss": 0.3685, |
|
"pred_label": 303.54998779296875, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 1.2717820405960083, |
|
"rewards/margins": 1.1417100429534912, |
|
"rewards/rejected": 0.13007190823554993, |
|
"step": 370, |
|
"use_label": 1157.449951171875 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.450261780104712e-07, |
|
"logits/chosen": -1.325315237045288, |
|
"logits/rejected": -1.279359221458435, |
|
"logps/chosen": -325.87542724609375, |
|
"logps/rejected": -285.46929931640625, |
|
"loss": 0.3768, |
|
"pred_label": 326.6000061035156, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 1.9515581130981445, |
|
"rewards/margins": 1.806125283241272, |
|
"rewards/rejected": 0.1454329490661621, |
|
"step": 380, |
|
"use_label": 1174.4000244140625 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.421175101803374e-07, |
|
"logits/chosen": -1.3155038356781006, |
|
"logits/rejected": -1.3505948781967163, |
|
"logps/chosen": -299.5450134277344, |
|
"logps/rejected": -313.3736267089844, |
|
"loss": 0.3309, |
|
"pred_label": 345.95001220703125, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 2.0010504722595215, |
|
"rewards/margins": 1.6885391473770142, |
|
"rewards/rejected": 0.3125116229057312, |
|
"step": 390, |
|
"use_label": 1195.050048828125 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.392088423502036e-07, |
|
"logits/chosen": -1.305530309677124, |
|
"logits/rejected": -1.3502506017684937, |
|
"logps/chosen": -300.59857177734375, |
|
"logps/rejected": -307.1338806152344, |
|
"loss": 0.3581, |
|
"pred_label": 362.79998779296875, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 1.3619585037231445, |
|
"rewards/margins": 0.779948353767395, |
|
"rewards/rejected": 0.5820102095603943, |
|
"step": 400, |
|
"use_label": 1218.199951171875 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.3630017452006976e-07, |
|
"logits/chosen": -1.2975621223449707, |
|
"logits/rejected": -1.3306124210357666, |
|
"logps/chosen": -328.60797119140625, |
|
"logps/rejected": -269.7942810058594, |
|
"loss": 0.3682, |
|
"pred_label": 382.3999938964844, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 1.766815185546875, |
|
"rewards/margins": 1.294459342956543, |
|
"rewards/rejected": 0.47235584259033203, |
|
"step": 410, |
|
"use_label": 1238.5999755859375 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.33391506689936e-07, |
|
"logits/chosen": -1.3032572269439697, |
|
"logits/rejected": -1.2937753200531006, |
|
"logps/chosen": -330.87353515625, |
|
"logps/rejected": -301.7434997558594, |
|
"loss": 0.338, |
|
"pred_label": 403.20001220703125, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 1.4692720174789429, |
|
"rewards/margins": 1.3137176036834717, |
|
"rewards/rejected": 0.15555439889431, |
|
"step": 420, |
|
"use_label": 1257.800048828125 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.304828388598022e-07, |
|
"logits/chosen": -1.1606519222259521, |
|
"logits/rejected": -1.1426994800567627, |
|
"logps/chosen": -382.469482421875, |
|
"logps/rejected": -351.1206970214844, |
|
"loss": 0.312, |
|
"pred_label": 425.20001220703125, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 1.9217780828475952, |
|
"rewards/margins": 2.1217658519744873, |
|
"rewards/rejected": -0.19998787343502045, |
|
"step": 430, |
|
"use_label": 1275.800048828125 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.2757417102966844e-07, |
|
"logits/chosen": -1.2695534229278564, |
|
"logits/rejected": -1.2679811716079712, |
|
"logps/chosen": -311.1631164550781, |
|
"logps/rejected": -303.9151611328125, |
|
"loss": 0.3555, |
|
"pred_label": 445.04998779296875, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 1.351044774055481, |
|
"rewards/margins": 0.4551563262939453, |
|
"rewards/rejected": 0.89588862657547, |
|
"step": 440, |
|
"use_label": 1295.949951171875 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.246655031995346e-07, |
|
"logits/chosen": -1.198559284210205, |
|
"logits/rejected": -1.194226861000061, |
|
"logps/chosen": -286.85296630859375, |
|
"logps/rejected": -267.08056640625, |
|
"loss": 0.318, |
|
"pred_label": 467.0, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 1.3747012615203857, |
|
"rewards/margins": 2.1655492782592773, |
|
"rewards/rejected": -0.7908478379249573, |
|
"step": 450, |
|
"use_label": 1314.0 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.217568353694008e-07, |
|
"logits/chosen": -1.3408215045928955, |
|
"logits/rejected": -1.3759543895721436, |
|
"logps/chosen": -340.38336181640625, |
|
"logps/rejected": -321.1266174316406, |
|
"loss": 0.3117, |
|
"pred_label": 491.45001220703125, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 1.2093924283981323, |
|
"rewards/margins": 1.9316478967666626, |
|
"rewards/rejected": -0.7222554683685303, |
|
"step": 460, |
|
"use_label": 1329.550048828125 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.18848167539267e-07, |
|
"logits/chosen": -1.3295974731445312, |
|
"logits/rejected": -1.3128505945205688, |
|
"logps/chosen": -242.5327606201172, |
|
"logps/rejected": -224.53195190429688, |
|
"loss": 0.3026, |
|
"pred_label": 512.9000244140625, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.9095451235771179, |
|
"rewards/margins": 1.644636869430542, |
|
"rewards/rejected": -0.7350918650627136, |
|
"step": 470, |
|
"use_label": 1348.0999755859375 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.159394997091332e-07, |
|
"logits/chosen": -1.3231532573699951, |
|
"logits/rejected": -1.2729825973510742, |
|
"logps/chosen": -264.4250183105469, |
|
"logps/rejected": -219.93978881835938, |
|
"loss": 0.3323, |
|
"pred_label": 536.0, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.7578233480453491, |
|
"rewards/margins": 1.116870403289795, |
|
"rewards/rejected": -0.35904714465141296, |
|
"step": 480, |
|
"use_label": 1365.0 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.1303083187899937e-07, |
|
"logits/chosen": -1.126888394355774, |
|
"logits/rejected": -1.1470086574554443, |
|
"logps/chosen": -373.8638916015625, |
|
"logps/rejected": -374.7835998535156, |
|
"loss": 0.347, |
|
"pred_label": 555.5999755859375, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.9710286855697632, |
|
"rewards/margins": 1.8147733211517334, |
|
"rewards/rejected": -0.8437445759773254, |
|
"step": 490, |
|
"use_label": 1385.4000244140625 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.1012216404886563e-07, |
|
"logits/chosen": -1.2007501125335693, |
|
"logits/rejected": -1.1932474374771118, |
|
"logps/chosen": -347.7434387207031, |
|
"logps/rejected": -291.282958984375, |
|
"loss": 0.3333, |
|
"pred_label": 579.5999755859375, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 1.1730000972747803, |
|
"rewards/margins": 1.3524248600006104, |
|
"rewards/rejected": -0.17942459881305695, |
|
"step": 500, |
|
"use_label": 1401.4000244140625 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.072134962187318e-07, |
|
"logits/chosen": -1.3202276229858398, |
|
"logits/rejected": -1.3420360088348389, |
|
"logps/chosen": -267.11639404296875, |
|
"logps/rejected": -283.467041015625, |
|
"loss": 0.3203, |
|
"pred_label": 603.7000122070312, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 1.058019995689392, |
|
"rewards/margins": 2.189736843109131, |
|
"rewards/rejected": -1.1317169666290283, |
|
"step": 510, |
|
"use_label": 1417.300048828125 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.0430482838859805e-07, |
|
"logits/chosen": -1.1408579349517822, |
|
"logits/rejected": -1.2456986904144287, |
|
"logps/chosen": -342.52593994140625, |
|
"logps/rejected": -286.4843444824219, |
|
"loss": 0.3375, |
|
"pred_label": 627.4000244140625, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.5285916328430176, |
|
"rewards/margins": 1.3080298900604248, |
|
"rewards/rejected": -0.7794381380081177, |
|
"step": 520, |
|
"use_label": 1433.5999755859375 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.013961605584642e-07, |
|
"logits/chosen": -1.2992430925369263, |
|
"logits/rejected": -1.3081134557724, |
|
"logps/chosen": -313.76641845703125, |
|
"logps/rejected": -381.96466064453125, |
|
"loss": 0.2815, |
|
"pred_label": 652.2000122070312, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 1.136601448059082, |
|
"rewards/margins": 2.0457780361175537, |
|
"rewards/rejected": -0.9091763496398926, |
|
"step": 530, |
|
"use_label": 1448.800048828125 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.984874927283304e-07, |
|
"logits/chosen": -1.278599739074707, |
|
"logits/rejected": -1.319656491279602, |
|
"logps/chosen": -306.2652893066406, |
|
"logps/rejected": -265.16363525390625, |
|
"loss": 0.3163, |
|
"pred_label": 676.4000244140625, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 2.4270758628845215, |
|
"rewards/margins": 2.458122730255127, |
|
"rewards/rejected": -0.031046796590089798, |
|
"step": 540, |
|
"use_label": 1464.5999755859375 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.955788248981966e-07, |
|
"logits/chosen": -1.2827562093734741, |
|
"logits/rejected": -1.1873165369033813, |
|
"logps/chosen": -303.70391845703125, |
|
"logps/rejected": -265.5109558105469, |
|
"loss": 0.3474, |
|
"pred_label": 698.1500244140625, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 1.532619595527649, |
|
"rewards/margins": 2.0600717067718506, |
|
"rewards/rejected": -0.5274521708488464, |
|
"step": 550, |
|
"use_label": 1482.8499755859375 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.926701570680628e-07, |
|
"logits/chosen": -1.3157541751861572, |
|
"logits/rejected": -1.250253677368164, |
|
"logps/chosen": -301.3360595703125, |
|
"logps/rejected": -319.97308349609375, |
|
"loss": 0.3382, |
|
"pred_label": 726.5, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 2.103365182876587, |
|
"rewards/margins": 1.9628784656524658, |
|
"rewards/rejected": 0.14048661291599274, |
|
"step": 560, |
|
"use_label": 1494.5 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.8976148923792903e-07, |
|
"logits/chosen": -1.376778483390808, |
|
"logits/rejected": -1.3120372295379639, |
|
"logps/chosen": -333.80181884765625, |
|
"logps/rejected": -280.1228332519531, |
|
"loss": 0.3608, |
|
"pred_label": 747.1500244140625, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 1.266614317893982, |
|
"rewards/margins": 1.6077734231948853, |
|
"rewards/rejected": -0.34115907549858093, |
|
"step": 570, |
|
"use_label": 1513.8499755859375 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.8685282140779524e-07, |
|
"logits/chosen": -1.3779680728912354, |
|
"logits/rejected": -1.3599121570587158, |
|
"logps/chosen": -310.0410461425781, |
|
"logps/rejected": -264.19580078125, |
|
"loss": 0.2868, |
|
"pred_label": 763.1500244140625, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 2.130375385284424, |
|
"rewards/margins": 2.0123019218444824, |
|
"rewards/rejected": 0.11807334423065186, |
|
"step": 580, |
|
"use_label": 1537.8499755859375 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.839441535776614e-07, |
|
"logits/chosen": -1.3360798358917236, |
|
"logits/rejected": -1.3371288776397705, |
|
"logps/chosen": -339.41485595703125, |
|
"logps/rejected": -361.7887268066406, |
|
"loss": 0.3098, |
|
"pred_label": 787.0, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 1.6782824993133545, |
|
"rewards/margins": 1.6973320245742798, |
|
"rewards/rejected": -0.01904923841357231, |
|
"step": 590, |
|
"use_label": 1554.0 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.8103548574752766e-07, |
|
"logits/chosen": -1.2587051391601562, |
|
"logits/rejected": -1.1522762775421143, |
|
"logps/chosen": -296.5978698730469, |
|
"logps/rejected": -276.82574462890625, |
|
"loss": 0.2843, |
|
"pred_label": 815.4000244140625, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 1.5922435522079468, |
|
"rewards/margins": 1.7545421123504639, |
|
"rewards/rejected": -0.16229870915412903, |
|
"step": 600, |
|
"use_label": 1565.5999755859375 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.781268179173938e-07, |
|
"logits/chosen": -1.2215590476989746, |
|
"logits/rejected": -1.2082473039627075, |
|
"logps/chosen": -304.05950927734375, |
|
"logps/rejected": -402.14996337890625, |
|
"loss": 0.2557, |
|
"pred_label": 842.2999877929688, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.8012117147445679, |
|
"rewards/margins": 2.9960241317749023, |
|
"rewards/rejected": -2.194812774658203, |
|
"step": 610, |
|
"use_label": 1578.699951171875 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.7521815008726e-07, |
|
"logits/chosen": -1.247642993927002, |
|
"logits/rejected": -1.2035638093948364, |
|
"logps/chosen": -315.6612854003906, |
|
"logps/rejected": -302.01751708984375, |
|
"loss": 0.2351, |
|
"pred_label": 869.2999877929688, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 1.4792921543121338, |
|
"rewards/margins": 2.9789254665374756, |
|
"rewards/rejected": -1.4996334314346313, |
|
"step": 620, |
|
"use_label": 1591.699951171875 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.723094822571262e-07, |
|
"logits/chosen": -1.1842483282089233, |
|
"logits/rejected": -1.247299313545227, |
|
"logps/chosen": -277.7129211425781, |
|
"logps/rejected": -321.3715515136719, |
|
"loss": 0.2968, |
|
"pred_label": 896.5499877929688, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.45529526472091675, |
|
"rewards/margins": 1.2875747680664062, |
|
"rewards/rejected": -0.8322793841362, |
|
"step": 630, |
|
"use_label": 1604.449951171875 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.6940081442699243e-07, |
|
"logits/chosen": -1.1754447221755981, |
|
"logits/rejected": -1.2381727695465088, |
|
"logps/chosen": -396.6316223144531, |
|
"logps/rejected": -338.87164306640625, |
|
"loss": 0.2743, |
|
"pred_label": 923.0499877929688, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.854255199432373, |
|
"rewards/margins": 2.8615994453430176, |
|
"rewards/rejected": -2.0073440074920654, |
|
"step": 640, |
|
"use_label": 1617.949951171875 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.6649214659685864e-07, |
|
"logits/chosen": -1.3000792264938354, |
|
"logits/rejected": -1.337820291519165, |
|
"logps/chosen": -261.8930969238281, |
|
"logps/rejected": -257.5715026855469, |
|
"loss": 0.2734, |
|
"pred_label": 950.7000122070312, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.9624491930007935, |
|
"rewards/margins": 1.762574553489685, |
|
"rewards/rejected": -0.8001253008842468, |
|
"step": 650, |
|
"use_label": 1630.300048828125 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.6358347876672485e-07, |
|
"logits/chosen": -1.2691199779510498, |
|
"logits/rejected": -1.2646231651306152, |
|
"logps/chosen": -337.8616943359375, |
|
"logps/rejected": -244.37094116210938, |
|
"loss": 0.2734, |
|
"pred_label": 978.9500122070312, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 1.2994811534881592, |
|
"rewards/margins": 2.3611044883728027, |
|
"rewards/rejected": -1.061623215675354, |
|
"step": 660, |
|
"use_label": 1642.050048828125 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.60674810936591e-07, |
|
"logits/chosen": -1.261584997177124, |
|
"logits/rejected": -1.1427786350250244, |
|
"logps/chosen": -381.8892822265625, |
|
"logps/rejected": -267.61029052734375, |
|
"loss": 0.2662, |
|
"pred_label": 1007.25, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.13106364011764526, |
|
"rewards/margins": 2.517549991607666, |
|
"rewards/rejected": -2.648613452911377, |
|
"step": 670, |
|
"use_label": 1653.75 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.5776614310645726e-07, |
|
"logits/chosen": -1.2966188192367554, |
|
"logits/rejected": -1.304734706878662, |
|
"logps/chosen": -329.88262939453125, |
|
"logps/rejected": -311.21649169921875, |
|
"loss": 0.278, |
|
"pred_label": 1033.8499755859375, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.22023256123065948, |
|
"rewards/margins": 0.27784207463264465, |
|
"rewards/rejected": -0.49807462096214294, |
|
"step": 680, |
|
"use_label": 1667.1500244140625 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.548574752763234e-07, |
|
"logits/chosen": -1.0903209447860718, |
|
"logits/rejected": -1.1760300397872925, |
|
"logps/chosen": -370.1248474121094, |
|
"logps/rejected": -320.8165283203125, |
|
"loss": 0.2686, |
|
"pred_label": 1061.199951171875, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 1.1078599691390991, |
|
"rewards/margins": 2.3121399879455566, |
|
"rewards/rejected": -1.204279899597168, |
|
"step": 690, |
|
"use_label": 1679.800048828125 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.519488074461897e-07, |
|
"logits/chosen": -1.2087898254394531, |
|
"logits/rejected": -1.170114278793335, |
|
"logps/chosen": -316.9244689941406, |
|
"logps/rejected": -294.49346923828125, |
|
"loss": 0.2622, |
|
"pred_label": 1086.3499755859375, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 1.7141532897949219, |
|
"rewards/margins": 2.1581480503082275, |
|
"rewards/rejected": -0.4439946711063385, |
|
"step": 700, |
|
"use_label": 1694.6500244140625 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.4904013961605583e-07, |
|
"logits/chosen": -1.233457326889038, |
|
"logits/rejected": -1.2359874248504639, |
|
"logps/chosen": -314.5318908691406, |
|
"logps/rejected": -388.088623046875, |
|
"loss": 0.2755, |
|
"pred_label": 1111.300048828125, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 1.477555513381958, |
|
"rewards/margins": -0.013068770989775658, |
|
"rewards/rejected": 1.490624189376831, |
|
"step": 710, |
|
"use_label": 1709.699951171875 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.4613147178592204e-07, |
|
"logits/chosen": -1.2819852828979492, |
|
"logits/rejected": -1.2896205186843872, |
|
"logps/chosen": -241.99050903320312, |
|
"logps/rejected": -281.1546325683594, |
|
"loss": 0.2385, |
|
"pred_label": 1134.5, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.46859627962112427, |
|
"rewards/margins": 2.0235211849212646, |
|
"rewards/rejected": -1.5549249649047852, |
|
"step": 720, |
|
"use_label": 1726.5 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.4322280395578825e-07, |
|
"logits/chosen": -1.0794142484664917, |
|
"logits/rejected": -1.152029037475586, |
|
"logps/chosen": -319.8411865234375, |
|
"logps/rejected": -360.5047302246094, |
|
"loss": 0.2396, |
|
"pred_label": 1162.1500244140625, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 1.0120699405670166, |
|
"rewards/margins": 2.247300624847412, |
|
"rewards/rejected": -1.2352306842803955, |
|
"step": 730, |
|
"use_label": 1738.8499755859375 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.4031413612565446e-07, |
|
"logits/chosen": -1.333953619003296, |
|
"logits/rejected": -1.2932109832763672, |
|
"logps/chosen": -310.48443603515625, |
|
"logps/rejected": -296.43182373046875, |
|
"loss": 0.3141, |
|
"pred_label": 1187.449951171875, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.4783060550689697, |
|
"rewards/margins": 1.6771808862686157, |
|
"rewards/rejected": -2.155486822128296, |
|
"step": 740, |
|
"use_label": 1753.550048828125 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.374054682955206e-07, |
|
"logits/chosen": -1.3608129024505615, |
|
"logits/rejected": -1.3046305179595947, |
|
"logps/chosen": -330.74310302734375, |
|
"logps/rejected": -310.60198974609375, |
|
"loss": 0.2691, |
|
"pred_label": 1212.5999755859375, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.5366764068603516, |
|
"rewards/margins": 1.5661550760269165, |
|
"rewards/rejected": -1.029478669166565, |
|
"step": 750, |
|
"use_label": 1768.4000244140625 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.3449680046538687e-07, |
|
"logits/chosen": -1.2935998439788818, |
|
"logits/rejected": -1.292982578277588, |
|
"logps/chosen": -294.06005859375, |
|
"logps/rejected": -312.6604309082031, |
|
"loss": 0.2453, |
|
"pred_label": 1240.4000244140625, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 1.0071651935577393, |
|
"rewards/margins": 2.852874994277954, |
|
"rewards/rejected": -1.8457095623016357, |
|
"step": 760, |
|
"use_label": 1780.5999755859375 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.31588132635253e-07, |
|
"logits/chosen": -1.062832236289978, |
|
"logits/rejected": -0.9800139665603638, |
|
"logps/chosen": -297.5071716308594, |
|
"logps/rejected": -353.8055725097656, |
|
"loss": 0.2174, |
|
"pred_label": 1270.25, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.8052531480789185, |
|
"rewards/margins": 4.82207727432251, |
|
"rewards/rejected": -4.016823768615723, |
|
"step": 770, |
|
"use_label": 1790.75 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.286794648051193e-07, |
|
"logits/chosen": -1.1665582656860352, |
|
"logits/rejected": -1.1822797060012817, |
|
"logps/chosen": -369.43438720703125, |
|
"logps/rejected": -359.2782897949219, |
|
"loss": 0.2728, |
|
"pred_label": 1301.5, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.6651790738105774, |
|
"rewards/margins": 2.360260486602783, |
|
"rewards/rejected": -1.695081353187561, |
|
"step": 780, |
|
"use_label": 1799.5 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.2577079697498544e-07, |
|
"logits/chosen": -1.3665610551834106, |
|
"logits/rejected": -1.3496174812316895, |
|
"logps/chosen": -404.8565979003906, |
|
"logps/rejected": -373.46405029296875, |
|
"loss": 0.2217, |
|
"pred_label": 1332.300048828125, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.08961932361125946, |
|
"rewards/margins": 3.382511854171753, |
|
"rewards/rejected": -3.4721312522888184, |
|
"step": 790, |
|
"use_label": 1808.699951171875 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.2286212914485165e-07, |
|
"logits/chosen": -1.3454267978668213, |
|
"logits/rejected": -1.3274281024932861, |
|
"logps/chosen": -335.8668212890625, |
|
"logps/rejected": -332.7888488769531, |
|
"loss": 0.218, |
|
"pred_label": 1360.800048828125, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.4208967685699463, |
|
"rewards/margins": 1.5826103687286377, |
|
"rewards/rejected": -1.1617136001586914, |
|
"step": 800, |
|
"use_label": 1820.199951171875 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.1995346131471786e-07, |
|
"logits/chosen": -1.2152721881866455, |
|
"logits/rejected": -1.154066801071167, |
|
"logps/chosen": -333.5192565917969, |
|
"logps/rejected": -272.8238220214844, |
|
"loss": 0.2319, |
|
"pred_label": 1388.4000244140625, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 1.0722098350524902, |
|
"rewards/margins": 3.2645347118377686, |
|
"rewards/rejected": -2.1923251152038574, |
|
"step": 810, |
|
"use_label": 1832.5999755859375 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.1704479348458406e-07, |
|
"logits/chosen": -1.312711238861084, |
|
"logits/rejected": -1.3220988512039185, |
|
"logps/chosen": -301.717529296875, |
|
"logps/rejected": -309.1519470214844, |
|
"loss": 0.2123, |
|
"pred_label": 1418.0999755859375, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 1.3842562437057495, |
|
"rewards/margins": 1.5546679496765137, |
|
"rewards/rejected": -0.1704115867614746, |
|
"step": 820, |
|
"use_label": 1842.9000244140625 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.1413612565445027e-07, |
|
"logits/chosen": -1.2439206838607788, |
|
"logits/rejected": -1.2316957712173462, |
|
"logps/chosen": -274.51019287109375, |
|
"logps/rejected": -314.82940673828125, |
|
"loss": 0.2971, |
|
"pred_label": 1444.75, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 1.229410171508789, |
|
"rewards/margins": 2.7308907508850098, |
|
"rewards/rejected": -1.5014803409576416, |
|
"step": 830, |
|
"use_label": 1856.25 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.112274578243165e-07, |
|
"logits/chosen": -1.277681589126587, |
|
"logits/rejected": -1.3482627868652344, |
|
"logps/chosen": -245.6984405517578, |
|
"logps/rejected": -274.3936462402344, |
|
"loss": 0.271, |
|
"pred_label": 1471.75, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 1.7067890167236328, |
|
"rewards/margins": 2.375427007675171, |
|
"rewards/rejected": -0.6686381101608276, |
|
"step": 840, |
|
"use_label": 1869.25 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.0831878999418263e-07, |
|
"logits/chosen": -1.223837971687317, |
|
"logits/rejected": -1.2153947353363037, |
|
"logps/chosen": -329.645751953125, |
|
"logps/rejected": -368.6504211425781, |
|
"loss": 0.2432, |
|
"pred_label": 1502.050048828125, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.48693960905075073, |
|
"rewards/margins": 1.9081785678863525, |
|
"rewards/rejected": -1.421238660812378, |
|
"step": 850, |
|
"use_label": 1878.949951171875 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.054101221640489e-07, |
|
"logits/chosen": -1.208174705505371, |
|
"logits/rejected": -1.2833263874053955, |
|
"logps/chosen": -335.9885559082031, |
|
"logps/rejected": -310.40545654296875, |
|
"loss": 0.2323, |
|
"pred_label": 1532.0999755859375, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 1.3224217891693115, |
|
"rewards/margins": 2.202239513397217, |
|
"rewards/rejected": -0.8798176050186157, |
|
"step": 860, |
|
"use_label": 1888.9000244140625 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.0250145433391505e-07, |
|
"logits/chosen": -1.2315248250961304, |
|
"logits/rejected": -1.2171595096588135, |
|
"logps/chosen": -362.20660400390625, |
|
"logps/rejected": -303.99725341796875, |
|
"loss": 0.2533, |
|
"pred_label": 1559.4000244140625, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 1.4734199047088623, |
|
"rewards/margins": 2.8212103843688965, |
|
"rewards/rejected": -1.3477907180786133, |
|
"step": 870, |
|
"use_label": 1901.5999755859375 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.9959278650378126e-07, |
|
"logits/chosen": -1.2676366567611694, |
|
"logits/rejected": -1.207371711730957, |
|
"logps/chosen": -397.70416259765625, |
|
"logps/rejected": -341.6063537597656, |
|
"loss": 0.2686, |
|
"pred_label": 1589.5999755859375, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.5567753911018372, |
|
"rewards/margins": 1.585971474647522, |
|
"rewards/rejected": -1.0291959047317505, |
|
"step": 880, |
|
"use_label": 1911.4000244140625 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.9668411867364746e-07, |
|
"logits/chosen": -1.1451526880264282, |
|
"logits/rejected": -1.0805937051773071, |
|
"logps/chosen": -237.3237762451172, |
|
"logps/rejected": -232.00241088867188, |
|
"loss": 0.2663, |
|
"pred_label": 1620.300048828125, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.1563435047864914, |
|
"rewards/margins": 1.990779161453247, |
|
"rewards/rejected": -1.8344357013702393, |
|
"step": 890, |
|
"use_label": 1920.699951171875 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.9377545084351367e-07, |
|
"logits/chosen": -1.1840555667877197, |
|
"logits/rejected": -1.1891014575958252, |
|
"logps/chosen": -219.21728515625, |
|
"logps/rejected": -220.53848266601562, |
|
"loss": 0.2378, |
|
"pred_label": 1648.9000244140625, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.5114763975143433, |
|
"rewards/margins": 1.1068259477615356, |
|
"rewards/rejected": -1.6183021068572998, |
|
"step": 900, |
|
"use_label": 1932.0999755859375 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.908667830133799e-07, |
|
"logits/chosen": -1.2712427377700806, |
|
"logits/rejected": -1.3315383195877075, |
|
"logps/chosen": -369.0324401855469, |
|
"logps/rejected": -281.7047424316406, |
|
"loss": 0.2503, |
|
"pred_label": 1677.4000244140625, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 1.2587319612503052, |
|
"rewards/margins": 2.2725837230682373, |
|
"rewards/rejected": -1.0138520002365112, |
|
"step": 910, |
|
"use_label": 1943.5999755859375 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.879581151832461e-07, |
|
"logits/chosen": -1.237377405166626, |
|
"logits/rejected": -1.2065725326538086, |
|
"logps/chosen": -358.11248779296875, |
|
"logps/rejected": -365.1144104003906, |
|
"loss": 0.2472, |
|
"pred_label": 1706.0, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.12370361387729645, |
|
"rewards/margins": 1.9158817529678345, |
|
"rewards/rejected": -1.7921781539916992, |
|
"step": 920, |
|
"use_label": 1955.0 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.8504944735311224e-07, |
|
"logits/chosen": -1.268371343612671, |
|
"logits/rejected": -1.2534607648849487, |
|
"logps/chosen": -273.5567626953125, |
|
"logps/rejected": -315.6604919433594, |
|
"loss": 0.2614, |
|
"pred_label": 1733.5999755859375, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.04811231419444084, |
|
"rewards/margins": 2.209930181503296, |
|
"rewards/rejected": -2.258042335510254, |
|
"step": 930, |
|
"use_label": 1967.4000244140625 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.821407795229785e-07, |
|
"logits/chosen": -1.164576768875122, |
|
"logits/rejected": -1.0726913213729858, |
|
"logps/chosen": -301.34490966796875, |
|
"logps/rejected": -293.6893005371094, |
|
"loss": 0.2803, |
|
"pred_label": 1760.5, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.535773515701294, |
|
"rewards/margins": 3.1747701168060303, |
|
"rewards/rejected": -2.6389966011047363, |
|
"step": 940, |
|
"use_label": 1980.5 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.7923211169284466e-07, |
|
"logits/chosen": -1.2998483180999756, |
|
"logits/rejected": -1.2662798166275024, |
|
"logps/chosen": -343.672607421875, |
|
"logps/rejected": -368.7283630371094, |
|
"loss": 0.2189, |
|
"pred_label": 1787.449951171875, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.820811927318573, |
|
"rewards/margins": 1.670477271080017, |
|
"rewards/rejected": -0.8496652841567993, |
|
"step": 950, |
|
"use_label": 1993.550048828125 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.763234438627109e-07, |
|
"logits/chosen": -1.313528060913086, |
|
"logits/rejected": -1.2543927431106567, |
|
"logps/chosen": -303.2686462402344, |
|
"logps/rejected": -286.05535888671875, |
|
"loss": 0.256, |
|
"pred_label": 1810.1500244140625, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.3981182873249054, |
|
"rewards/margins": 1.8850862979888916, |
|
"rewards/rejected": -2.2832047939300537, |
|
"step": 960, |
|
"use_label": 2010.8499755859375 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.7341477603257707e-07, |
|
"logits/chosen": -1.2742154598236084, |
|
"logits/rejected": -1.2301501035690308, |
|
"logps/chosen": -339.7518310546875, |
|
"logps/rejected": -329.7853088378906, |
|
"loss": 0.2627, |
|
"pred_label": 1836.300048828125, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.6415281295776367, |
|
"rewards/margins": 1.2891826629638672, |
|
"rewards/rejected": -1.930710792541504, |
|
"step": 970, |
|
"use_label": 2024.699951171875 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.705061082024433e-07, |
|
"logits/chosen": -1.2222896814346313, |
|
"logits/rejected": -1.1963387727737427, |
|
"logps/chosen": -375.39959716796875, |
|
"logps/rejected": -348.4335632324219, |
|
"loss": 0.2533, |
|
"pred_label": 1864.6500244140625, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 1.4741919040679932, |
|
"rewards/margins": 3.042367458343506, |
|
"rewards/rejected": -1.5681754350662231, |
|
"step": 980, |
|
"use_label": 2036.3499755859375 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.675974403723095e-07, |
|
"logits/chosen": -1.1725823879241943, |
|
"logits/rejected": -1.0960336923599243, |
|
"logps/chosen": -304.5809326171875, |
|
"logps/rejected": -284.103271484375, |
|
"loss": 0.2378, |
|
"pred_label": 1895.4000244140625, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.33665040135383606, |
|
"rewards/margins": 4.249828338623047, |
|
"rewards/rejected": -3.913177967071533, |
|
"step": 990, |
|
"use_label": 2045.5999755859375 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.646887725421757e-07, |
|
"logits/chosen": -1.1304484605789185, |
|
"logits/rejected": -1.125525712966919, |
|
"logps/chosen": -365.712158203125, |
|
"logps/rejected": -345.81195068359375, |
|
"loss": 0.2517, |
|
"pred_label": 1928.199951171875, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.9080309867858887, |
|
"rewards/margins": 3.343575954437256, |
|
"rewards/rejected": -2.435544967651367, |
|
"step": 1000, |
|
"use_label": 2052.800048828125 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6178010471204185e-07, |
|
"logits/chosen": -1.2770113945007324, |
|
"logits/rejected": -1.214017391204834, |
|
"logps/chosen": -372.6104431152344, |
|
"logps/rejected": -243.80184936523438, |
|
"loss": 0.1902, |
|
"pred_label": 1958.949951171875, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.21130351722240448, |
|
"rewards/margins": 2.0323612689971924, |
|
"rewards/rejected": -2.2436647415161133, |
|
"step": 1010, |
|
"use_label": 2062.050048828125 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.588714368819081e-07, |
|
"logits/chosen": -1.1657683849334717, |
|
"logits/rejected": -1.1163619756698608, |
|
"logps/chosen": -297.716064453125, |
|
"logps/rejected": -282.97125244140625, |
|
"loss": 0.2088, |
|
"pred_label": 1987.5999755859375, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.1362704634666443, |
|
"rewards/margins": 2.989813804626465, |
|
"rewards/rejected": -3.126084089279175, |
|
"step": 1020, |
|
"use_label": 2073.39990234375 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5596276905177426e-07, |
|
"logits/chosen": -1.226504921913147, |
|
"logits/rejected": -1.194415807723999, |
|
"logps/chosen": -339.902587890625, |
|
"logps/rejected": -316.89312744140625, |
|
"loss": 0.2374, |
|
"pred_label": 2018.0, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.9146644473075867, |
|
"rewards/margins": 2.7297873497009277, |
|
"rewards/rejected": -1.8151226043701172, |
|
"step": 1030, |
|
"use_label": 2083.0 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.530541012216405e-07, |
|
"logits/chosen": -1.2352535724639893, |
|
"logits/rejected": -1.1748406887054443, |
|
"logps/chosen": -311.44940185546875, |
|
"logps/rejected": -284.37603759765625, |
|
"loss": 0.2823, |
|
"pred_label": 2045.3499755859375, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.10683774948120117, |
|
"rewards/margins": 1.7979570627212524, |
|
"rewards/rejected": -1.6911194324493408, |
|
"step": 1040, |
|
"use_label": 2095.64990234375 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.501454333915067e-07, |
|
"logits/chosen": -1.1737799644470215, |
|
"logits/rejected": -1.2158236503601074, |
|
"logps/chosen": -411.43634033203125, |
|
"logps/rejected": -429.01605224609375, |
|
"loss": 0.2445, |
|
"pred_label": 2076.300048828125, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 1.1737092733383179, |
|
"rewards/margins": 2.0826451778411865, |
|
"rewards/rejected": -0.9089359045028687, |
|
"step": 1050, |
|
"use_label": 2104.699951171875 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.472367655613729e-07, |
|
"logits/chosen": -1.195278525352478, |
|
"logits/rejected": -1.1494415998458862, |
|
"logps/chosen": -345.48663330078125, |
|
"logps/rejected": -427.1460876464844, |
|
"loss": 0.2202, |
|
"pred_label": 2106.35009765625, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.7652619481086731, |
|
"rewards/margins": 4.071230411529541, |
|
"rewards/rejected": -3.3059685230255127, |
|
"step": 1060, |
|
"use_label": 2114.64990234375 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.443280977312391e-07, |
|
"logits/chosen": -1.2246100902557373, |
|
"logits/rejected": -1.2212460041046143, |
|
"logps/chosen": -376.6558532714844, |
|
"logps/rejected": -344.31451416015625, |
|
"loss": 0.2269, |
|
"pred_label": 2134.449951171875, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 1.7661001682281494, |
|
"rewards/margins": 4.430109977722168, |
|
"rewards/rejected": -2.6640098094940186, |
|
"step": 1070, |
|
"use_label": 2126.550048828125 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.414194299011053e-07, |
|
"logits/chosen": -1.1105204820632935, |
|
"logits/rejected": -1.0868804454803467, |
|
"logps/chosen": -314.63348388671875, |
|
"logps/rejected": -307.9922790527344, |
|
"loss": 0.1703, |
|
"pred_label": 2163.5, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.6920607089996338, |
|
"rewards/margins": 2.328885555267334, |
|
"rewards/rejected": -1.6368249654769897, |
|
"step": 1080, |
|
"use_label": 2137.5 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.385107620709715e-07, |
|
"logits/chosen": -1.1469954252243042, |
|
"logits/rejected": -1.1428484916687012, |
|
"logps/chosen": -335.100830078125, |
|
"logps/rejected": -333.8580627441406, |
|
"loss": 0.1841, |
|
"pred_label": 2190.800048828125, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.8686046600341797, |
|
"rewards/margins": 2.732456922531128, |
|
"rewards/rejected": -1.8638522624969482, |
|
"step": 1090, |
|
"use_label": 2150.199951171875 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.356020942408377e-07, |
|
"logits/chosen": -1.1892510652542114, |
|
"logits/rejected": -1.1579782962799072, |
|
"logps/chosen": -343.0919189453125, |
|
"logps/rejected": -357.2165832519531, |
|
"loss": 0.2029, |
|
"pred_label": 2219.800048828125, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 1.5540876388549805, |
|
"rewards/margins": 2.639407157897949, |
|
"rewards/rejected": -1.0853195190429688, |
|
"step": 1100, |
|
"use_label": 2161.199951171875 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.326934264107039e-07, |
|
"logits/chosen": -1.150119662284851, |
|
"logits/rejected": -1.1163709163665771, |
|
"logps/chosen": -424.14764404296875, |
|
"logps/rejected": -254.954345703125, |
|
"loss": 0.2579, |
|
"pred_label": 2250.14990234375, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.06615400314331055, |
|
"rewards/margins": 2.4133870601654053, |
|
"rewards/rejected": -2.347233295440674, |
|
"step": 1110, |
|
"use_label": 2170.85009765625 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.2978475858057008e-07, |
|
"logits/chosen": -1.1137053966522217, |
|
"logits/rejected": -1.1033612489700317, |
|
"logps/chosen": -301.53887939453125, |
|
"logps/rejected": -317.7568054199219, |
|
"loss": 0.2462, |
|
"pred_label": 2276.85009765625, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 1.4118252992630005, |
|
"rewards/margins": 1.6095244884490967, |
|
"rewards/rejected": -0.19769929349422455, |
|
"step": 1120, |
|
"use_label": 2184.14990234375 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.2687609075043629e-07, |
|
"logits/chosen": -1.1289111375808716, |
|
"logits/rejected": -1.0888346433639526, |
|
"logps/chosen": -350.7413024902344, |
|
"logps/rejected": -379.24273681640625, |
|
"loss": 0.2236, |
|
"pred_label": 2302.39990234375, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 1.709761619567871, |
|
"rewards/margins": 3.1695258617401123, |
|
"rewards/rejected": -1.4597642421722412, |
|
"step": 1130, |
|
"use_label": 2198.60009765625 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.239674229203025e-07, |
|
"logits/chosen": -1.162214994430542, |
|
"logits/rejected": -1.2171916961669922, |
|
"logps/chosen": -343.6764831542969, |
|
"logps/rejected": -366.0986022949219, |
|
"loss": 0.2462, |
|
"pred_label": 2330.050048828125, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 1.5795726776123047, |
|
"rewards/margins": 2.7293777465820312, |
|
"rewards/rejected": -1.1498053073883057, |
|
"step": 1140, |
|
"use_label": 2210.949951171875 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.210587550901687e-07, |
|
"logits/chosen": -1.0336754322052002, |
|
"logits/rejected": -1.0693806409835815, |
|
"logps/chosen": -379.285888671875, |
|
"logps/rejected": -329.3329162597656, |
|
"loss": 0.1904, |
|
"pred_label": 2355.050048828125, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 1.6715002059936523, |
|
"rewards/margins": 4.020023822784424, |
|
"rewards/rejected": -2.3485240936279297, |
|
"step": 1150, |
|
"use_label": 2225.949951171875 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.1815008726003488e-07, |
|
"logits/chosen": -1.1704041957855225, |
|
"logits/rejected": -1.1589972972869873, |
|
"logps/chosen": -324.221923828125, |
|
"logps/rejected": -336.85528564453125, |
|
"loss": 0.1874, |
|
"pred_label": 2386.800048828125, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.6751067638397217, |
|
"rewards/margins": 2.5076706409454346, |
|
"rewards/rejected": -1.8325637578964233, |
|
"step": 1160, |
|
"use_label": 2234.199951171875 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.152414194299011e-07, |
|
"logits/chosen": -1.0068198442459106, |
|
"logits/rejected": -1.000146508216858, |
|
"logps/chosen": -282.8496398925781, |
|
"logps/rejected": -321.56085205078125, |
|
"loss": 0.2117, |
|
"pred_label": 2416.5, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.7289396524429321, |
|
"rewards/margins": 1.5910956859588623, |
|
"rewards/rejected": -2.320035219192505, |
|
"step": 1170, |
|
"use_label": 2244.5 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.123327515997673e-07, |
|
"logits/chosen": -0.9953794479370117, |
|
"logits/rejected": -0.970104992389679, |
|
"logps/chosen": -317.9019470214844, |
|
"logps/rejected": -309.2327575683594, |
|
"loss": 0.1938, |
|
"pred_label": 2443.75, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.3172712028026581, |
|
"rewards/margins": 2.3301925659179688, |
|
"rewards/rejected": -2.6474642753601074, |
|
"step": 1180, |
|
"use_label": 2257.25 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.094240837696335e-07, |
|
"logits/chosen": -1.0719118118286133, |
|
"logits/rejected": -1.0339205265045166, |
|
"logps/chosen": -371.0210876464844, |
|
"logps/rejected": -436.35797119140625, |
|
"loss": 0.1679, |
|
"pred_label": 2471.5, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 1.381567120552063, |
|
"rewards/margins": 4.559169292449951, |
|
"rewards/rejected": -3.1776022911071777, |
|
"step": 1190, |
|
"use_label": 2269.5 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.0651541593949969e-07, |
|
"logits/chosen": -1.1298894882202148, |
|
"logits/rejected": -1.1007440090179443, |
|
"logps/chosen": -314.60009765625, |
|
"logps/rejected": -374.28387451171875, |
|
"loss": 0.2071, |
|
"pred_label": 2505.75, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 1.431455373764038, |
|
"rewards/margins": 4.434044361114502, |
|
"rewards/rejected": -3.0025887489318848, |
|
"step": 1200, |
|
"use_label": 2275.25 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.036067481093659e-07, |
|
"logits/chosen": -0.998555064201355, |
|
"logits/rejected": -1.096919298171997, |
|
"logps/chosen": -364.8897399902344, |
|
"logps/rejected": -305.2925720214844, |
|
"loss": 0.2148, |
|
"pred_label": 2539.550048828125, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.8155422210693359, |
|
"rewards/margins": 2.473628520965576, |
|
"rewards/rejected": -1.6580864191055298, |
|
"step": 1210, |
|
"use_label": 2281.449951171875 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.006980802792321e-07, |
|
"logits/chosen": -1.093653678894043, |
|
"logits/rejected": -1.1002882719039917, |
|
"logps/chosen": -354.3478088378906, |
|
"logps/rejected": -318.2798156738281, |
|
"loss": 0.1867, |
|
"pred_label": 2571.14990234375, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 1.8111639022827148, |
|
"rewards/margins": 3.449200391769409, |
|
"rewards/rejected": -1.6380364894866943, |
|
"step": 1220, |
|
"use_label": 2289.85009765625 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.977894124490983e-07, |
|
"logits/chosen": -1.2149935960769653, |
|
"logits/rejected": -1.1372343301773071, |
|
"logps/chosen": -292.52294921875, |
|
"logps/rejected": -265.5375061035156, |
|
"loss": 0.1922, |
|
"pred_label": 2601.39990234375, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.0008518159156665206, |
|
"rewards/margins": 2.4776790142059326, |
|
"rewards/rejected": -2.4768271446228027, |
|
"step": 1230, |
|
"use_label": 2299.60009765625 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.9488074461896452e-07, |
|
"logits/chosen": -1.1095410585403442, |
|
"logits/rejected": -1.108186960220337, |
|
"logps/chosen": -381.8337707519531, |
|
"logps/rejected": -426.4427185058594, |
|
"loss": 0.2153, |
|
"pred_label": 2632.60009765625, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.9956940412521362, |
|
"rewards/margins": 4.763395309448242, |
|
"rewards/rejected": -3.7677009105682373, |
|
"step": 1240, |
|
"use_label": 2308.39990234375 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.919720767888307e-07, |
|
"logits/chosen": -1.2132353782653809, |
|
"logits/rejected": -1.2195868492126465, |
|
"logps/chosen": -322.24774169921875, |
|
"logps/rejected": -393.90020751953125, |
|
"loss": 0.1843, |
|
"pred_label": 2666.699951171875, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.1081535592675209, |
|
"rewards/margins": 2.927469253540039, |
|
"rewards/rejected": -2.8193156719207764, |
|
"step": 1250, |
|
"use_label": 2314.300048828125 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.890634089586969e-07, |
|
"logits/chosen": -1.0909610986709595, |
|
"logits/rejected": -1.0712100267410278, |
|
"logps/chosen": -301.9937744140625, |
|
"logps/rejected": -297.81365966796875, |
|
"loss": 0.2063, |
|
"pred_label": 2699.35009765625, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6107617616653442, |
|
"rewards/margins": 3.5464184284210205, |
|
"rewards/rejected": -4.157180309295654, |
|
"step": 1260, |
|
"use_label": 2321.64990234375 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.861547411285631e-07, |
|
"logits/chosen": -1.1298341751098633, |
|
"logits/rejected": -1.0467592477798462, |
|
"logps/chosen": -324.2348937988281, |
|
"logps/rejected": -344.5401611328125, |
|
"loss": 0.2163, |
|
"pred_label": 2732.89990234375, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3071551024913788, |
|
"rewards/margins": 3.859396457672119, |
|
"rewards/rejected": -4.16655158996582, |
|
"step": 1270, |
|
"use_label": 2328.10009765625 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.8324607329842932e-07, |
|
"logits/chosen": -1.0855954885482788, |
|
"logits/rejected": -1.1321277618408203, |
|
"logps/chosen": -276.91912841796875, |
|
"logps/rejected": -345.2870178222656, |
|
"loss": 0.2098, |
|
"pred_label": 2764.39990234375, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4349310398101807, |
|
"rewards/margins": 2.8562276363372803, |
|
"rewards/rejected": -4.291158676147461, |
|
"step": 1280, |
|
"use_label": 2336.60009765625 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.803374054682955e-07, |
|
"logits/chosen": -0.9666376113891602, |
|
"logits/rejected": -1.0153762102127075, |
|
"logps/chosen": -300.74078369140625, |
|
"logps/rejected": -354.02117919921875, |
|
"loss": 0.2097, |
|
"pred_label": 2795.449951171875, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6840406656265259, |
|
"rewards/margins": 2.9005465507507324, |
|
"rewards/rejected": -3.5845870971679688, |
|
"step": 1290, |
|
"use_label": 2345.550048828125 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.774287376381617e-07, |
|
"logits/chosen": -1.1049658060073853, |
|
"logits/rejected": -1.0567684173583984, |
|
"logps/chosen": -389.1566162109375, |
|
"logps/rejected": -305.6317443847656, |
|
"loss": 0.1964, |
|
"pred_label": 2828.35009765625, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.8954909443855286, |
|
"rewards/margins": 4.705790042877197, |
|
"rewards/rejected": -3.8102996349334717, |
|
"step": 1300, |
|
"use_label": 2352.64990234375 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.7452006980802792e-07, |
|
"logits/chosen": -1.1040902137756348, |
|
"logits/rejected": -1.0495140552520752, |
|
"logps/chosen": -460.7845764160156, |
|
"logps/rejected": -396.0311279296875, |
|
"loss": 0.2207, |
|
"pred_label": 2858.25, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 1.5667216777801514, |
|
"rewards/margins": 2.7725882530212402, |
|
"rewards/rejected": -1.2058665752410889, |
|
"step": 1310, |
|
"use_label": 2362.75 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.7161140197789412e-07, |
|
"logits/chosen": -1.0317670106887817, |
|
"logits/rejected": -1.0540131330490112, |
|
"logps/chosen": -372.09661865234375, |
|
"logps/rejected": -359.15264892578125, |
|
"loss": 0.192, |
|
"pred_label": 2887.85009765625, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.242278575897217, |
|
"rewards/margins": 1.178301453590393, |
|
"rewards/rejected": -3.4205803871154785, |
|
"step": 1320, |
|
"use_label": 2373.14990234375 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.687027341477603e-07, |
|
"logits/chosen": -1.0154337882995605, |
|
"logits/rejected": -1.026306390762329, |
|
"logps/chosen": -246.6059112548828, |
|
"logps/rejected": -313.6022033691406, |
|
"loss": 0.1788, |
|
"pred_label": 2915.949951171875, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.02297346666455269, |
|
"rewards/margins": 3.024552583694458, |
|
"rewards/rejected": -3.0475258827209473, |
|
"step": 1330, |
|
"use_label": 2385.050048828125 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.657940663176265e-07, |
|
"logits/chosen": -1.0699912309646606, |
|
"logits/rejected": -1.0195733308792114, |
|
"logps/chosen": -466.06085205078125, |
|
"logps/rejected": -317.84381103515625, |
|
"loss": 0.2023, |
|
"pred_label": 2949.5, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.48599451780319214, |
|
"rewards/margins": 4.262557029724121, |
|
"rewards/rejected": -3.776562213897705, |
|
"step": 1340, |
|
"use_label": 2391.5 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.6288539848749272e-07, |
|
"logits/chosen": -1.0590379238128662, |
|
"logits/rejected": -1.0681087970733643, |
|
"logps/chosen": -320.8080139160156, |
|
"logps/rejected": -345.09051513671875, |
|
"loss": 0.2052, |
|
"pred_label": 2979.60009765625, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6778038144111633, |
|
"rewards/margins": 2.3581137657165527, |
|
"rewards/rejected": -3.0359177589416504, |
|
"step": 1350, |
|
"use_label": 2401.39990234375 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.5997673065735893e-07, |
|
"logits/chosen": -1.0564696788787842, |
|
"logits/rejected": -0.9981715083122253, |
|
"logps/chosen": -422.28436279296875, |
|
"logps/rejected": -317.775146484375, |
|
"loss": 0.2191, |
|
"pred_label": 3008.10009765625, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9884247779846191, |
|
"rewards/margins": 2.251682996749878, |
|
"rewards/rejected": -3.240108013153076, |
|
"step": 1360, |
|
"use_label": 2412.89990234375 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5706806282722514e-07, |
|
"logits/chosen": -1.1280796527862549, |
|
"logits/rejected": -1.0177102088928223, |
|
"logps/chosen": -372.64996337890625, |
|
"logps/rejected": -325.7216796875, |
|
"loss": 0.1683, |
|
"pred_label": 3039.0, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.2417742758989334, |
|
"rewards/margins": 3.3245158195495605, |
|
"rewards/rejected": -3.5662899017333984, |
|
"step": 1370, |
|
"use_label": 2422.0 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5415939499709132e-07, |
|
"logits/chosen": -1.0724250078201294, |
|
"logits/rejected": -1.0401219129562378, |
|
"logps/chosen": -272.91546630859375, |
|
"logps/rejected": -230.291748046875, |
|
"loss": 0.236, |
|
"pred_label": 3070.25, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.9332826137542725, |
|
"rewards/margins": 2.3847384452819824, |
|
"rewards/rejected": -4.318020820617676, |
|
"step": 1380, |
|
"use_label": 2430.75 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.5125072716695752e-07, |
|
"logits/chosen": -1.0457665920257568, |
|
"logits/rejected": -1.0457961559295654, |
|
"logps/chosen": -352.20379638671875, |
|
"logps/rejected": -325.810546875, |
|
"loss": 0.183, |
|
"pred_label": 3103.550048828125, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.5959171652793884, |
|
"rewards/margins": 3.479651927947998, |
|
"rewards/rejected": -4.075569152832031, |
|
"step": 1390, |
|
"use_label": 2437.449951171875 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.4834205933682373e-07, |
|
"logits/chosen": -1.0471112728118896, |
|
"logits/rejected": -1.0389719009399414, |
|
"logps/chosen": -274.6317138671875, |
|
"logps/rejected": -285.1258239746094, |
|
"loss": 0.1871, |
|
"pred_label": 3134.60009765625, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.41235724091529846, |
|
"rewards/margins": 3.290208339691162, |
|
"rewards/rejected": -3.7025654315948486, |
|
"step": 1400, |
|
"use_label": 2446.39990234375 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.4543339150668994e-07, |
|
"logits/chosen": -1.1152770519256592, |
|
"logits/rejected": -0.9952844381332397, |
|
"logps/chosen": -310.62872314453125, |
|
"logps/rejected": -284.43927001953125, |
|
"loss": 0.2265, |
|
"pred_label": 3163.60009765625, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.20648960769176483, |
|
"rewards/margins": 2.571920871734619, |
|
"rewards/rejected": -2.365431547164917, |
|
"step": 1410, |
|
"use_label": 2457.39990234375 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.4252472367655612e-07, |
|
"logits/chosen": -0.9487142562866211, |
|
"logits/rejected": -0.9793168902397156, |
|
"logps/chosen": -406.9850769042969, |
|
"logps/rejected": -329.6900634765625, |
|
"loss": 0.1839, |
|
"pred_label": 3192.89990234375, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.578311562538147, |
|
"rewards/margins": 3.119631290435791, |
|
"rewards/rejected": -2.5413200855255127, |
|
"step": 1420, |
|
"use_label": 2468.10009765625 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.3961605584642233e-07, |
|
"logits/chosen": -1.0938589572906494, |
|
"logits/rejected": -1.0605380535125732, |
|
"logps/chosen": -306.23809814453125, |
|
"logps/rejected": -274.6723937988281, |
|
"loss": 0.1687, |
|
"pred_label": 3223.800048828125, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6292298436164856, |
|
"rewards/margins": 3.2063794136047363, |
|
"rewards/rejected": -3.8356094360351562, |
|
"step": 1430, |
|
"use_label": 2477.199951171875 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.3670738801628854e-07, |
|
"logits/chosen": -0.9974590539932251, |
|
"logits/rejected": -1.0331952571868896, |
|
"logps/chosen": -378.69158935546875, |
|
"logps/rejected": -439.4248046875, |
|
"loss": 0.1876, |
|
"pred_label": 3252.699951171875, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.29973307251930237, |
|
"rewards/margins": 3.267655849456787, |
|
"rewards/rejected": -3.5673890113830566, |
|
"step": 1440, |
|
"use_label": 2488.300048828125 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.3379872018615474e-07, |
|
"logits/chosen": -1.0476627349853516, |
|
"logits/rejected": -1.0306545495986938, |
|
"logps/chosen": -381.58709716796875, |
|
"logps/rejected": -317.57110595703125, |
|
"loss": 0.203, |
|
"pred_label": 3282.699951171875, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4640182554721832, |
|
"rewards/margins": 2.2011430263519287, |
|
"rewards/rejected": -2.665161609649658, |
|
"step": 1450, |
|
"use_label": 2498.300048828125 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.3089005235602092e-07, |
|
"logits/chosen": -1.0808570384979248, |
|
"logits/rejected": -1.0447580814361572, |
|
"logps/chosen": -351.30523681640625, |
|
"logps/rejected": -301.14276123046875, |
|
"loss": 0.2173, |
|
"pred_label": 3313.64990234375, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.037274181842803955, |
|
"rewards/margins": 2.719515323638916, |
|
"rewards/rejected": -2.756789445877075, |
|
"step": 1460, |
|
"use_label": 2507.35009765625 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2798138452588713e-07, |
|
"logits/chosen": -1.0119661092758179, |
|
"logits/rejected": -1.0127899646759033, |
|
"logps/chosen": -452.9007263183594, |
|
"logps/rejected": -350.81988525390625, |
|
"loss": 0.2244, |
|
"pred_label": 3344.35009765625, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.5907049179077148, |
|
"rewards/margins": 4.506594181060791, |
|
"rewards/rejected": -3.915889024734497, |
|
"step": 1470, |
|
"use_label": 2516.64990234375 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2507271669575334e-07, |
|
"logits/chosen": -0.9774877429008484, |
|
"logits/rejected": -1.0214698314666748, |
|
"logps/chosen": -311.8316955566406, |
|
"logps/rejected": -339.8797607421875, |
|
"loss": 0.1938, |
|
"pred_label": 3374.64990234375, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.14671669900417328, |
|
"rewards/margins": 1.559191346168518, |
|
"rewards/rejected": -1.7059080600738525, |
|
"step": 1480, |
|
"use_label": 2526.35009765625 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.2216404886561955e-07, |
|
"logits/chosen": -1.0379236936569214, |
|
"logits/rejected": -1.0210058689117432, |
|
"logps/chosen": -270.0731506347656, |
|
"logps/rejected": -259.6552734375, |
|
"loss": 0.2187, |
|
"pred_label": 3405.449951171875, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6488065719604492, |
|
"rewards/margins": 2.2493479251861572, |
|
"rewards/rejected": -2.8981544971466064, |
|
"step": 1490, |
|
"use_label": 2535.550048828125 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1925538103548575e-07, |
|
"logits/chosen": -0.8977063894271851, |
|
"logits/rejected": -0.984424889087677, |
|
"logps/chosen": -356.542724609375, |
|
"logps/rejected": -314.09893798828125, |
|
"loss": 0.1712, |
|
"pred_label": 3438.60009765625, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.38773947954177856, |
|
"rewards/margins": 2.917739152908325, |
|
"rewards/rejected": -2.5299997329711914, |
|
"step": 1500, |
|
"use_label": 2542.39990234375 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1634671320535195e-07, |
|
"logits/chosen": -1.0663585662841797, |
|
"logits/rejected": -1.0083823204040527, |
|
"logps/chosen": -318.5123291015625, |
|
"logps/rejected": -304.78387451171875, |
|
"loss": 0.1956, |
|
"pred_label": 3471.699951171875, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.7554278373718262, |
|
"rewards/margins": 3.285256862640381, |
|
"rewards/rejected": -2.529829263687134, |
|
"step": 1510, |
|
"use_label": 2549.300048828125 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.1343804537521814e-07, |
|
"logits/chosen": -1.051598310470581, |
|
"logits/rejected": -1.0226097106933594, |
|
"logps/chosen": -377.0965881347656, |
|
"logps/rejected": -338.76812744140625, |
|
"loss": 0.2022, |
|
"pred_label": 3499.949951171875, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.8909615278244019, |
|
"rewards/margins": 1.9300997257232666, |
|
"rewards/rejected": -1.039137840270996, |
|
"step": 1520, |
|
"use_label": 2561.050048828125 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.1052937754508435e-07, |
|
"logits/chosen": -1.105948567390442, |
|
"logits/rejected": -1.1236369609832764, |
|
"logps/chosen": -421.694580078125, |
|
"logps/rejected": -365.2895202636719, |
|
"loss": 0.2134, |
|
"pred_label": 3530.699951171875, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 1.1226226091384888, |
|
"rewards/margins": 4.040251731872559, |
|
"rewards/rejected": -2.9176290035247803, |
|
"step": 1530, |
|
"use_label": 2570.300048828125 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0762070971495054e-07, |
|
"logits/chosen": -1.0308454036712646, |
|
"logits/rejected": -1.0807101726531982, |
|
"logps/chosen": -355.47320556640625, |
|
"logps/rejected": -407.9166259765625, |
|
"loss": 0.2194, |
|
"pred_label": 3559.35009765625, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 1.4176074266433716, |
|
"rewards/margins": 2.80814528465271, |
|
"rewards/rejected": -1.390538215637207, |
|
"step": 1540, |
|
"use_label": 2581.64990234375 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0471204188481675e-07, |
|
"logits/chosen": -1.0221843719482422, |
|
"logits/rejected": -1.003930926322937, |
|
"logps/chosen": -313.1138610839844, |
|
"logps/rejected": -262.46295166015625, |
|
"loss": 0.2012, |
|
"pred_label": 3589.85009765625, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.20224742591381073, |
|
"rewards/margins": 4.130466461181641, |
|
"rewards/rejected": -3.9282188415527344, |
|
"step": 1550, |
|
"use_label": 2591.14990234375 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.0180337405468295e-07, |
|
"logits/chosen": -1.0754446983337402, |
|
"logits/rejected": -1.0588288307189941, |
|
"logps/chosen": -333.0397644042969, |
|
"logps/rejected": -329.0122985839844, |
|
"loss": 0.212, |
|
"pred_label": 3616.550048828125, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.766626238822937, |
|
"rewards/margins": 3.1271777153015137, |
|
"rewards/rejected": -3.893803358078003, |
|
"step": 1560, |
|
"use_label": 2604.449951171875 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.889470622454915e-08, |
|
"logits/chosen": -1.0235944986343384, |
|
"logits/rejected": -1.030893087387085, |
|
"logps/chosen": -322.72857666015625, |
|
"logps/rejected": -325.28912353515625, |
|
"loss": 0.1968, |
|
"pred_label": 3644.64990234375, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.4102725982666016, |
|
"rewards/margins": 2.7497966289520264, |
|
"rewards/rejected": -4.160068988800049, |
|
"step": 1570, |
|
"use_label": 2616.35009765625 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.598603839441535e-08, |
|
"logits/chosen": -1.0010260343551636, |
|
"logits/rejected": -1.0144360065460205, |
|
"logps/chosen": -380.1663818359375, |
|
"logps/rejected": -335.47662353515625, |
|
"loss": 0.2129, |
|
"pred_label": 3672.25, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.20972386002540588, |
|
"rewards/margins": 1.8820480108261108, |
|
"rewards/rejected": -1.6723241806030273, |
|
"step": 1580, |
|
"use_label": 2628.75 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.307737056428156e-08, |
|
"logits/chosen": -0.800024151802063, |
|
"logits/rejected": -0.8323174715042114, |
|
"logps/chosen": -315.90447998046875, |
|
"logps/rejected": -332.60150146484375, |
|
"loss": 0.1793, |
|
"pred_label": 3700.699951171875, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8666568994522095, |
|
"rewards/margins": 3.055481433868408, |
|
"rewards/rejected": -3.9221386909484863, |
|
"step": 1590, |
|
"use_label": 2640.300048828125 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 9.016870273414775e-08, |
|
"logits/chosen": -0.8738692998886108, |
|
"logits/rejected": -0.9172623753547668, |
|
"logps/chosen": -350.1977233886719, |
|
"logps/rejected": -301.5623474121094, |
|
"loss": 0.2091, |
|
"pred_label": 3731.60009765625, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1013541221618652, |
|
"rewards/margins": 2.4148409366607666, |
|
"rewards/rejected": -3.5161945819854736, |
|
"step": 1600, |
|
"use_label": 2649.39990234375 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.726003490401396e-08, |
|
"logits/chosen": -1.0966861248016357, |
|
"logits/rejected": -1.0557001829147339, |
|
"logps/chosen": -371.09173583984375, |
|
"logps/rejected": -266.89263916015625, |
|
"loss": 0.1972, |
|
"pred_label": 3761.550048828125, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1973705142736435, |
|
"rewards/margins": 3.2193920612335205, |
|
"rewards/rejected": -3.416762590408325, |
|
"step": 1610, |
|
"use_label": 2659.449951171875 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.435136707388015e-08, |
|
"logits/chosen": -0.9223749041557312, |
|
"logits/rejected": -0.9156180620193481, |
|
"logps/chosen": -407.92889404296875, |
|
"logps/rejected": -357.1949157714844, |
|
"loss": 0.1682, |
|
"pred_label": 3793.60009765625, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.09885386377573013, |
|
"rewards/margins": 2.7179036140441895, |
|
"rewards/rejected": -2.8167572021484375, |
|
"step": 1620, |
|
"use_label": 2667.39990234375 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.144269924374636e-08, |
|
"logits/chosen": -0.8016144633293152, |
|
"logits/rejected": -0.8304374814033508, |
|
"logps/chosen": -392.49249267578125, |
|
"logps/rejected": -387.77813720703125, |
|
"loss": 0.2052, |
|
"pred_label": 3822.14990234375, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7084876298904419, |
|
"rewards/margins": 3.3112130165100098, |
|
"rewards/rejected": -4.019700527191162, |
|
"step": 1630, |
|
"use_label": 2678.85009765625 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.853403141361257e-08, |
|
"logits/chosen": -1.014793872833252, |
|
"logits/rejected": -0.9608389139175415, |
|
"logps/chosen": -301.42108154296875, |
|
"logps/rejected": -262.4256286621094, |
|
"loss": 0.23, |
|
"pred_label": 3848.75, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.032284706830978394, |
|
"rewards/margins": 2.6330177783966064, |
|
"rewards/rejected": -2.6653025150299072, |
|
"step": 1640, |
|
"use_label": 2692.25 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.562536358347876e-08, |
|
"logits/chosen": -0.9088438749313354, |
|
"logits/rejected": -0.9523930549621582, |
|
"logps/chosen": -322.62933349609375, |
|
"logps/rejected": -361.05670166015625, |
|
"loss": 0.2088, |
|
"pred_label": 3879.449951171875, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8268980979919434, |
|
"rewards/margins": 1.8542957305908203, |
|
"rewards/rejected": -2.6811938285827637, |
|
"step": 1650, |
|
"use_label": 2701.550048828125 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.271669575334497e-08, |
|
"logits/chosen": -0.9642359614372253, |
|
"logits/rejected": -0.8722718358039856, |
|
"logps/chosen": -359.36968994140625, |
|
"logps/rejected": -413.9576110839844, |
|
"loss": 0.1724, |
|
"pred_label": 3914.60009765625, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.34769096970558167, |
|
"rewards/margins": 3.559408664703369, |
|
"rewards/rejected": -3.907099485397339, |
|
"step": 1660, |
|
"use_label": 2706.39990234375 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.980802792321116e-08, |
|
"logits/chosen": -0.8372431993484497, |
|
"logits/rejected": -0.8543712496757507, |
|
"logps/chosen": -358.82550048828125, |
|
"logps/rejected": -364.4158630371094, |
|
"loss": 0.2092, |
|
"pred_label": 3946.949951171875, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.024167824536561966, |
|
"rewards/margins": 1.891296148300171, |
|
"rewards/rejected": -1.9154638051986694, |
|
"step": 1670, |
|
"use_label": 2714.050048828125 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.689936009307737e-08, |
|
"logits/chosen": -0.8467708826065063, |
|
"logits/rejected": -0.8923909068107605, |
|
"logps/chosen": -311.6363830566406, |
|
"logps/rejected": -330.9986267089844, |
|
"loss": 0.1716, |
|
"pred_label": 3979.300048828125, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.442889928817749, |
|
"rewards/margins": 1.873750925064087, |
|
"rewards/rejected": -3.316641330718994, |
|
"step": 1680, |
|
"use_label": 2721.699951171875 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.399069226294357e-08, |
|
"logits/chosen": -0.9512916803359985, |
|
"logits/rejected": -0.9659273028373718, |
|
"logps/chosen": -374.9173278808594, |
|
"logps/rejected": -325.18695068359375, |
|
"loss": 0.2179, |
|
"pred_label": 4007.25, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.05676973983645439, |
|
"rewards/margins": 2.8793885707855225, |
|
"rewards/rejected": -2.9361586570739746, |
|
"step": 1690, |
|
"use_label": 2733.75 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.108202443280977e-08, |
|
"logits/chosen": -0.8465694189071655, |
|
"logits/rejected": -0.9132062792778015, |
|
"logps/chosen": -346.77032470703125, |
|
"logps/rejected": -317.15716552734375, |
|
"loss": 0.1967, |
|
"pred_label": 4035.75, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8769257664680481, |
|
"rewards/margins": 3.0119924545288086, |
|
"rewards/rejected": -3.88891863822937, |
|
"step": 1700, |
|
"use_label": 2745.25 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.8173356602675974e-08, |
|
"logits/chosen": -0.9974952936172485, |
|
"logits/rejected": -0.9654536247253418, |
|
"logps/chosen": -257.2732849121094, |
|
"logps/rejected": -298.55157470703125, |
|
"loss": 0.1809, |
|
"pred_label": 4066.35009765625, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.23062801361084, |
|
"rewards/margins": 2.8383820056915283, |
|
"rewards/rejected": -5.069010257720947, |
|
"step": 1710, |
|
"use_label": 2754.64990234375 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.5264688772542175e-08, |
|
"logits/chosen": -0.891338050365448, |
|
"logits/rejected": -0.9084685444831848, |
|
"logps/chosen": -470.7725524902344, |
|
"logps/rejected": -485.2267150878906, |
|
"loss": 0.1991, |
|
"pred_label": 4097.75, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.26461061835289, |
|
"rewards/margins": 3.3599886894226074, |
|
"rewards/rejected": -3.6245994567871094, |
|
"step": 1720, |
|
"use_label": 2763.25 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5.2356020942408376e-08, |
|
"logits/chosen": -0.8564783334732056, |
|
"logits/rejected": -0.9062970876693726, |
|
"logps/chosen": -296.5504455566406, |
|
"logps/rejected": -302.5833740234375, |
|
"loss": 0.1845, |
|
"pred_label": 4129.0498046875, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.253291130065918, |
|
"rewards/margins": 1.962683916091919, |
|
"rewards/rejected": -3.215975284576416, |
|
"step": 1730, |
|
"use_label": 2771.949951171875 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.944735311227458e-08, |
|
"logits/chosen": -0.8973791003227234, |
|
"logits/rejected": -0.842978835105896, |
|
"logps/chosen": -309.6785583496094, |
|
"logps/rejected": -285.935791015625, |
|
"loss": 0.2085, |
|
"pred_label": 4161.39990234375, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.08151117712259293, |
|
"rewards/margins": 3.9789955615997314, |
|
"rewards/rejected": -3.897484540939331, |
|
"step": 1740, |
|
"use_label": 2779.60009765625 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.653868528214078e-08, |
|
"logits/chosen": -0.9628440141677856, |
|
"logits/rejected": -1.0249922275543213, |
|
"logps/chosen": -376.8697814941406, |
|
"logps/rejected": -336.0437316894531, |
|
"loss": 0.194, |
|
"pred_label": 4192.5, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.799761414527893, |
|
"rewards/margins": 3.394575595855713, |
|
"rewards/rejected": -5.194336891174316, |
|
"step": 1750, |
|
"use_label": 2788.5 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.363001745200698e-08, |
|
"logits/chosen": -0.9686734080314636, |
|
"logits/rejected": -1.011160135269165, |
|
"logps/chosen": -344.68109130859375, |
|
"logps/rejected": -338.3143615722656, |
|
"loss": 0.1822, |
|
"pred_label": 4224.9501953125, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.9003709554672241, |
|
"rewards/margins": 4.197125434875488, |
|
"rewards/rejected": -3.2967541217803955, |
|
"step": 1760, |
|
"use_label": 2796.050048828125 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.072134962187318e-08, |
|
"logits/chosen": -1.0619252920150757, |
|
"logits/rejected": -1.0584673881530762, |
|
"logps/chosen": -443.986083984375, |
|
"logps/rejected": -363.45660400390625, |
|
"loss": 0.2066, |
|
"pred_label": 4258.4501953125, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.09187255799770355, |
|
"rewards/margins": 2.6800460815429688, |
|
"rewards/rejected": -2.5881736278533936, |
|
"step": 1770, |
|
"use_label": 2802.550048828125 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.781268179173938e-08, |
|
"logits/chosen": -0.9357656240463257, |
|
"logits/rejected": -0.9777708053588867, |
|
"logps/chosen": -364.95159912109375, |
|
"logps/rejected": -321.37890625, |
|
"loss": 0.1591, |
|
"pred_label": 4284.2998046875, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.44581514596939087, |
|
"rewards/margins": 3.383704423904419, |
|
"rewards/rejected": -2.9378890991210938, |
|
"step": 1780, |
|
"use_label": 2816.699951171875 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.490401396160558e-08, |
|
"logits/chosen": -0.9476548433303833, |
|
"logits/rejected": -0.9675912857055664, |
|
"logps/chosen": -278.46197509765625, |
|
"logps/rejected": -292.71759033203125, |
|
"loss": 0.193, |
|
"pred_label": 4318.0, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.4520017206668854, |
|
"rewards/margins": 3.4121756553649902, |
|
"rewards/rejected": -3.864177703857422, |
|
"step": 1790, |
|
"use_label": 2823.0 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.199534613147178e-08, |
|
"logits/chosen": -0.8004802465438843, |
|
"logits/rejected": -0.8716949224472046, |
|
"logps/chosen": -337.95367431640625, |
|
"logps/rejected": -374.6369934082031, |
|
"loss": 0.2068, |
|
"pred_label": 4347.14990234375, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.5371319651603699, |
|
"rewards/margins": 3.537078380584717, |
|
"rewards/rejected": -4.074210166931152, |
|
"step": 1800, |
|
"use_label": 2833.85009765625 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.9086678301337987e-08, |
|
"logits/chosen": -1.0241055488586426, |
|
"logits/rejected": -1.0045052766799927, |
|
"logps/chosen": -336.6506042480469, |
|
"logps/rejected": -323.0858459472656, |
|
"loss": 0.1695, |
|
"pred_label": 4374.85009765625, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.255857229232788, |
|
"rewards/margins": 2.3176028728485107, |
|
"rewards/rejected": -3.573460102081299, |
|
"step": 1810, |
|
"use_label": 2846.14990234375 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.6178010471204188e-08, |
|
"logits/chosen": -0.9693848490715027, |
|
"logits/rejected": -0.9834591150283813, |
|
"logps/chosen": -423.89404296875, |
|
"logps/rejected": -360.3699035644531, |
|
"loss": 0.1947, |
|
"pred_label": 4406.10009765625, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.7960270047187805, |
|
"rewards/margins": 5.266651153564453, |
|
"rewards/rejected": -4.470623970031738, |
|
"step": 1820, |
|
"use_label": 2854.89990234375 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.326934264107039e-08, |
|
"logits/chosen": -0.8095889091491699, |
|
"logits/rejected": -0.942054271697998, |
|
"logps/chosen": -363.0487365722656, |
|
"logps/rejected": -354.41607666015625, |
|
"loss": 0.155, |
|
"pred_label": 4439.2001953125, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.144942045211792, |
|
"rewards/margins": 2.5031371116638184, |
|
"rewards/rejected": -3.6480789184570312, |
|
"step": 1830, |
|
"use_label": 2861.800048828125 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.036067481093659e-08, |
|
"logits/chosen": -0.8409671783447266, |
|
"logits/rejected": -0.83684903383255, |
|
"logps/chosen": -343.324951171875, |
|
"logps/rejected": -362.7954406738281, |
|
"loss": 0.1852, |
|
"pred_label": 4470.85009765625, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.28511303663253784, |
|
"rewards/margins": 3.413196563720703, |
|
"rewards/rejected": -3.6983096599578857, |
|
"step": 1840, |
|
"use_label": 2870.14990234375 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.745200698080279e-08, |
|
"logits/chosen": -0.9692668914794922, |
|
"logits/rejected": -0.9877262115478516, |
|
"logps/chosen": -347.4239196777344, |
|
"logps/rejected": -361.1467590332031, |
|
"loss": 0.1698, |
|
"pred_label": 4502.25, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.050318337976932526, |
|
"rewards/margins": 4.280552864074707, |
|
"rewards/rejected": -4.330872058868408, |
|
"step": 1850, |
|
"use_label": 2878.75 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.4543339150668994e-08, |
|
"logits/chosen": -0.9399221539497375, |
|
"logits/rejected": -0.9900743365287781, |
|
"logps/chosen": -236.5078582763672, |
|
"logps/rejected": -264.27252197265625, |
|
"loss": 0.186, |
|
"pred_label": 4531.4501953125, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.7518811225891113, |
|
"rewards/margins": 4.044899940490723, |
|
"rewards/rejected": -3.293017864227295, |
|
"step": 1860, |
|
"use_label": 2889.550048828125 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.1634671320535195e-08, |
|
"logits/chosen": -1.0314300060272217, |
|
"logits/rejected": -1.0113561153411865, |
|
"logps/chosen": -360.2470703125, |
|
"logps/rejected": -362.8114013671875, |
|
"loss": 0.177, |
|
"pred_label": 4561.4501953125, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7401183843612671, |
|
"rewards/margins": 2.7796757221221924, |
|
"rewards/rejected": -3.51979398727417, |
|
"step": 1870, |
|
"use_label": 2899.550048828125 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.726003490401395e-09, |
|
"logits/chosen": -1.0057262182235718, |
|
"logits/rejected": -1.0620293617248535, |
|
"logps/chosen": -371.9859619140625, |
|
"logps/rejected": -325.24932861328125, |
|
"loss": 0.1686, |
|
"pred_label": 4594.35009765625, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7309083938598633, |
|
"rewards/margins": 2.8372626304626465, |
|
"rewards/rejected": -3.5681710243225098, |
|
"step": 1880, |
|
"use_label": 2906.64990234375 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5.817335660267597e-09, |
|
"logits/chosen": -0.9105908274650574, |
|
"logits/rejected": -0.8740211725234985, |
|
"logps/chosen": -344.8439025878906, |
|
"logps/rejected": -438.42010498046875, |
|
"loss": 0.1399, |
|
"pred_label": 4625.9501953125, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2992219924926758, |
|
"rewards/margins": 3.6406044960021973, |
|
"rewards/rejected": -4.939826011657715, |
|
"step": 1890, |
|
"use_label": 2915.050048828125 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.9086678301337986e-09, |
|
"logits/chosen": -1.0261294841766357, |
|
"logits/rejected": -0.9415041208267212, |
|
"logps/chosen": -310.5721130371094, |
|
"logps/rejected": -294.1573486328125, |
|
"loss": 0.1918, |
|
"pred_label": 4658.85009765625, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.248200535774231, |
|
"rewards/margins": 2.8387322425842285, |
|
"rewards/rejected": -4.086933135986328, |
|
"step": 1900, |
|
"use_label": 2922.14990234375 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -0.9566202163696289, |
|
"logits/rejected": -0.9806071519851685, |
|
"logps/chosen": -324.8318786621094, |
|
"logps/rejected": -311.9029846191406, |
|
"loss": 0.1714, |
|
"pred_label": 4690.7998046875, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9383823275566101, |
|
"rewards/margins": 2.941167116165161, |
|
"rewards/rejected": -3.879549741744995, |
|
"step": 1910, |
|
"use_label": 2930.199951171875 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -0.9365912079811096, |
|
"eval_logits/rejected": -0.926014244556427, |
|
"eval_logps/chosen": -352.8179626464844, |
|
"eval_logps/rejected": -349.7088317871094, |
|
"eval_loss": 0.18906600773334503, |
|
"eval_pred_label": 4808.27001953125, |
|
"eval_rewards/accuracies": 0.7579365372657776, |
|
"eval_rewards/chosen": 0.2332553267478943, |
|
"eval_rewards/margins": 3.972490072250366, |
|
"eval_rewards/rejected": -3.7392351627349854, |
|
"eval_runtime": 284.6758, |
|
"eval_samples_per_second": 7.026, |
|
"eval_steps_per_second": 0.221, |
|
"eval_use_label": 2959.730224609375, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1910, |
|
"total_flos": 0.0, |
|
"train_loss": 0.2926436502271922, |
|
"train_runtime": 13370.5554, |
|
"train_samples_per_second": 4.572, |
|
"train_steps_per_second": 0.143 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1910, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|