|
{ |
|
"best_metric": 0.5855222344398499, |
|
"best_model_checkpoint": "data/tinyllama_moe_dpo_ultrafeedback_v2_epochs3/checkpoint-2700", |
|
"epoch": 2.998430141287284, |
|
"eval_steps": 100, |
|
"global_step": 2865, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.208333333333333e-09, |
|
"logits/chosen": -3.0106258392333984, |
|
"logits/rejected": -3.0041162967681885, |
|
"logps/chosen": -291.6616516113281, |
|
"logps/rejected": -273.537353515625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.208333333333333e-08, |
|
"logits/chosen": -3.0320074558258057, |
|
"logits/rejected": -2.934544801712036, |
|
"logps/chosen": -352.8655090332031, |
|
"logps/rejected": -284.1784362792969, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4236111044883728, |
|
"rewards/chosen": -0.00030098477145656943, |
|
"rewards/margins": 6.371454219333827e-05, |
|
"rewards/rejected": -0.00036469934275373816, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -2.9808428287506104, |
|
"logits/rejected": -2.9612295627593994, |
|
"logps/chosen": -309.6392822265625, |
|
"logps/rejected": -278.2618103027344, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.0001357152796117589, |
|
"rewards/margins": 0.00045497194514609873, |
|
"rewards/rejected": -0.0003192565927747637, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5624999999999999e-07, |
|
"logits/chosen": -3.041250705718994, |
|
"logits/rejected": -2.9839859008789062, |
|
"logps/chosen": -342.7677917480469, |
|
"logps/rejected": -301.0032653808594, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.00031170996953733265, |
|
"rewards/margins": -0.00037957567838020623, |
|
"rewards/rejected": 6.78658252581954e-05, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -3.019380807876587, |
|
"logits/rejected": -2.974083423614502, |
|
"logps/chosen": -331.848876953125, |
|
"logps/rejected": -276.879150390625, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.00034866915666498244, |
|
"rewards/margins": -0.00015269347932189703, |
|
"rewards/rejected": -0.00019597564823925495, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.604166666666667e-07, |
|
"logits/chosen": -3.025217294692993, |
|
"logits/rejected": -2.984266757965088, |
|
"logps/chosen": -347.17523193359375, |
|
"logps/rejected": -309.79034423828125, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.0004488878767006099, |
|
"rewards/margins": -0.0009045412880368531, |
|
"rewards/rejected": 0.0004556533822324127, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -3.008871078491211, |
|
"logits/rejected": -2.947890281677246, |
|
"logps/chosen": -348.37127685546875, |
|
"logps/rejected": -318.5699462890625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -1.417404564563185e-05, |
|
"rewards/margins": -0.00020001048687845469, |
|
"rewards/rejected": 0.0001858363684732467, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.645833333333333e-07, |
|
"logits/chosen": -3.078416585922241, |
|
"logits/rejected": -3.026381492614746, |
|
"logps/chosen": -382.86102294921875, |
|
"logps/rejected": -335.41156005859375, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.0005409394507296383, |
|
"rewards/margins": 0.0006863707094453275, |
|
"rewards/rejected": -0.00014543140423484147, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -2.9282450675964355, |
|
"logits/rejected": -2.872313976287842, |
|
"logps/chosen": -355.75653076171875, |
|
"logps/rejected": -294.4638366699219, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.0010749772191047668, |
|
"rewards/margins": 0.0013450259575620294, |
|
"rewards/rejected": -0.0002700486802496016, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.6874999999999996e-07, |
|
"logits/chosen": -3.0248095989227295, |
|
"logits/rejected": -2.9788899421691895, |
|
"logps/chosen": -348.9649963378906, |
|
"logps/rejected": -311.44647216796875, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.0013544766698032618, |
|
"rewards/margins": 0.0028665403369814157, |
|
"rewards/rejected": -0.001512063667178154, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999974255581648e-07, |
|
"logits/chosen": -2.998736619949341, |
|
"logits/rejected": -2.972041606903076, |
|
"logps/chosen": -367.4033203125, |
|
"logps/rejected": -341.64483642578125, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.00238443398848176, |
|
"rewards/margins": 0.0027559935115277767, |
|
"rewards/rejected": -0.00037155949394218624, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_logits/chosen": -3.040524482727051, |
|
"eval_logits/rejected": -2.987576484680176, |
|
"eval_logps/chosen": -344.61151123046875, |
|
"eval_logps/rejected": -302.75537109375, |
|
"eval_loss": 0.6913198828697205, |
|
"eval_rewards/accuracies": 0.6349206566810608, |
|
"eval_rewards/chosen": 0.0043304311111569405, |
|
"eval_rewards/margins": 0.004837073851376772, |
|
"eval_rewards/rejected": -0.0005066432058811188, |
|
"eval_runtime": 244.6034, |
|
"eval_samples_per_second": 8.177, |
|
"eval_steps_per_second": 0.258, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.999684636964402e-07, |
|
"logits/chosen": -3.017376661300659, |
|
"logits/rejected": -2.9261746406555176, |
|
"logps/chosen": -315.4708557128906, |
|
"logps/rejected": -251.8899383544922, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.0034377477131783962, |
|
"rewards/margins": 0.0038212400395423174, |
|
"rewards/rejected": -0.00038349232636392117, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.999073256611217e-07, |
|
"logits/chosen": -2.9883151054382324, |
|
"logits/rejected": -2.910341262817383, |
|
"logps/chosen": -361.08331298828125, |
|
"logps/rejected": -289.36883544921875, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.007036352995783091, |
|
"rewards/margins": 0.008973561227321625, |
|
"rewards/rejected": -0.0019372075330466032, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.998140193219545e-07, |
|
"logits/chosen": -3.0366291999816895, |
|
"logits/rejected": -2.9711904525756836, |
|
"logps/chosen": -381.4322204589844, |
|
"logps/rejected": -310.4883728027344, |
|
"loss": 0.6896, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.00792085099965334, |
|
"rewards/margins": 0.0068405852653086185, |
|
"rewards/rejected": 0.0010802658507600427, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.996885566894172e-07, |
|
"logits/chosen": -2.9757721424102783, |
|
"logits/rejected": -2.955821990966797, |
|
"logps/chosen": -286.1941833496094, |
|
"logps/rejected": -260.9352111816406, |
|
"loss": 0.6888, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.007854754105210304, |
|
"rewards/margins": 0.008457413874566555, |
|
"rewards/rejected": -0.0006026608753018081, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.995309539131771e-07, |
|
"logits/chosen": -3.035512685775757, |
|
"logits/rejected": -3.00437593460083, |
|
"logps/chosen": -335.96539306640625, |
|
"logps/rejected": -335.1185607910156, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.00691782683134079, |
|
"rewards/margins": 0.003046808298677206, |
|
"rewards/rejected": 0.0038710187654942274, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.993412312800101e-07, |
|
"logits/chosen": -3.0231873989105225, |
|
"logits/rejected": -2.9608006477355957, |
|
"logps/chosen": -354.9385681152344, |
|
"logps/rejected": -332.57757568359375, |
|
"loss": 0.687, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.010783755220472813, |
|
"rewards/margins": 0.010257494635879993, |
|
"rewards/rejected": 0.0005262610502541065, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.991194132111906e-07, |
|
"logits/chosen": -3.037797212600708, |
|
"logits/rejected": -2.991992473602295, |
|
"logps/chosen": -325.5256042480469, |
|
"logps/rejected": -280.1507263183594, |
|
"loss": 0.686, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.015455600805580616, |
|
"rewards/margins": 0.014618036337196827, |
|
"rewards/rejected": 0.0008375659817829728, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.988655282593471e-07, |
|
"logits/chosen": -2.967064619064331, |
|
"logits/rejected": -2.900017261505127, |
|
"logps/chosen": -299.7720947265625, |
|
"logps/rejected": -267.1083984375, |
|
"loss": 0.6848, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.011125795543193817, |
|
"rewards/margins": 0.019041184335947037, |
|
"rewards/rejected": -0.00791538879275322, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.985796091047871e-07, |
|
"logits/chosen": -3.0344669818878174, |
|
"logits/rejected": -2.965481996536255, |
|
"logps/chosen": -342.3845520019531, |
|
"logps/rejected": -299.60418701171875, |
|
"loss": 0.6836, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.017673885449767113, |
|
"rewards/margins": 0.02089458890259266, |
|
"rewards/rejected": -0.0032207041513174772, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.982616925512907e-07, |
|
"logits/chosen": -2.9737305641174316, |
|
"logits/rejected": -2.9367403984069824, |
|
"logps/chosen": -344.54833984375, |
|
"logps/rejected": -315.73516845703125, |
|
"loss": 0.6836, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.02112628147006035, |
|
"rewards/margins": 0.02438109926879406, |
|
"rewards/rejected": -0.0032548182643949986, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": -3.024318218231201, |
|
"eval_logits/rejected": -2.9700303077697754, |
|
"eval_logps/chosen": -343.5496826171875, |
|
"eval_logps/rejected": -303.65081787109375, |
|
"eval_loss": 0.6830371022224426, |
|
"eval_rewards/accuracies": 0.6448412537574768, |
|
"eval_rewards/chosen": 0.01494832057505846, |
|
"eval_rewards/margins": 0.0244095791131258, |
|
"eval_rewards/rejected": -0.009461257606744766, |
|
"eval_runtime": 246.2124, |
|
"eval_samples_per_second": 8.123, |
|
"eval_steps_per_second": 0.256, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.979118195213728e-07, |
|
"logits/chosen": -3.0241358280181885, |
|
"logits/rejected": -2.944836378097534, |
|
"logps/chosen": -367.56805419921875, |
|
"logps/rejected": -298.8295593261719, |
|
"loss": 0.682, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.01484000962227583, |
|
"rewards/margins": 0.023524824529886246, |
|
"rewards/rejected": -0.008684814907610416, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.975300350510161e-07, |
|
"logits/chosen": -3.002933979034424, |
|
"logits/rejected": -2.9467215538024902, |
|
"logps/chosen": -350.8382263183594, |
|
"logps/rejected": -319.9858703613281, |
|
"loss": 0.682, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.01789170131087303, |
|
"rewards/margins": 0.02975825034081936, |
|
"rewards/rejected": -0.011866547167301178, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.971163882838732e-07, |
|
"logits/chosen": -2.9935097694396973, |
|
"logits/rejected": -2.9317831993103027, |
|
"logps/chosen": -349.8140563964844, |
|
"logps/rejected": -288.98077392578125, |
|
"loss": 0.6769, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.011691467836499214, |
|
"rewards/margins": 0.029249072074890137, |
|
"rewards/rejected": -0.017557602375745773, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.966709324649415e-07, |
|
"logits/chosen": -2.9749813079833984, |
|
"logits/rejected": -2.9110968112945557, |
|
"logps/chosen": -335.0533447265625, |
|
"logps/rejected": -266.52215576171875, |
|
"loss": 0.6787, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.01340649463236332, |
|
"rewards/margins": 0.027955498546361923, |
|
"rewards/rejected": -0.014549002051353455, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.961937249337091e-07, |
|
"logits/chosen": -2.9835305213928223, |
|
"logits/rejected": -2.9534316062927246, |
|
"logps/chosen": -320.1576232910156, |
|
"logps/rejected": -320.8951721191406, |
|
"loss": 0.68, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.009979739785194397, |
|
"rewards/margins": 0.03077465295791626, |
|
"rewards/rejected": -0.020794907584786415, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.956848271167743e-07, |
|
"logits/chosen": -2.9606919288635254, |
|
"logits/rejected": -2.9051098823547363, |
|
"logps/chosen": -341.62701416015625, |
|
"logps/rejected": -306.46734619140625, |
|
"loss": 0.6771, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.004492092411965132, |
|
"rewards/margins": 0.0419074110686779, |
|
"rewards/rejected": -0.03741531819105148, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.951443045199382e-07, |
|
"logits/chosen": -3.039965867996216, |
|
"logits/rejected": -2.974046230316162, |
|
"logps/chosen": -353.82366943359375, |
|
"logps/rejected": -296.0869140625, |
|
"loss": 0.6743, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.013023038394749165, |
|
"rewards/margins": 0.053806107491254807, |
|
"rewards/rejected": -0.04078306630253792, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.945722267197731e-07, |
|
"logits/chosen": -3.0149953365325928, |
|
"logits/rejected": -2.9855613708496094, |
|
"logps/chosen": -353.22808837890625, |
|
"logps/rejected": -332.72735595703125, |
|
"loss": 0.6759, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.0013278704136610031, |
|
"rewards/margins": 0.0351131446659565, |
|
"rewards/rejected": -0.03644100949168205, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.939686673546668e-07, |
|
"logits/chosen": -2.979219675064087, |
|
"logits/rejected": -2.940520763397217, |
|
"logps/chosen": -338.106201171875, |
|
"logps/rejected": -298.94110107421875, |
|
"loss": 0.6718, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.008899662643671036, |
|
"rewards/margins": 0.0386703684926033, |
|
"rewards/rejected": -0.047570034861564636, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.93333704115343e-07, |
|
"logits/chosen": -2.9941365718841553, |
|
"logits/rejected": -2.884251117706299, |
|
"logps/chosen": -341.53387451171875, |
|
"logps/rejected": -282.03814697265625, |
|
"loss": 0.6662, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.00807519443333149, |
|
"rewards/margins": 0.05831586569547653, |
|
"rewards/rejected": -0.06639105826616287, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_logits/chosen": -2.9976212978363037, |
|
"eval_logits/rejected": -2.942260980606079, |
|
"eval_logps/chosen": -346.3836364746094, |
|
"eval_logps/rejected": -309.570068359375, |
|
"eval_loss": 0.6711614727973938, |
|
"eval_rewards/accuracies": 0.6746031641960144, |
|
"eval_rewards/chosen": -0.013390865176916122, |
|
"eval_rewards/margins": 0.05526304244995117, |
|
"eval_rewards/rejected": -0.06865391135215759, |
|
"eval_runtime": 248.4655, |
|
"eval_samples_per_second": 8.049, |
|
"eval_steps_per_second": 0.254, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.926674187348617e-07, |
|
"logits/chosen": -2.9200305938720703, |
|
"logits/rejected": -2.890380382537842, |
|
"logps/chosen": -342.61090087890625, |
|
"logps/rejected": -319.4290466308594, |
|
"loss": 0.6655, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.016244180500507355, |
|
"rewards/margins": 0.06994068622589111, |
|
"rewards/rejected": -0.08618486672639847, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.91969896978098e-07, |
|
"logits/chosen": -2.929490804672241, |
|
"logits/rejected": -2.892176866531372, |
|
"logps/chosen": -334.2842712402344, |
|
"logps/rejected": -318.81976318359375, |
|
"loss": 0.6658, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.013753254897892475, |
|
"rewards/margins": 0.05771704763174057, |
|
"rewards/rejected": -0.07147030532360077, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.912412286307025e-07, |
|
"logits/chosen": -2.908052921295166, |
|
"logits/rejected": -2.8586883544921875, |
|
"logps/chosen": -334.84564208984375, |
|
"logps/rejected": -278.5325927734375, |
|
"loss": 0.6698, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.014625480398535728, |
|
"rewards/margins": 0.06231003254652023, |
|
"rewards/rejected": -0.07693551480770111, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.904815074875438e-07, |
|
"logits/chosen": -2.9351956844329834, |
|
"logits/rejected": -2.882476806640625, |
|
"logps/chosen": -297.5643310546875, |
|
"logps/rejected": -270.01666259765625, |
|
"loss": 0.6703, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.05379326269030571, |
|
"rewards/margins": 0.038004521280527115, |
|
"rewards/rejected": -0.09179778397083282, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.896908313406355e-07, |
|
"logits/chosen": -2.9030866622924805, |
|
"logits/rejected": -2.8945038318634033, |
|
"logps/chosen": -336.22027587890625, |
|
"logps/rejected": -334.49365234375, |
|
"loss": 0.6683, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.05760540813207626, |
|
"rewards/margins": 0.0570923313498497, |
|
"rewards/rejected": -0.11469773948192596, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.88869301966548e-07, |
|
"logits/chosen": -2.961442232131958, |
|
"logits/rejected": -2.9141147136688232, |
|
"logps/chosen": -337.873046875, |
|
"logps/rejected": -292.5345153808594, |
|
"loss": 0.6599, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.07577836513519287, |
|
"rewards/margins": 0.0639306977391243, |
|
"rewards/rejected": -0.13970905542373657, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.880170251133081e-07, |
|
"logits/chosen": -2.8861405849456787, |
|
"logits/rejected": -2.873425006866455, |
|
"logps/chosen": -290.9164123535156, |
|
"logps/rejected": -303.1529846191406, |
|
"loss": 0.6579, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.0845533087849617, |
|
"rewards/margins": 0.07466953992843628, |
|
"rewards/rejected": -0.15922284126281738, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.871341104867864e-07, |
|
"logits/chosen": -2.9816460609436035, |
|
"logits/rejected": -2.9324944019317627, |
|
"logps/chosen": -363.70806884765625, |
|
"logps/rejected": -322.94158935546875, |
|
"loss": 0.6615, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.07034312188625336, |
|
"rewards/margins": 0.08779822289943695, |
|
"rewards/rejected": -0.15814131498336792, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.862206717365765e-07, |
|
"logits/chosen": -2.8958492279052734, |
|
"logits/rejected": -2.8334128856658936, |
|
"logps/chosen": -334.65643310546875, |
|
"logps/rejected": -287.338134765625, |
|
"loss": 0.6634, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.06828784197568893, |
|
"rewards/margins": 0.08057762682437897, |
|
"rewards/rejected": -0.1488654762506485, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.852768264413655e-07, |
|
"logits/chosen": -2.973942756652832, |
|
"logits/rejected": -2.9239089488983154, |
|
"logps/chosen": -374.6542663574219, |
|
"logps/rejected": -325.08868408203125, |
|
"loss": 0.6538, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.0820477306842804, |
|
"rewards/margins": 0.0960758849978447, |
|
"rewards/rejected": -0.1781235933303833, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": -2.954821825027466, |
|
"eval_logits/rejected": -2.897944450378418, |
|
"eval_logps/chosen": -353.1802062988281, |
|
"eval_logps/rejected": -320.7437744140625, |
|
"eval_loss": 0.6571324467658997, |
|
"eval_rewards/accuracies": 0.6765872836112976, |
|
"eval_rewards/chosen": -0.08135689049959183, |
|
"eval_rewards/margins": 0.09903378784656525, |
|
"eval_rewards/rejected": -0.18039065599441528, |
|
"eval_runtime": 240.8082, |
|
"eval_samples_per_second": 8.305, |
|
"eval_steps_per_second": 0.262, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.843026960937995e-07, |
|
"logits/chosen": -3.005959987640381, |
|
"logits/rejected": -2.960634708404541, |
|
"logps/chosen": -376.323974609375, |
|
"logps/rejected": -351.9470520019531, |
|
"loss": 0.6532, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.0597182922065258, |
|
"rewards/margins": 0.10858096927404404, |
|
"rewards/rejected": -0.16829927265644073, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.832984060848445e-07, |
|
"logits/chosen": -2.883970260620117, |
|
"logits/rejected": -2.8291678428649902, |
|
"logps/chosen": -314.81866455078125, |
|
"logps/rejected": -272.30511474609375, |
|
"loss": 0.652, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.0751439556479454, |
|
"rewards/margins": 0.10195841640233994, |
|
"rewards/rejected": -0.17710237205028534, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.822640856876464e-07, |
|
"logits/chosen": -2.9058735370635986, |
|
"logits/rejected": -2.877462863922119, |
|
"logps/chosen": -320.7926940917969, |
|
"logps/rejected": -292.57293701171875, |
|
"loss": 0.6553, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.09108451753854752, |
|
"rewards/margins": 0.0795835480093956, |
|
"rewards/rejected": -0.1706680804491043, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.811998680408907e-07, |
|
"logits/chosen": -2.9125401973724365, |
|
"logits/rejected": -2.903066873550415, |
|
"logps/chosen": -323.45245361328125, |
|
"logps/rejected": -306.0644226074219, |
|
"loss": 0.6524, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.13419905304908752, |
|
"rewards/margins": 0.06887730956077576, |
|
"rewards/rejected": -0.2030763328075409, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.801058901316645e-07, |
|
"logits/chosen": -2.8140056133270264, |
|
"logits/rejected": -2.7843329906463623, |
|
"logps/chosen": -326.67822265625, |
|
"logps/rejected": -301.6144714355469, |
|
"loss": 0.647, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.11106200516223907, |
|
"rewards/margins": 0.14083310961723328, |
|
"rewards/rejected": -0.25189512968063354, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.78982292777824e-07, |
|
"logits/chosen": -2.905949354171753, |
|
"logits/rejected": -2.8415050506591797, |
|
"logps/chosen": -332.97406005859375, |
|
"logps/rejected": -316.91937255859375, |
|
"loss": 0.6423, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.121739961206913, |
|
"rewards/margins": 0.10120918601751328, |
|
"rewards/rejected": -0.22294914722442627, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.778292206098673e-07, |
|
"logits/chosen": -2.9177937507629395, |
|
"logits/rejected": -2.8414828777313232, |
|
"logps/chosen": -376.5325927734375, |
|
"logps/rejected": -326.441162109375, |
|
"loss": 0.6372, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.11956751346588135, |
|
"rewards/margins": 0.19230221211910248, |
|
"rewards/rejected": -0.311869740486145, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.7664682205231877e-07, |
|
"logits/chosen": -2.83701753616333, |
|
"logits/rejected": -2.8011679649353027, |
|
"logps/chosen": -291.61614990234375, |
|
"logps/rejected": -290.9491882324219, |
|
"loss": 0.6612, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.15953049063682556, |
|
"rewards/margins": 0.07381532341241837, |
|
"rewards/rejected": -0.23334583640098572, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.754352493046224e-07, |
|
"logits/chosen": -2.9087753295898438, |
|
"logits/rejected": -2.8327887058258057, |
|
"logps/chosen": -348.1717224121094, |
|
"logps/rejected": -318.3050842285156, |
|
"loss": 0.6416, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.16357417404651642, |
|
"rewards/margins": 0.13152363896369934, |
|
"rewards/rejected": -0.29509779810905457, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.741946583215514e-07, |
|
"logits/chosen": -2.8774545192718506, |
|
"logits/rejected": -2.8319413661956787, |
|
"logps/chosen": -337.0196838378906, |
|
"logps/rejected": -319.65899658203125, |
|
"loss": 0.6405, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.17791475355625153, |
|
"rewards/margins": 0.15151168406009674, |
|
"rewards/rejected": -0.32942643761634827, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -2.9120142459869385, |
|
"eval_logits/rejected": -2.8540806770324707, |
|
"eval_logps/chosen": -364.5343933105469, |
|
"eval_logps/rejected": -337.2181396484375, |
|
"eval_loss": 0.6447514891624451, |
|
"eval_rewards/accuracies": 0.6726190447807312, |
|
"eval_rewards/chosen": -0.19489827752113342, |
|
"eval_rewards/margins": 0.1502356082201004, |
|
"eval_rewards/rejected": -0.345133900642395, |
|
"eval_runtime": 248.0229, |
|
"eval_samples_per_second": 8.064, |
|
"eval_steps_per_second": 0.254, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.729252087931332e-07, |
|
"logits/chosen": -2.8862080574035645, |
|
"logits/rejected": -2.8038384914398193, |
|
"logps/chosen": -377.8186340332031, |
|
"logps/rejected": -323.3064880371094, |
|
"loss": 0.6386, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.17217716574668884, |
|
"rewards/margins": 0.19356265664100647, |
|
"rewards/rejected": -0.3657398223876953, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.716270641240941e-07, |
|
"logits/chosen": -2.8480124473571777, |
|
"logits/rejected": -2.811933994293213, |
|
"logps/chosen": -320.3323974609375, |
|
"logps/rejected": -317.38763427734375, |
|
"loss": 0.6423, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.148501917719841, |
|
"rewards/margins": 0.18107430636882782, |
|
"rewards/rejected": -0.3295762240886688, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.703003914128258e-07, |
|
"logits/chosen": -2.847687244415283, |
|
"logits/rejected": -2.8126273155212402, |
|
"logps/chosen": -355.64996337890625, |
|
"logps/rejected": -330.8966369628906, |
|
"loss": 0.6401, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.15957526862621307, |
|
"rewards/margins": 0.15057061612606049, |
|
"rewards/rejected": -0.31014585494995117, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.689453614298758e-07, |
|
"logits/chosen": -2.8674135208129883, |
|
"logits/rejected": -2.8252620697021484, |
|
"logps/chosen": -375.3172302246094, |
|
"logps/rejected": -377.0331726074219, |
|
"loss": 0.6343, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.169221892952919, |
|
"rewards/margins": 0.15300126373767853, |
|
"rewards/rejected": -0.32222312688827515, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.6756214859596645e-07, |
|
"logits/chosen": -2.8661575317382812, |
|
"logits/rejected": -2.80385684967041, |
|
"logps/chosen": -347.62969970703125, |
|
"logps/rejected": -327.3890380859375, |
|
"loss": 0.6463, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.24611690640449524, |
|
"rewards/margins": 0.16446547210216522, |
|
"rewards/rejected": -0.41058236360549927, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.661509309595426e-07, |
|
"logits/chosen": -2.8666725158691406, |
|
"logits/rejected": -2.801342487335205, |
|
"logps/chosen": -344.13330078125, |
|
"logps/rejected": -313.7489929199219, |
|
"loss": 0.6312, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.24062931537628174, |
|
"rewards/margins": 0.20030991733074188, |
|
"rewards/rejected": -0.4409392476081848, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.647118901738537e-07, |
|
"logits/chosen": -2.8669309616088867, |
|
"logits/rejected": -2.7961795330047607, |
|
"logps/chosen": -360.96478271484375, |
|
"logps/rejected": -339.89886474609375, |
|
"loss": 0.644, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.25550180673599243, |
|
"rewards/margins": 0.14098383486270905, |
|
"rewards/rejected": -0.39648565649986267, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.632452114735706e-07, |
|
"logits/chosen": -2.814235210418701, |
|
"logits/rejected": -2.757559061050415, |
|
"logps/chosen": -350.2564697265625, |
|
"logps/rejected": -310.9596252441406, |
|
"loss": 0.6359, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.24496586620807648, |
|
"rewards/margins": 0.14976012706756592, |
|
"rewards/rejected": -0.3947260081768036, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.617510836509424e-07, |
|
"logits/chosen": -2.8700594902038574, |
|
"logits/rejected": -2.832390069961548, |
|
"logps/chosen": -341.0650329589844, |
|
"logps/rejected": -324.2391357421875, |
|
"loss": 0.638, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.1659560203552246, |
|
"rewards/margins": 0.10084130614995956, |
|
"rewards/rejected": -0.26679736375808716, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.602296990314949e-07, |
|
"logits/chosen": -2.8588919639587402, |
|
"logits/rejected": -2.809976100921631, |
|
"logps/chosen": -410.419189453125, |
|
"logps/rejected": -396.41949462890625, |
|
"loss": 0.6394, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.17672498524188995, |
|
"rewards/margins": 0.19581544399261475, |
|
"rewards/rejected": -0.3725404143333435, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": -2.8732762336730957, |
|
"eval_logits/rejected": -2.814659595489502, |
|
"eval_logps/chosen": -368.0754089355469, |
|
"eval_logps/rejected": -344.1863098144531, |
|
"eval_loss": 0.6372315883636475, |
|
"eval_rewards/accuracies": 0.682539701461792, |
|
"eval_rewards/chosen": -0.2303084284067154, |
|
"eval_rewards/margins": 0.18450765311717987, |
|
"eval_rewards/rejected": -0.41481611132621765, |
|
"eval_runtime": 248.1999, |
|
"eval_samples_per_second": 8.058, |
|
"eval_steps_per_second": 0.254, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.5868125344927397e-07, |
|
"logits/chosen": -2.881749153137207, |
|
"logits/rejected": -2.810695171356201, |
|
"logps/chosen": -356.07464599609375, |
|
"logps/rejected": -302.9112854003906, |
|
"loss": 0.6283, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.22698974609375, |
|
"rewards/margins": 0.1910962164402008, |
|
"rewards/rejected": -0.4180859625339508, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.5710594622163814e-07, |
|
"logits/chosen": -2.8739027976989746, |
|
"logits/rejected": -2.8048624992370605, |
|
"logps/chosen": -364.158935546875, |
|
"logps/rejected": -319.1551208496094, |
|
"loss": 0.6286, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.29370805621147156, |
|
"rewards/margins": 0.15704287588596344, |
|
"rewards/rejected": -0.4507509171962738, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.555039801236017e-07, |
|
"logits/chosen": -2.746525287628174, |
|
"logits/rejected": -2.707292079925537, |
|
"logps/chosen": -338.1776123046875, |
|
"logps/rejected": -335.0467224121094, |
|
"loss": 0.6272, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.21730093657970428, |
|
"rewards/margins": 0.19230973720550537, |
|
"rewards/rejected": -0.40961068868637085, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.538755613617336e-07, |
|
"logits/chosen": -2.791337251663208, |
|
"logits/rejected": -2.7343456745147705, |
|
"logps/chosen": -348.16802978515625, |
|
"logps/rejected": -334.0476989746094, |
|
"loss": 0.6273, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.23033007979393005, |
|
"rewards/margins": 0.19607330858707428, |
|
"rewards/rejected": -0.42640337347984314, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.522208995476145e-07, |
|
"logits/chosen": -2.889570713043213, |
|
"logits/rejected": -2.7822928428649902, |
|
"logps/chosen": -401.09234619140625, |
|
"logps/rejected": -364.5537414550781, |
|
"loss": 0.6171, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.23586571216583252, |
|
"rewards/margins": 0.25756314396858215, |
|
"rewards/rejected": -0.4934287965297699, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.50540207670855e-07, |
|
"logits/chosen": -2.8644819259643555, |
|
"logits/rejected": -2.8281166553497314, |
|
"logps/chosen": -379.15887451171875, |
|
"logps/rejected": -360.2496337890625, |
|
"loss": 0.6235, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.24248214066028595, |
|
"rewards/margins": 0.2142285406589508, |
|
"rewards/rejected": -0.45671066641807556, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.488337020716798e-07, |
|
"logits/chosen": -2.8308560848236084, |
|
"logits/rejected": -2.8049094676971436, |
|
"logps/chosen": -365.39263916015625, |
|
"logps/rejected": -360.0053405761719, |
|
"loss": 0.6242, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.26678240299224854, |
|
"rewards/margins": 0.21639254689216614, |
|
"rewards/rejected": -0.4831749498844147, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.4710160241307993e-07, |
|
"logits/chosen": -2.765575885772705, |
|
"logits/rejected": -2.7461342811584473, |
|
"logps/chosen": -347.0950012207031, |
|
"logps/rejected": -324.2687683105469, |
|
"loss": 0.6322, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.3175194561481476, |
|
"rewards/margins": 0.09245363622903824, |
|
"rewards/rejected": -0.4099730849266052, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.453441316525376e-07, |
|
"logits/chosen": -2.7600603103637695, |
|
"logits/rejected": -2.700854539871216, |
|
"logps/chosen": -349.8055725097656, |
|
"logps/rejected": -332.6409606933594, |
|
"loss": 0.63, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.3004549443721771, |
|
"rewards/margins": 0.1717255860567093, |
|
"rewards/rejected": -0.4721805155277252, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.435615160133268e-07, |
|
"logits/chosen": -2.791268825531006, |
|
"logits/rejected": -2.7100212574005127, |
|
"logps/chosen": -335.6754150390625, |
|
"logps/rejected": -332.56280517578125, |
|
"loss": 0.6218, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.34759417176246643, |
|
"rewards/margins": 0.21199102699756622, |
|
"rewards/rejected": -0.5595852136611938, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_logits/chosen": -2.8268723487854004, |
|
"eval_logits/rejected": -2.766594648361206, |
|
"eval_logps/chosen": -373.9845275878906, |
|
"eval_logps/rejected": -353.7791748046875, |
|
"eval_loss": 0.631250262260437, |
|
"eval_rewards/accuracies": 0.682539701461792, |
|
"eval_rewards/chosen": -0.28939977288246155, |
|
"eval_rewards/margins": 0.22134484350681305, |
|
"eval_rewards/rejected": -0.5107446312904358, |
|
"eval_runtime": 250.2322, |
|
"eval_samples_per_second": 7.993, |
|
"eval_steps_per_second": 0.252, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.4175398495539397e-07, |
|
"logits/chosen": -2.8154489994049072, |
|
"logits/rejected": -2.7193312644958496, |
|
"logps/chosen": -390.2218322753906, |
|
"logps/rejected": -323.1303405761719, |
|
"loss": 0.6142, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.25868695974349976, |
|
"rewards/margins": 0.22367513179779053, |
|
"rewards/rejected": -0.48236212134361267, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.3992177114582117e-07, |
|
"logits/chosen": -2.8137097358703613, |
|
"logits/rejected": -2.7677464485168457, |
|
"logps/chosen": -371.6590576171875, |
|
"logps/rejected": -349.91522216796875, |
|
"loss": 0.6315, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.24545975029468536, |
|
"rewards/margins": 0.20291368663311005, |
|
"rewards/rejected": -0.4483734965324402, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.380651104288776e-07, |
|
"logits/chosen": -2.79219913482666, |
|
"logits/rejected": -2.7212865352630615, |
|
"logps/chosen": -383.16070556640625, |
|
"logps/rejected": -343.8384094238281, |
|
"loss": 0.6285, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.2518269717693329, |
|
"rewards/margins": 0.21711906790733337, |
|
"rewards/rejected": -0.46894603967666626, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.3618424179566094e-07, |
|
"logits/chosen": -2.7794361114501953, |
|
"logits/rejected": -2.7013282775878906, |
|
"logps/chosen": -409.0721130371094, |
|
"logps/rejected": -345.78033447265625, |
|
"loss": 0.6197, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.23068316280841827, |
|
"rewards/margins": 0.23051229119300842, |
|
"rewards/rejected": -0.4611954689025879, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.3427940735333436e-07, |
|
"logits/chosen": -2.7824223041534424, |
|
"logits/rejected": -2.7694931030273438, |
|
"logps/chosen": -373.7041931152344, |
|
"logps/rejected": -375.1035461425781, |
|
"loss": 0.6172, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.33885717391967773, |
|
"rewards/margins": 0.19208331406116486, |
|
"rewards/rejected": -0.5309404134750366, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.323508522939624e-07, |
|
"logits/chosen": -2.750168800354004, |
|
"logits/rejected": -2.710522174835205, |
|
"logps/chosen": -366.13519287109375, |
|
"logps/rejected": -355.31829833984375, |
|
"loss": 0.6092, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.4173508286476135, |
|
"rewards/margins": 0.22161607444286346, |
|
"rewards/rejected": -0.6389669179916382, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.3039882486294966e-07, |
|
"logits/chosen": -2.7729387283325195, |
|
"logits/rejected": -2.747562885284424, |
|
"logps/chosen": -393.59881591796875, |
|
"logps/rejected": -406.81768798828125, |
|
"loss": 0.6199, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.38891881704330444, |
|
"rewards/margins": 0.215108722448349, |
|
"rewards/rejected": -0.604027509689331, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.2842357632708603e-07, |
|
"logits/chosen": -2.7065768241882324, |
|
"logits/rejected": -2.6670963764190674, |
|
"logps/chosen": -340.0065002441406, |
|
"logps/rejected": -324.4345397949219, |
|
"loss": 0.6215, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.2909296154975891, |
|
"rewards/margins": 0.1907232701778412, |
|
"rewards/rejected": -0.4816528856754303, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.264253609422038e-07, |
|
"logits/chosen": -2.7775344848632812, |
|
"logits/rejected": -2.7437610626220703, |
|
"logps/chosen": -391.9022521972656, |
|
"logps/rejected": -384.43426513671875, |
|
"loss": 0.6313, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.2783506214618683, |
|
"rewards/margins": 0.23514008522033691, |
|
"rewards/rejected": -0.5134907960891724, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.244044359204495e-07, |
|
"logits/chosen": -2.713089942932129, |
|
"logits/rejected": -2.6497960090637207, |
|
"logps/chosen": -433.2242126464844, |
|
"logps/rejected": -373.65130615234375, |
|
"loss": 0.6035, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.29666823148727417, |
|
"rewards/margins": 0.24842536449432373, |
|
"rewards/rejected": -0.5450935959815979, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": -2.7681221961975098, |
|
"eval_logits/rejected": -2.7056021690368652, |
|
"eval_logps/chosen": -381.1849060058594, |
|
"eval_logps/rejected": -364.1535949707031, |
|
"eval_loss": 0.6248704195022583, |
|
"eval_rewards/accuracies": 0.6845238208770752, |
|
"eval_rewards/chosen": -0.3614034950733185, |
|
"eval_rewards/margins": 0.2530852258205414, |
|
"eval_rewards/rejected": -0.6144886612892151, |
|
"eval_runtime": 249.0101, |
|
"eval_samples_per_second": 8.032, |
|
"eval_steps_per_second": 0.253, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.223610613971753e-07, |
|
"logits/chosen": -2.7306289672851562, |
|
"logits/rejected": -2.6280109882354736, |
|
"logps/chosen": -378.93804931640625, |
|
"logps/rejected": -328.80938720703125, |
|
"loss": 0.6265, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.3830115795135498, |
|
"rewards/margins": 0.13664865493774414, |
|
"rewards/rejected": -0.519660234451294, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.2029550039745396e-07, |
|
"logits/chosen": -2.674085855484009, |
|
"logits/rejected": -2.6277194023132324, |
|
"logps/chosen": -331.6683654785156, |
|
"logps/rejected": -327.3447570800781, |
|
"loss": 0.6375, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.3621932566165924, |
|
"rewards/margins": 0.2161780148744583, |
|
"rewards/rejected": -0.5783712267875671, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.1820801880222236e-07, |
|
"logits/chosen": -2.6937668323516846, |
|
"logits/rejected": -2.678345203399658, |
|
"logps/chosen": -336.51519775390625, |
|
"logps/rejected": -338.04058837890625, |
|
"loss": 0.6178, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.372277170419693, |
|
"rewards/margins": 0.2381594181060791, |
|
"rewards/rejected": -0.6104366183280945, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.160988853140567e-07, |
|
"logits/chosen": -2.68011212348938, |
|
"logits/rejected": -2.6486706733703613, |
|
"logps/chosen": -405.85235595703125, |
|
"logps/rejected": -377.17059326171875, |
|
"loss": 0.6225, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.44021159410476685, |
|
"rewards/margins": 0.13439835608005524, |
|
"rewards/rejected": -0.5746098756790161, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.1396837142258507e-07, |
|
"logits/chosen": -2.757357597351074, |
|
"logits/rejected": -2.696622848510742, |
|
"logps/chosen": -403.8123779296875, |
|
"logps/rejected": -353.694091796875, |
|
"loss": 0.6194, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.355672687292099, |
|
"rewards/margins": 0.28402405977249146, |
|
"rewards/rejected": -0.6396967172622681, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.1181675136954106e-07, |
|
"logits/chosen": -2.753262758255005, |
|
"logits/rejected": -2.715118646621704, |
|
"logps/chosen": -377.8106384277344, |
|
"logps/rejected": -364.499755859375, |
|
"loss": 0.6216, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4113299250602722, |
|
"rewards/margins": 0.2852802276611328, |
|
"rewards/rejected": -0.6966102123260498, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.09644302113463e-07, |
|
"logits/chosen": -2.6933841705322266, |
|
"logits/rejected": -2.671048641204834, |
|
"logps/chosen": -337.9107666015625, |
|
"logps/rejected": -356.3653564453125, |
|
"loss": 0.6052, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.40935665369033813, |
|
"rewards/margins": 0.30245229601860046, |
|
"rewards/rejected": -0.7118089199066162, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.0745130329404365e-07, |
|
"logits/chosen": -2.701093912124634, |
|
"logits/rejected": -2.63181471824646, |
|
"logps/chosen": -388.31011962890625, |
|
"logps/rejected": -363.6514587402344, |
|
"loss": 0.6198, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.504043459892273, |
|
"rewards/margins": 0.22630378603935242, |
|
"rewards/rejected": -0.7303472757339478, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.052380371961347e-07, |
|
"logits/chosen": -2.684615135192871, |
|
"logits/rejected": -2.652864933013916, |
|
"logps/chosen": -377.3784484863281, |
|
"logps/rejected": -379.3691711425781, |
|
"loss": 0.6286, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.5067042112350464, |
|
"rewards/margins": 0.17588753998279572, |
|
"rewards/rejected": -0.6825917363166809, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.030047887134108e-07, |
|
"logits/chosen": -2.636793851852417, |
|
"logits/rejected": -2.5783472061157227, |
|
"logps/chosen": -407.3863220214844, |
|
"logps/rejected": -394.4078674316406, |
|
"loss": 0.6326, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.48934751749038696, |
|
"rewards/margins": 0.17317138612270355, |
|
"rewards/rejected": -0.6625188589096069, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_logits/chosen": -2.720741033554077, |
|
"eval_logits/rejected": -2.65683913230896, |
|
"eval_logps/chosen": -397.63446044921875, |
|
"eval_logps/rejected": -382.7857360839844, |
|
"eval_loss": 0.6203979253768921, |
|
"eval_rewards/accuracies": 0.6845238208770752, |
|
"eval_rewards/chosen": -0.5258990526199341, |
|
"eval_rewards/margins": 0.274911493062973, |
|
"eval_rewards/rejected": -0.8008105158805847, |
|
"eval_runtime": 244.5075, |
|
"eval_samples_per_second": 8.18, |
|
"eval_steps_per_second": 0.258, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.007518453116979e-07, |
|
"logits/chosen": -2.6805593967437744, |
|
"logits/rejected": -2.6232123374938965, |
|
"logps/chosen": -357.9183044433594, |
|
"logps/rejected": -359.6230773925781, |
|
"loss": 0.6057, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.5844573974609375, |
|
"rewards/margins": 0.2230493128299713, |
|
"rewards/rejected": -0.8075065612792969, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.984794969919702e-07, |
|
"logits/chosen": -2.6928341388702393, |
|
"logits/rejected": -2.6027913093566895, |
|
"logps/chosen": -404.1787414550781, |
|
"logps/rejected": -385.9433288574219, |
|
"loss": 0.6132, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.5911322832107544, |
|
"rewards/margins": 0.29310551285743713, |
|
"rewards/rejected": -0.8842377662658691, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.96188036253021e-07, |
|
"logits/chosen": -2.7151689529418945, |
|
"logits/rejected": -2.6285009384155273, |
|
"logps/chosen": -379.22552490234375, |
|
"logps/rejected": -366.44720458984375, |
|
"loss": 0.5874, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.46869564056396484, |
|
"rewards/margins": 0.3491496443748474, |
|
"rewards/rejected": -0.8178452253341675, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.938777580538119e-07, |
|
"logits/chosen": -2.695061683654785, |
|
"logits/rejected": -2.608922243118286, |
|
"logps/chosen": -434.6171875, |
|
"logps/rejected": -423.9627380371094, |
|
"loss": 0.6077, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.5053583383560181, |
|
"rewards/margins": 0.35829007625579834, |
|
"rewards/rejected": -0.8636484146118164, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.9154895977550585e-07, |
|
"logits/chosen": -2.7094149589538574, |
|
"logits/rejected": -2.6375811100006104, |
|
"logps/chosen": -389.6069641113281, |
|
"logps/rejected": -375.0043029785156, |
|
"loss": 0.6176, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.4657767415046692, |
|
"rewards/margins": 0.2953377366065979, |
|
"rewards/rejected": -0.7611144185066223, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.8920194118318725e-07, |
|
"logits/chosen": -2.649557590484619, |
|
"logits/rejected": -2.591104030609131, |
|
"logps/chosen": -365.44061279296875, |
|
"logps/rejected": -376.2007141113281, |
|
"loss": 0.6041, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3993779420852661, |
|
"rewards/margins": 0.3410153388977051, |
|
"rewards/rejected": -0.740393340587616, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.868370043872768e-07, |
|
"logits/chosen": -2.683077812194824, |
|
"logits/rejected": -2.6237359046936035, |
|
"logps/chosen": -416.84088134765625, |
|
"logps/rejected": -408.75482177734375, |
|
"loss": 0.5978, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.43280115723609924, |
|
"rewards/margins": 0.34043940901756287, |
|
"rewards/rejected": -0.7732406258583069, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.844544538046425e-07, |
|
"logits/chosen": -2.617851972579956, |
|
"logits/rejected": -2.592369556427002, |
|
"logps/chosen": -354.9996032714844, |
|
"logps/rejected": -388.5142822265625, |
|
"loss": 0.5878, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.4673759341239929, |
|
"rewards/margins": 0.39914676547050476, |
|
"rewards/rejected": -0.8665226101875305, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.8205459611941577e-07, |
|
"logits/chosen": -2.669250011444092, |
|
"logits/rejected": -2.602257490158081, |
|
"logps/chosen": -417.526611328125, |
|
"logps/rejected": -391.24456787109375, |
|
"loss": 0.6071, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.4427572190761566, |
|
"rewards/margins": 0.2923399806022644, |
|
"rewards/rejected": -0.7350972890853882, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.7963774024351423e-07, |
|
"logits/chosen": -2.6895923614501953, |
|
"logits/rejected": -2.6821107864379883, |
|
"logps/chosen": -368.69537353515625, |
|
"logps/rejected": -377.12225341796875, |
|
"loss": 0.6103, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.48284831643104553, |
|
"rewards/margins": 0.22640752792358398, |
|
"rewards/rejected": -0.7092558145523071, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_logits/chosen": -2.6968600749969482, |
|
"eval_logits/rejected": -2.6322262287139893, |
|
"eval_logps/chosen": -396.68231201171875, |
|
"eval_logps/rejected": -384.4855651855469, |
|
"eval_loss": 0.6144962310791016, |
|
"eval_rewards/accuracies": 0.6944444179534912, |
|
"eval_rewards/chosen": -0.5163776874542236, |
|
"eval_rewards/margins": 0.30143067240715027, |
|
"eval_rewards/rejected": -0.8178083896636963, |
|
"eval_runtime": 246.7178, |
|
"eval_samples_per_second": 8.106, |
|
"eval_steps_per_second": 0.255, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.7720419727687865e-07, |
|
"logits/chosen": -2.6810877323150635, |
|
"logits/rejected": -2.6230504512786865, |
|
"logps/chosen": -413.1744689941406, |
|
"logps/rejected": -382.6622314453125, |
|
"loss": 0.5989, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5526056289672852, |
|
"rewards/margins": 0.2373414933681488, |
|
"rewards/rejected": -0.7899471521377563, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.747542804674274e-07, |
|
"logits/chosen": -2.661088228225708, |
|
"logits/rejected": -2.6390786170959473, |
|
"logps/chosen": -398.89813232421875, |
|
"logps/rejected": -401.23431396484375, |
|
"loss": 0.6005, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.46416956186294556, |
|
"rewards/margins": 0.30864232778549194, |
|
"rewards/rejected": -0.7728118300437927, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.7228830517073527e-07, |
|
"logits/chosen": -2.622685670852661, |
|
"logits/rejected": -2.5972726345062256, |
|
"logps/chosen": -360.4078063964844, |
|
"logps/rejected": -360.20269775390625, |
|
"loss": 0.5902, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3580246567726135, |
|
"rewards/margins": 0.3438374400138855, |
|
"rewards/rejected": -0.7018621563911438, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.698065888094405e-07, |
|
"logits/chosen": -2.6089298725128174, |
|
"logits/rejected": -2.6035428047180176, |
|
"logps/chosen": -365.2914123535156, |
|
"logps/rejected": -406.7452087402344, |
|
"loss": 0.6074, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.43625983595848083, |
|
"rewards/margins": 0.285591185092926, |
|
"rewards/rejected": -0.7218509912490845, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.6730945083238594e-07, |
|
"logits/chosen": -2.6788554191589355, |
|
"logits/rejected": -2.60333251953125, |
|
"logps/chosen": -396.67388916015625, |
|
"logps/rejected": -369.4822998046875, |
|
"loss": 0.5952, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4486874043941498, |
|
"rewards/margins": 0.3007965087890625, |
|
"rewards/rejected": -0.7494838833808899, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.64797212673499e-07, |
|
"logits/chosen": -2.663841962814331, |
|
"logits/rejected": -2.530757427215576, |
|
"logps/chosen": -434.9602966308594, |
|
"logps/rejected": -395.90704345703125, |
|
"loss": 0.5741, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.434063196182251, |
|
"rewards/margins": 0.4063114523887634, |
|
"rewards/rejected": -0.840374767780304, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.6227019771041664e-07, |
|
"logits/chosen": -2.631474018096924, |
|
"logits/rejected": -2.5514721870422363, |
|
"logps/chosen": -367.2853698730469, |
|
"logps/rejected": -334.1014709472656, |
|
"loss": 0.6031, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.45699542760849, |
|
"rewards/margins": 0.30584800243377686, |
|
"rewards/rejected": -0.7628434896469116, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.5972873122285994e-07, |
|
"logits/chosen": -2.6002821922302246, |
|
"logits/rejected": -2.5638835430145264, |
|
"logps/chosen": -362.3927307128906, |
|
"logps/rejected": -384.8812561035156, |
|
"loss": 0.5971, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5179690718650818, |
|
"rewards/margins": 0.29387664794921875, |
|
"rewards/rejected": -0.8118457794189453, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.571731403507635e-07, |
|
"logits/chosen": -2.6326613426208496, |
|
"logits/rejected": -2.5637834072113037, |
|
"logps/chosen": -416.04046630859375, |
|
"logps/rejected": -411.080810546875, |
|
"loss": 0.5785, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.5498114228248596, |
|
"rewards/margins": 0.36757707595825195, |
|
"rewards/rejected": -0.9173885583877563, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.5460375405216603e-07, |
|
"logits/chosen": -2.665194034576416, |
|
"logits/rejected": -2.601635456085205, |
|
"logps/chosen": -397.65960693359375, |
|
"logps/rejected": -401.44976806640625, |
|
"loss": 0.6002, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6755828261375427, |
|
"rewards/margins": 0.26671653985977173, |
|
"rewards/rejected": -0.9422993659973145, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_logits/chosen": -2.668804883956909, |
|
"eval_logits/rejected": -2.6023752689361572, |
|
"eval_logps/chosen": -396.83331298828125, |
|
"eval_logps/rejected": -385.9577941894531, |
|
"eval_loss": 0.6116264462471008, |
|
"eval_rewards/accuracies": 0.692460298538208, |
|
"eval_rewards/chosen": -0.5178873538970947, |
|
"eval_rewards/margins": 0.31464365124702454, |
|
"eval_rewards/rejected": -0.8325309753417969, |
|
"eval_runtime": 250.5629, |
|
"eval_samples_per_second": 7.982, |
|
"eval_steps_per_second": 0.251, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.520209030608662e-07, |
|
"logits/chosen": -2.6696746349334717, |
|
"logits/rejected": -2.6097488403320312, |
|
"logps/chosen": -393.92529296875, |
|
"logps/rejected": -380.6165466308594, |
|
"loss": 0.5968, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.47735723853111267, |
|
"rewards/margins": 0.25016671419143677, |
|
"rewards/rejected": -0.727523922920227, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.4942491984385066e-07, |
|
"logits/chosen": -2.666564464569092, |
|
"logits/rejected": -2.6042842864990234, |
|
"logps/chosen": -403.80145263671875, |
|
"logps/rejected": -381.906982421875, |
|
"loss": 0.5944, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.4305171072483063, |
|
"rewards/margins": 0.3264329731464386, |
|
"rewards/rejected": -0.7569500207901001, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.468161385584982e-07, |
|
"logits/chosen": -2.6324963569641113, |
|
"logits/rejected": -2.5880188941955566, |
|
"logps/chosen": -381.2325744628906, |
|
"logps/rejected": -379.158203125, |
|
"loss": 0.5799, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3703802824020386, |
|
"rewards/margins": 0.34355229139328003, |
|
"rewards/rejected": -0.7139325141906738, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.441948950095672e-07, |
|
"logits/chosen": -2.6656455993652344, |
|
"logits/rejected": -2.5824806690216064, |
|
"logps/chosen": -407.565673828125, |
|
"logps/rejected": -368.3275451660156, |
|
"loss": 0.5875, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.43670734763145447, |
|
"rewards/margins": 0.37962377071380615, |
|
"rewards/rejected": -0.8163310885429382, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.4156152660596994e-07, |
|
"logits/chosen": -2.6464781761169434, |
|
"logits/rejected": -2.5937983989715576, |
|
"logps/chosen": -402.5704345703125, |
|
"logps/rejected": -392.44952392578125, |
|
"loss": 0.5997, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5296522974967957, |
|
"rewards/margins": 0.3139727711677551, |
|
"rewards/rejected": -0.8436250686645508, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.3891637231734125e-07, |
|
"logits/chosen": -2.6544508934020996, |
|
"logits/rejected": -2.595885753631592, |
|
"logps/chosen": -386.92401123046875, |
|
"logps/rejected": -395.77642822265625, |
|
"loss": 0.5905, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.44389739632606506, |
|
"rewards/margins": 0.35769903659820557, |
|
"rewards/rejected": -0.801596462726593, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.3625977263040643e-07, |
|
"logits/chosen": -2.666747570037842, |
|
"logits/rejected": -2.5960164070129395, |
|
"logps/chosen": -414.5166015625, |
|
"logps/rejected": -367.41424560546875, |
|
"loss": 0.5867, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.4271417260169983, |
|
"rewards/margins": 0.2684404253959656, |
|
"rewards/rejected": -0.6955822110176086, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.3359206950515266e-07, |
|
"logits/chosen": -2.6162502765655518, |
|
"logits/rejected": -2.5295591354370117, |
|
"logps/chosen": -400.8308410644531, |
|
"logps/rejected": -366.78558349609375, |
|
"loss": 0.5793, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5077975988388062, |
|
"rewards/margins": 0.36180368065834045, |
|
"rewards/rejected": -0.8696013689041138, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.3091360633081236e-07, |
|
"logits/chosen": -2.687870502471924, |
|
"logits/rejected": -2.6193759441375732, |
|
"logps/chosen": -402.45098876953125, |
|
"logps/rejected": -392.7806701660156, |
|
"loss": 0.5752, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.5487203598022461, |
|
"rewards/margins": 0.37653276324272156, |
|
"rewards/rejected": -0.9252530932426453, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.2822472788166146e-07, |
|
"logits/chosen": -2.6522316932678223, |
|
"logits/rejected": -2.5871059894561768, |
|
"logps/chosen": -410.39764404296875, |
|
"logps/rejected": -405.1246643066406, |
|
"loss": 0.5729, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.47655636072158813, |
|
"rewards/margins": 0.35264235734939575, |
|
"rewards/rejected": -0.8291987180709839, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_logits/chosen": -2.6376395225524902, |
|
"eval_logits/rejected": -2.570849657058716, |
|
"eval_logps/chosen": -403.4270935058594, |
|
"eval_logps/rejected": -394.70733642578125, |
|
"eval_loss": 0.6083069443702698, |
|
"eval_rewards/accuracies": 0.704365074634552, |
|
"eval_rewards/chosen": -0.5838249325752258, |
|
"eval_rewards/margins": 0.33620160818099976, |
|
"eval_rewards/rejected": -0.9200265407562256, |
|
"eval_runtime": 246.1597, |
|
"eval_samples_per_second": 8.125, |
|
"eval_steps_per_second": 0.256, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.2552578027263955e-07, |
|
"logits/chosen": -2.623401165008545, |
|
"logits/rejected": -2.5443992614746094, |
|
"logps/chosen": -361.82171630859375, |
|
"logps/rejected": -374.11334228515625, |
|
"loss": 0.5843, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5367065668106079, |
|
"rewards/margins": 0.31009745597839355, |
|
"rewards/rejected": -0.8468039631843567, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.228171109147982e-07, |
|
"logits/chosen": -2.643584728240967, |
|
"logits/rejected": -2.5270514488220215, |
|
"logps/chosen": -410.5106506347656, |
|
"logps/rejected": -380.1089172363281, |
|
"loss": 0.5983, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.44848957657814026, |
|
"rewards/margins": 0.35878002643585205, |
|
"rewards/rejected": -0.8072696924209595, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.2009906847058125e-07, |
|
"logits/chosen": -2.639606475830078, |
|
"logits/rejected": -2.5487468242645264, |
|
"logps/chosen": -424.10595703125, |
|
"logps/rejected": -401.09674072265625, |
|
"loss": 0.587, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.49688854813575745, |
|
"rewards/margins": 0.3513595461845398, |
|
"rewards/rejected": -0.8482481241226196, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.1737200280894516e-07, |
|
"logits/chosen": -2.5839312076568604, |
|
"logits/rejected": -2.5728745460510254, |
|
"logps/chosen": -371.0592041015625, |
|
"logps/rejected": -380.7933349609375, |
|
"loss": 0.602, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.523228645324707, |
|
"rewards/margins": 0.30655089020729065, |
|
"rewards/rejected": -0.8297795057296753, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.146362649603233e-07, |
|
"logits/chosen": -2.596813917160034, |
|
"logits/rejected": -2.546060562133789, |
|
"logps/chosen": -406.51953125, |
|
"logps/rejected": -392.93890380859375, |
|
"loss": 0.5825, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.5865792036056519, |
|
"rewards/margins": 0.3280216157436371, |
|
"rewards/rejected": -0.9146007299423218, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.118922070714408e-07, |
|
"logits/chosen": -2.569214105606079, |
|
"logits/rejected": -2.54783034324646, |
|
"logps/chosen": -347.00006103515625, |
|
"logps/rejected": -362.70562744140625, |
|
"loss": 0.602, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5019794702529907, |
|
"rewards/margins": 0.271824449300766, |
|
"rewards/rejected": -0.7738040089607239, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.091401823599865e-07, |
|
"logits/chosen": -2.594982624053955, |
|
"logits/rejected": -2.5411128997802734, |
|
"logps/chosen": -371.3302917480469, |
|
"logps/rejected": -372.2278747558594, |
|
"loss": 0.5734, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3963788151741028, |
|
"rewards/margins": 0.3730164170265198, |
|
"rewards/rejected": -0.7693952322006226, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.063805450691458e-07, |
|
"logits/chosen": -2.655991315841675, |
|
"logits/rejected": -2.56858491897583, |
|
"logps/chosen": -389.52099609375, |
|
"logps/rejected": -369.41400146484375, |
|
"loss": 0.577, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.39786046743392944, |
|
"rewards/margins": 0.3743780255317688, |
|
"rewards/rejected": -0.7722384929656982, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.036136504220025e-07, |
|
"logits/chosen": -2.6017796993255615, |
|
"logits/rejected": -2.544804096221924, |
|
"logps/chosen": -393.8155517578125, |
|
"logps/rejected": -385.86541748046875, |
|
"loss": 0.5892, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.46426922082901, |
|
"rewards/margins": 0.35931476950645447, |
|
"rewards/rejected": -0.8235839605331421, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.0083985457581415e-07, |
|
"logits/chosen": -2.5615665912628174, |
|
"logits/rejected": -2.544076681137085, |
|
"logps/chosen": -372.3790588378906, |
|
"logps/rejected": -374.0905456542969, |
|
"loss": 0.599, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.5213760137557983, |
|
"rewards/margins": 0.2982157766819, |
|
"rewards/rejected": -0.8195918202400208, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_logits/chosen": -2.613380193710327, |
|
"eval_logits/rejected": -2.545426368713379, |
|
"eval_logps/chosen": -397.10211181640625, |
|
"eval_logps/rejected": -387.2309875488281, |
|
"eval_loss": 0.6077432632446289, |
|
"eval_rewards/accuracies": 0.7103174328804016, |
|
"eval_rewards/chosen": -0.5205760598182678, |
|
"eval_rewards/margins": 0.32468709349632263, |
|
"eval_rewards/rejected": -0.8452631831169128, |
|
"eval_runtime": 247.7632, |
|
"eval_samples_per_second": 8.072, |
|
"eval_steps_per_second": 0.254, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.9805951457616684e-07, |
|
"logits/chosen": -2.5732953548431396, |
|
"logits/rejected": -2.505056381225586, |
|
"logps/chosen": -370.58367919921875, |
|
"logps/rejected": -377.2376708984375, |
|
"loss": 0.5948, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.4765905737876892, |
|
"rewards/margins": 0.30376702547073364, |
|
"rewards/rejected": -0.7803575396537781, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.952729883110164e-07, |
|
"logits/chosen": -2.560969829559326, |
|
"logits/rejected": -2.4919774532318115, |
|
"logps/chosen": -375.130859375, |
|
"logps/rejected": -401.1944580078125, |
|
"loss": 0.582, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.5600558519363403, |
|
"rewards/margins": 0.3885023593902588, |
|
"rewards/rejected": -0.9485582113265991, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.924806344646205e-07, |
|
"logits/chosen": -2.5645477771759033, |
|
"logits/rejected": -2.5073866844177246, |
|
"logps/chosen": -408.23126220703125, |
|
"logps/rejected": -424.2571716308594, |
|
"loss": 0.5979, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6115280389785767, |
|
"rewards/margins": 0.380671888589859, |
|
"rewards/rejected": -0.9921998977661133, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.896828124713684e-07, |
|
"logits/chosen": -2.549287796020508, |
|
"logits/rejected": -2.5001778602600098, |
|
"logps/chosen": -386.74102783203125, |
|
"logps/rejected": -394.2340393066406, |
|
"loss": 0.6196, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6311862468719482, |
|
"rewards/margins": 0.3289794921875, |
|
"rewards/rejected": -0.960165798664093, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.8687988246951437e-07, |
|
"logits/chosen": -2.5453922748565674, |
|
"logits/rejected": -2.518723726272583, |
|
"logps/chosen": -342.259033203125, |
|
"logps/rejected": -360.65850830078125, |
|
"loss": 0.5773, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.47795337438583374, |
|
"rewards/margins": 0.33936068415641785, |
|
"rewards/rejected": -0.8173141479492188, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.8407220525482047e-07, |
|
"logits/chosen": -2.5927088260650635, |
|
"logits/rejected": -2.489152431488037, |
|
"logps/chosen": -413.27105712890625, |
|
"logps/rejected": -376.00689697265625, |
|
"loss": 0.5965, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.4944841265678406, |
|
"rewards/margins": 0.2662241756916046, |
|
"rewards/rejected": -0.7607083916664124, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.81260142234114e-07, |
|
"logits/chosen": -2.6516880989074707, |
|
"logits/rejected": -2.5429511070251465, |
|
"logps/chosen": -403.22454833984375, |
|
"logps/rejected": -371.4908142089844, |
|
"loss": 0.5748, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.46684926748275757, |
|
"rewards/margins": 0.3777723014354706, |
|
"rewards/rejected": -0.8446215391159058, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.7844405537876766e-07, |
|
"logits/chosen": -2.5847606658935547, |
|
"logits/rejected": -2.5084991455078125, |
|
"logps/chosen": -383.04254150390625, |
|
"logps/rejected": -428.8330078125, |
|
"loss": 0.5614, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.47037452459335327, |
|
"rewards/margins": 0.42992621660232544, |
|
"rewards/rejected": -0.9003008008003235, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.7562430717810586e-07, |
|
"logits/chosen": -2.5550408363342285, |
|
"logits/rejected": -2.54259991645813, |
|
"logps/chosen": -390.04278564453125, |
|
"logps/rejected": -383.87872314453125, |
|
"loss": 0.5857, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.5229761004447937, |
|
"rewards/margins": 0.38916486501693726, |
|
"rewards/rejected": -0.912140965461731, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.728012605927447e-07, |
|
"logits/chosen": -2.605335235595703, |
|
"logits/rejected": -2.5029044151306152, |
|
"logps/chosen": -421.9798889160156, |
|
"logps/rejected": -388.80914306640625, |
|
"loss": 0.5821, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.57950758934021, |
|
"rewards/margins": 0.4323285222053528, |
|
"rewards/rejected": -1.011836290359497, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_logits/chosen": -2.5899722576141357, |
|
"eval_logits/rejected": -2.5210795402526855, |
|
"eval_logps/chosen": -404.44964599609375, |
|
"eval_logps/rejected": -398.31060791015625, |
|
"eval_loss": 0.602461040019989, |
|
"eval_rewards/accuracies": 0.7063491940498352, |
|
"eval_rewards/chosen": -0.594050943851471, |
|
"eval_rewards/margins": 0.36200812458992004, |
|
"eval_rewards/rejected": -0.9560590386390686, |
|
"eval_runtime": 240.2615, |
|
"eval_samples_per_second": 8.324, |
|
"eval_steps_per_second": 0.262, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.699752790078714e-07, |
|
"logits/chosen": -2.576601982116699, |
|
"logits/rejected": -2.484036922454834, |
|
"logps/chosen": -422.91094970703125, |
|
"logps/rejected": -394.78570556640625, |
|
"loss": 0.578, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.5908924341201782, |
|
"rewards/margins": 0.4301966726779938, |
|
"rewards/rejected": -1.0210891962051392, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.6714672618646916e-07, |
|
"logits/chosen": -2.550550699234009, |
|
"logits/rejected": -2.5153775215148926, |
|
"logps/chosen": -406.955322265625, |
|
"logps/rejected": -409.8970642089844, |
|
"loss": 0.5851, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.5765306949615479, |
|
"rewards/margins": 0.34898003935813904, |
|
"rewards/rejected": -0.9255107045173645, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.643159662224931e-07, |
|
"logits/chosen": -2.510113477706909, |
|
"logits/rejected": -2.4473376274108887, |
|
"logps/chosen": -401.7076110839844, |
|
"logps/rejected": -385.7937927246094, |
|
"loss": 0.5892, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.5977416634559631, |
|
"rewards/margins": 0.2979085147380829, |
|
"rewards/rejected": -0.8956502079963684, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.6148336349400386e-07, |
|
"logits/chosen": -2.581653118133545, |
|
"logits/rejected": -2.4795095920562744, |
|
"logps/chosen": -414.6515197753906, |
|
"logps/rejected": -404.5352783203125, |
|
"loss": 0.5781, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.4708114564418793, |
|
"rewards/margins": 0.42272576689720154, |
|
"rewards/rejected": -0.893537163734436, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.5864928261626416e-07, |
|
"logits/chosen": -2.5162148475646973, |
|
"logits/rejected": -2.454847812652588, |
|
"logps/chosen": -392.3681945800781, |
|
"logps/rejected": -386.84161376953125, |
|
"loss": 0.5779, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5253309011459351, |
|
"rewards/margins": 0.3526184856891632, |
|
"rewards/rejected": -0.8779493570327759, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.558140883948058e-07, |
|
"logits/chosen": -2.554378032684326, |
|
"logits/rejected": -2.4660162925720215, |
|
"logps/chosen": -411.94757080078125, |
|
"logps/rejected": -378.79522705078125, |
|
"loss": 0.5712, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.5396718978881836, |
|
"rewards/margins": 0.370322585105896, |
|
"rewards/rejected": -0.9099944829940796, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.5297814577847116e-07, |
|
"logits/chosen": -2.5671238899230957, |
|
"logits/rejected": -2.525606155395508, |
|
"logps/chosen": -404.15545654296875, |
|
"logps/rejected": -415.3843688964844, |
|
"loss": 0.5932, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.5830854177474976, |
|
"rewards/margins": 0.32624879479408264, |
|
"rewards/rejected": -0.9093341827392578, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.501418198124365e-07, |
|
"logits/chosen": -2.56373929977417, |
|
"logits/rejected": -2.491987705230713, |
|
"logps/chosen": -436.6615295410156, |
|
"logps/rejected": -413.1544494628906, |
|
"loss": 0.5685, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.5825528502464294, |
|
"rewards/margins": 0.4194890856742859, |
|
"rewards/rejected": -1.0020420551300049, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.473054755912234e-07, |
|
"logits/chosen": -2.5011954307556152, |
|
"logits/rejected": -2.4363338947296143, |
|
"logps/chosen": -406.9830017089844, |
|
"logps/rejected": -418.9451599121094, |
|
"loss": 0.5797, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.5498841404914856, |
|
"rewards/margins": 0.49638238549232483, |
|
"rewards/rejected": -1.0462663173675537, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.444694782117033e-07, |
|
"logits/chosen": -2.5000977516174316, |
|
"logits/rejected": -2.443779468536377, |
|
"logps/chosen": -392.17327880859375, |
|
"logps/rejected": -432.06195068359375, |
|
"loss": 0.574, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.6436781287193298, |
|
"rewards/margins": 0.43835169076919556, |
|
"rewards/rejected": -1.082029938697815, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_logits/chosen": -2.5592637062072754, |
|
"eval_logits/rejected": -2.488711357116699, |
|
"eval_logps/chosen": -411.21783447265625, |
|
"eval_logps/rejected": -407.4162292480469, |
|
"eval_loss": 0.5976974964141846, |
|
"eval_rewards/accuracies": 0.7142857313156128, |
|
"eval_rewards/chosen": -0.6617329716682434, |
|
"eval_rewards/margins": 0.3853819668292999, |
|
"eval_rewards/rejected": -1.0471149682998657, |
|
"eval_runtime": 249.6762, |
|
"eval_samples_per_second": 8.01, |
|
"eval_steps_per_second": 0.252, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.416341927261016e-07, |
|
"logits/chosen": -2.509632110595703, |
|
"logits/rejected": -2.4712796211242676, |
|
"logps/chosen": -388.2112121582031, |
|
"logps/rejected": -393.1347961425781, |
|
"loss": 0.5739, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6419853568077087, |
|
"rewards/margins": 0.44171229004859924, |
|
"rewards/rejected": -1.0836976766586304, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.3879998409500845e-07, |
|
"logits/chosen": -2.5448498725891113, |
|
"logits/rejected": -2.4746804237365723, |
|
"logps/chosen": -435.6138610839844, |
|
"logps/rejected": -420.2179260253906, |
|
"loss": 0.5764, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.7542072534561157, |
|
"rewards/margins": 0.4248180389404297, |
|
"rewards/rejected": -1.1790252923965454, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.3596721714039998e-07, |
|
"logits/chosen": -2.4864585399627686, |
|
"logits/rejected": -2.4267334938049316, |
|
"logps/chosen": -395.937744140625, |
|
"logps/rejected": -394.3992919921875, |
|
"loss": 0.5586, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.689933180809021, |
|
"rewards/margins": 0.41859620809555054, |
|
"rewards/rejected": -1.1085295677185059, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.3313625649867824e-07, |
|
"logits/chosen": -2.5655341148376465, |
|
"logits/rejected": -2.4968507289886475, |
|
"logps/chosen": -454.5184631347656, |
|
"logps/rejected": -442.9791564941406, |
|
"loss": 0.5844, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.721511960029602, |
|
"rewards/margins": 0.4738715589046478, |
|
"rewards/rejected": -1.1953833103179932, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.303074665737355e-07, |
|
"logits/chosen": -2.503943920135498, |
|
"logits/rejected": -2.450364112854004, |
|
"logps/chosen": -424.10760498046875, |
|
"logps/rejected": -405.66070556640625, |
|
"loss": 0.5654, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.673719048500061, |
|
"rewards/margins": 0.3581104576587677, |
|
"rewards/rejected": -1.0318294763565063, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.274812114900469e-07, |
|
"logits/chosen": -2.505079507827759, |
|
"logits/rejected": -2.4630515575408936, |
|
"logps/chosen": -368.4876403808594, |
|
"logps/rejected": -392.97467041015625, |
|
"loss": 0.585, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.6174124479293823, |
|
"rewards/margins": 0.3502601981163025, |
|
"rewards/rejected": -0.9676725268363953, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.2465785504580074e-07, |
|
"logits/chosen": -2.4787967205047607, |
|
"logits/rejected": -2.406154155731201, |
|
"logps/chosen": -432.4744567871094, |
|
"logps/rejected": -413.21453857421875, |
|
"loss": 0.5799, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.658542811870575, |
|
"rewards/margins": 0.3376074433326721, |
|
"rewards/rejected": -0.9961503744125366, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.2183776066606947e-07, |
|
"logits/chosen": -2.5223982334136963, |
|
"logits/rejected": -2.51818585395813, |
|
"logps/chosen": -380.69573974609375, |
|
"logps/rejected": -432.8943786621094, |
|
"loss": 0.5772, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.5959492921829224, |
|
"rewards/margins": 0.47599101066589355, |
|
"rewards/rejected": -1.0719401836395264, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.190212913560298e-07, |
|
"logits/chosen": -2.4563565254211426, |
|
"logits/rejected": -2.4220333099365234, |
|
"logps/chosen": -383.9895324707031, |
|
"logps/rejected": -395.3768005371094, |
|
"loss": 0.5726, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.6407332420349121, |
|
"rewards/margins": 0.4355078339576721, |
|
"rewards/rejected": -1.076241135597229, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.1620880965423596e-07, |
|
"logits/chosen": -2.488713502883911, |
|
"logits/rejected": -2.434023380279541, |
|
"logps/chosen": -390.2538757324219, |
|
"logps/rejected": -380.91302490234375, |
|
"loss": 0.5716, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.6403465867042542, |
|
"rewards/margins": 0.4435759484767914, |
|
"rewards/rejected": -1.0839226245880127, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_logits/chosen": -2.536853313446045, |
|
"eval_logits/rejected": -2.4650635719299316, |
|
"eval_logps/chosen": -412.695556640625, |
|
"eval_logps/rejected": -411.40203857421875, |
|
"eval_loss": 0.5954813361167908, |
|
"eval_rewards/accuracies": 0.72817462682724, |
|
"eval_rewards/chosen": -0.6765100955963135, |
|
"eval_rewards/margins": 0.41046345233917236, |
|
"eval_rewards/rejected": -1.0869736671447754, |
|
"eval_runtime": 247.9314, |
|
"eval_samples_per_second": 8.067, |
|
"eval_steps_per_second": 0.254, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.134006775859537e-07, |
|
"logits/chosen": -2.4386448860168457, |
|
"logits/rejected": -2.4446792602539062, |
|
"logps/chosen": -400.6998596191406, |
|
"logps/rejected": -428.584716796875, |
|
"loss": 0.5628, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7244473695755005, |
|
"rewards/margins": 0.43817657232284546, |
|
"rewards/rejected": -1.1626240015029907, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.1059725661655948e-07, |
|
"logits/chosen": -2.543206214904785, |
|
"logits/rejected": -2.481884002685547, |
|
"logps/chosen": -437.24176025390625, |
|
"logps/rejected": -411.68115234375, |
|
"loss": 0.5581, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.815970778465271, |
|
"rewards/margins": 0.4566773474216461, |
|
"rewards/rejected": -1.2726482152938843, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.077989076050133e-07, |
|
"logits/chosen": -2.4628424644470215, |
|
"logits/rejected": -2.414001703262329, |
|
"logps/chosen": -437.2959899902344, |
|
"logps/rejected": -445.44427490234375, |
|
"loss": 0.5536, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.874140739440918, |
|
"rewards/margins": 0.4144424498081207, |
|
"rewards/rejected": -1.2885833978652954, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.050059907574076e-07, |
|
"logits/chosen": -2.507554054260254, |
|
"logits/rejected": -2.427834987640381, |
|
"logps/chosen": -408.7767028808594, |
|
"logps/rejected": -413.369873046875, |
|
"loss": 0.5731, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.7224391102790833, |
|
"rewards/margins": 0.4979207515716553, |
|
"rewards/rejected": -1.2203600406646729, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.022188655806016e-07, |
|
"logits/chosen": -2.523545503616333, |
|
"logits/rejected": -2.448479413986206, |
|
"logps/chosen": -451.228515625, |
|
"logps/rejected": -419.6627502441406, |
|
"loss": 0.5857, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.7539777755737305, |
|
"rewards/margins": 0.37716880440711975, |
|
"rewards/rejected": -1.1311466693878174, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 1.9943789083594564e-07, |
|
"logits/chosen": -2.4754841327667236, |
|
"logits/rejected": -2.428614377975464, |
|
"logps/chosen": -385.0950622558594, |
|
"logps/rejected": -403.7740783691406, |
|
"loss": 0.5752, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.7256184816360474, |
|
"rewards/margins": 0.4796815812587738, |
|
"rewards/rejected": -1.2053000926971436, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 1.9666342449310025e-07, |
|
"logits/chosen": -2.530557870864868, |
|
"logits/rejected": -2.4692397117614746, |
|
"logps/chosen": -409.13922119140625, |
|
"logps/rejected": -402.8389892578125, |
|
"loss": 0.5764, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6921072602272034, |
|
"rewards/margins": 0.4285035729408264, |
|
"rewards/rejected": -1.1206107139587402, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 1.938958236839588e-07, |
|
"logits/chosen": -2.52577543258667, |
|
"logits/rejected": -2.4166135787963867, |
|
"logps/chosen": -440.7547302246094, |
|
"logps/rejected": -413.27911376953125, |
|
"loss": 0.5711, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.7067890167236328, |
|
"rewards/margins": 0.4192212224006653, |
|
"rewards/rejected": -1.1260101795196533, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 1.9113544465667637e-07, |
|
"logits/chosen": -2.46167254447937, |
|
"logits/rejected": -2.4476611614227295, |
|
"logps/chosen": -376.3783264160156, |
|
"logps/rejected": -403.9447326660156, |
|
"loss": 0.5749, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.6462227702140808, |
|
"rewards/margins": 0.4927960932254791, |
|
"rewards/rejected": -1.1390188932418823, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 1.88382642729814e-07, |
|
"logits/chosen": -2.5014305114746094, |
|
"logits/rejected": -2.4548792839050293, |
|
"logps/chosen": -411.25457763671875, |
|
"logps/rejected": -420.2723693847656, |
|
"loss": 0.5477, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6595852971076965, |
|
"rewards/margins": 0.5647996068000793, |
|
"rewards/rejected": -1.2243849039077759, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"eval_logits/chosen": -2.5078556537628174, |
|
"eval_logits/rejected": -2.4342150688171387, |
|
"eval_logps/chosen": -425.2423095703125, |
|
"eval_logps/rejected": -427.0003356933594, |
|
"eval_loss": 0.5904152393341064, |
|
"eval_rewards/accuracies": 0.7321428656578064, |
|
"eval_rewards/chosen": -0.8019776940345764, |
|
"eval_rewards/margins": 0.44097864627838135, |
|
"eval_rewards/rejected": -1.242956280708313, |
|
"eval_runtime": 248.3578, |
|
"eval_samples_per_second": 8.053, |
|
"eval_steps_per_second": 0.254, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 1.856377722466009e-07, |
|
"logits/chosen": -2.4711380004882812, |
|
"logits/rejected": -2.408111810684204, |
|
"logps/chosen": -441.938720703125, |
|
"logps/rejected": -463.12432861328125, |
|
"loss": 0.5691, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.7691561579704285, |
|
"rewards/margins": 0.46461838483810425, |
|
"rewards/rejected": -1.2337746620178223, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.8290118652932364e-07, |
|
"logits/chosen": -2.4911022186279297, |
|
"logits/rejected": -2.45578670501709, |
|
"logps/chosen": -388.4582214355469, |
|
"logps/rejected": -418.9519958496094, |
|
"loss": 0.5829, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7712088823318481, |
|
"rewards/margins": 0.48189014196395874, |
|
"rewards/rejected": -1.2530990839004517, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.8017323783384601e-07, |
|
"logits/chosen": -2.524731397628784, |
|
"logits/rejected": -2.4751369953155518, |
|
"logps/chosen": -411.6083984375, |
|
"logps/rejected": -451.7425231933594, |
|
"loss": 0.5679, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6576933860778809, |
|
"rewards/margins": 0.4532436430454254, |
|
"rewards/rejected": -1.1109369993209839, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.7745427730426635e-07, |
|
"logits/chosen": -2.5422208309173584, |
|
"logits/rejected": -2.4694812297821045, |
|
"logps/chosen": -394.8658447265625, |
|
"logps/rejected": -415.96124267578125, |
|
"loss": 0.5595, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.6860970258712769, |
|
"rewards/margins": 0.5364459753036499, |
|
"rewards/rejected": -1.2225428819656372, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.7474465492771772e-07, |
|
"logits/chosen": -2.4822356700897217, |
|
"logits/rejected": -2.392840623855591, |
|
"logps/chosen": -445.530029296875, |
|
"logps/rejected": -415.55078125, |
|
"loss": 0.5643, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.7760051488876343, |
|
"rewards/margins": 0.48258695006370544, |
|
"rewards/rejected": -1.258592128753662, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.7204471948931758e-07, |
|
"logits/chosen": -2.3855233192443848, |
|
"logits/rejected": -2.342365026473999, |
|
"logps/chosen": -368.435791015625, |
|
"logps/rejected": -391.09893798828125, |
|
"loss": 0.5579, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7862086296081543, |
|
"rewards/margins": 0.42663902044296265, |
|
"rewards/rejected": -1.2128477096557617, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.6935481852727173e-07, |
|
"logits/chosen": -2.4512977600097656, |
|
"logits/rejected": -2.3995602130889893, |
|
"logps/chosen": -432.49578857421875, |
|
"logps/rejected": -437.99298095703125, |
|
"loss": 0.5795, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.7520878314971924, |
|
"rewards/margins": 0.4426315426826477, |
|
"rewards/rejected": -1.1947194337844849, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.6667529828813853e-07, |
|
"logits/chosen": -2.5046944618225098, |
|
"logits/rejected": -2.4515976905822754, |
|
"logps/chosen": -406.6270446777344, |
|
"logps/rejected": -427.93231201171875, |
|
"loss": 0.5629, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.6665788888931274, |
|
"rewards/margins": 0.5746535658836365, |
|
"rewards/rejected": -1.2412325143814087, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.640065036822605e-07, |
|
"logits/chosen": -2.5178215503692627, |
|
"logits/rejected": -2.4513187408447266, |
|
"logps/chosen": -407.05023193359375, |
|
"logps/rejected": -399.77423095703125, |
|
"loss": 0.5559, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.682750403881073, |
|
"rewards/margins": 0.4834769368171692, |
|
"rewards/rejected": -1.1662273406982422, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.6134877823936607e-07, |
|
"logits/chosen": -2.4752984046936035, |
|
"logits/rejected": -2.40830659866333, |
|
"logps/chosen": -424.7567443847656, |
|
"logps/rejected": -407.9845275878906, |
|
"loss": 0.5718, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.758663535118103, |
|
"rewards/margins": 0.4667127728462219, |
|
"rewards/rejected": -1.2253763675689697, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"eval_logits/chosen": -2.4927799701690674, |
|
"eval_logits/rejected": -2.4185616970062256, |
|
"eval_logps/chosen": -424.36309814453125, |
|
"eval_logps/rejected": -427.0936584472656, |
|
"eval_loss": 0.5897929668426514, |
|
"eval_rewards/accuracies": 0.7321428656578064, |
|
"eval_rewards/chosen": -0.7931855320930481, |
|
"eval_rewards/margins": 0.4507039487361908, |
|
"eval_rewards/rejected": -1.2438894510269165, |
|
"eval_runtime": 244.3171, |
|
"eval_samples_per_second": 8.186, |
|
"eval_steps_per_second": 0.258, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.587024640643513e-07, |
|
"logits/chosen": -2.446739673614502, |
|
"logits/rejected": -2.4178757667541504, |
|
"logps/chosen": -401.63641357421875, |
|
"logps/rejected": -406.626708984375, |
|
"loss": 0.5706, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.7500771284103394, |
|
"rewards/margins": 0.4350431561470032, |
|
"rewards/rejected": -1.1851202249526978, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.5606790179324257e-07, |
|
"logits/chosen": -2.469186305999756, |
|
"logits/rejected": -2.3644776344299316, |
|
"logps/chosen": -437.4012756347656, |
|
"logps/rejected": -429.28369140625, |
|
"loss": 0.553, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.7953532338142395, |
|
"rewards/margins": 0.5376496315002441, |
|
"rewards/rejected": -1.3330028057098389, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.534454305493509e-07, |
|
"logits/chosen": -2.4878664016723633, |
|
"logits/rejected": -2.402182102203369, |
|
"logps/chosen": -430.43035888671875, |
|
"logps/rejected": -446.88055419921875, |
|
"loss": 0.5665, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.718338131904602, |
|
"rewards/margins": 0.5019052624702454, |
|
"rewards/rejected": -1.2202433347702026, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.5083538789961846e-07, |
|
"logits/chosen": -2.4689135551452637, |
|
"logits/rejected": -2.3651726245880127, |
|
"logps/chosen": -424.533935546875, |
|
"logps/rejected": -407.27264404296875, |
|
"loss": 0.5746, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.7878087162971497, |
|
"rewards/margins": 0.4385625720024109, |
|
"rewards/rejected": -1.2263712882995605, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.4823810981116767e-07, |
|
"logits/chosen": -2.420289993286133, |
|
"logits/rejected": -2.3955094814300537, |
|
"logps/chosen": -412.322509765625, |
|
"logps/rejected": -425.9512634277344, |
|
"loss": 0.5538, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6696097254753113, |
|
"rewards/margins": 0.4169555604457855, |
|
"rewards/rejected": -1.086565375328064, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.456539306080543e-07, |
|
"logits/chosen": -2.4510982036590576, |
|
"logits/rejected": -2.4162471294403076, |
|
"logps/chosen": -413.22509765625, |
|
"logps/rejected": -462.84381103515625, |
|
"loss": 0.5746, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.7580252289772034, |
|
"rewards/margins": 0.40020751953125, |
|
"rewards/rejected": -1.1582326889038086, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.4308318292823364e-07, |
|
"logits/chosen": -2.4641025066375732, |
|
"logits/rejected": -2.4155373573303223, |
|
"logps/chosen": -411.8502502441406, |
|
"logps/rejected": -425.70416259765625, |
|
"loss": 0.5509, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.7296292185783386, |
|
"rewards/margins": 0.44563156366348267, |
|
"rewards/rejected": -1.1752609014511108, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.4052619768074267e-07, |
|
"logits/chosen": -2.449817180633545, |
|
"logits/rejected": -2.390150547027588, |
|
"logps/chosen": -420.39849853515625, |
|
"logps/rejected": -426.16400146484375, |
|
"loss": 0.575, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7945643663406372, |
|
"rewards/margins": 0.48763760924339294, |
|
"rewards/rejected": -1.2822020053863525, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.3798330400310537e-07, |
|
"logits/chosen": -2.404205322265625, |
|
"logits/rejected": -2.3124032020568848, |
|
"logps/chosen": -383.17266845703125, |
|
"logps/rejected": -382.43841552734375, |
|
"loss": 0.5779, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6244784593582153, |
|
"rewards/margins": 0.4246695935726166, |
|
"rewards/rejected": -1.0491479635238647, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.354548292189657e-07, |
|
"logits/chosen": -2.437732458114624, |
|
"logits/rejected": -2.3991034030914307, |
|
"logps/chosen": -385.60443115234375, |
|
"logps/rejected": -417.61468505859375, |
|
"loss": 0.563, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.7070263624191284, |
|
"rewards/margins": 0.44580182433128357, |
|
"rewards/rejected": -1.1528282165527344, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_logits/chosen": -2.496089220046997, |
|
"eval_logits/rejected": -2.422304391860962, |
|
"eval_logps/chosen": -413.7807312011719, |
|
"eval_logps/rejected": -415.832763671875, |
|
"eval_loss": 0.5903951525688171, |
|
"eval_rewards/accuracies": 0.7202380895614624, |
|
"eval_rewards/chosen": -0.687361478805542, |
|
"eval_rewards/margins": 0.44391965866088867, |
|
"eval_rewards/rejected": -1.1312810182571411, |
|
"eval_runtime": 243.6107, |
|
"eval_samples_per_second": 8.21, |
|
"eval_steps_per_second": 0.259, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.3294109879595412e-07, |
|
"logits/chosen": -2.518566131591797, |
|
"logits/rejected": -2.4946486949920654, |
|
"logps/chosen": -407.2130126953125, |
|
"logps/rejected": -429.19573974609375, |
|
"loss": 0.5917, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.6605619192123413, |
|
"rewards/margins": 0.3588492274284363, |
|
"rewards/rejected": -1.0194110870361328, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.304424363037932e-07, |
|
"logits/chosen": -2.449763059616089, |
|
"logits/rejected": -2.358640193939209, |
|
"logps/chosen": -434.584716796875, |
|
"logps/rejected": -435.99725341796875, |
|
"loss": 0.5423, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7439482808113098, |
|
"rewards/margins": 0.45913758873939514, |
|
"rewards/rejected": -1.2030858993530273, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.2795916337264756e-07, |
|
"logits/chosen": -2.46913480758667, |
|
"logits/rejected": -2.377265214920044, |
|
"logps/chosen": -426.83660888671875, |
|
"logps/rejected": -421.1961364746094, |
|
"loss": 0.5715, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.7704228162765503, |
|
"rewards/margins": 0.47521066665649414, |
|
"rewards/rejected": -1.2456334829330444, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.2549159965172295e-07, |
|
"logits/chosen": -2.4469761848449707, |
|
"logits/rejected": -2.3427939414978027, |
|
"logps/chosen": -425.2955017089844, |
|
"logps/rejected": -424.61492919921875, |
|
"loss": 0.5667, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7805430293083191, |
|
"rewards/margins": 0.49475497007369995, |
|
"rewards/rejected": -1.275297999382019, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.2304006276812122e-07, |
|
"logits/chosen": -2.3801145553588867, |
|
"logits/rejected": -2.350468397140503, |
|
"logps/chosen": -366.3126220703125, |
|
"logps/rejected": -400.9440002441406, |
|
"loss": 0.5529, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.7851904630661011, |
|
"rewards/margins": 0.4816361963748932, |
|
"rewards/rejected": -1.2668267488479614, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.2060486828595442e-07, |
|
"logits/chosen": -2.4748592376708984, |
|
"logits/rejected": -2.408360719680786, |
|
"logps/chosen": -419.6524353027344, |
|
"logps/rejected": -431.26055908203125, |
|
"loss": 0.5753, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.8105200529098511, |
|
"rewards/margins": 0.42913907766342163, |
|
"rewards/rejected": -1.239659309387207, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.1818632966572578e-07, |
|
"logits/chosen": -2.4946236610412598, |
|
"logits/rejected": -2.4172983169555664, |
|
"logps/chosen": -416.42645263671875, |
|
"logps/rejected": -442.043212890625, |
|
"loss": 0.5641, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.8131068348884583, |
|
"rewards/margins": 0.5074289441108704, |
|
"rewards/rejected": -1.3205358982086182, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.1578475822398032e-07, |
|
"logits/chosen": -2.455920696258545, |
|
"logits/rejected": -2.3736257553100586, |
|
"logps/chosen": -422.4615783691406, |
|
"logps/rejected": -441.34521484375, |
|
"loss": 0.566, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.817348301410675, |
|
"rewards/margins": 0.49631649255752563, |
|
"rewards/rejected": -1.3136647939682007, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.1340046309323206e-07, |
|
"logits/chosen": -2.474325656890869, |
|
"logits/rejected": -2.4197046756744385, |
|
"logps/chosen": -406.9759826660156, |
|
"logps/rejected": -424.93023681640625, |
|
"loss": 0.5626, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.8615679740905762, |
|
"rewards/margins": 0.4161076545715332, |
|
"rewards/rejected": -1.2776756286621094, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.1103375118217218e-07, |
|
"logits/chosen": -2.4074063301086426, |
|
"logits/rejected": -2.339216709136963, |
|
"logps/chosen": -384.3484802246094, |
|
"logps/rejected": -401.8287048339844, |
|
"loss": 0.5633, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7149208784103394, |
|
"rewards/margins": 0.4786139130592346, |
|
"rewards/rejected": -1.1935349702835083, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_logits/chosen": -2.4819118976593018, |
|
"eval_logits/rejected": -2.407344341278076, |
|
"eval_logps/chosen": -420.6850891113281, |
|
"eval_logps/rejected": -423.75042724609375, |
|
"eval_loss": 0.5884086489677429, |
|
"eval_rewards/accuracies": 0.726190447807312, |
|
"eval_rewards/chosen": -0.756405770778656, |
|
"eval_rewards/margins": 0.45405152440071106, |
|
"eval_rewards/rejected": -1.2104572057724, |
|
"eval_runtime": 244.0013, |
|
"eval_samples_per_second": 8.197, |
|
"eval_steps_per_second": 0.258, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.086849271361634e-07, |
|
"logits/chosen": -2.4910244941711426, |
|
"logits/rejected": -2.4178977012634277, |
|
"logps/chosen": -421.26654052734375, |
|
"logps/rejected": -442.17657470703125, |
|
"loss": 0.5609, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7127526998519897, |
|
"rewards/margins": 0.517103374004364, |
|
"rewards/rejected": -1.2298561334609985, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.0635429329802578e-07, |
|
"logits/chosen": -2.428316831588745, |
|
"logits/rejected": -2.4025187492370605, |
|
"logps/chosen": -385.6629943847656, |
|
"logps/rejected": -423.72857666015625, |
|
"loss": 0.5433, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7322330474853516, |
|
"rewards/margins": 0.46380695700645447, |
|
"rewards/rejected": -1.1960399150848389, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.0404214966911895e-07, |
|
"logits/chosen": -2.4701590538024902, |
|
"logits/rejected": -2.3929755687713623, |
|
"logps/chosen": -432.38629150390625, |
|
"logps/rejected": -409.7430114746094, |
|
"loss": 0.5412, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7193957567214966, |
|
"rewards/margins": 0.4175872206687927, |
|
"rewards/rejected": -1.136983036994934, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.0174879387072549e-07, |
|
"logits/chosen": -2.4195656776428223, |
|
"logits/rejected": -2.3856568336486816, |
|
"logps/chosen": -378.31634521484375, |
|
"logps/rejected": -429.25396728515625, |
|
"loss": 0.5402, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7908437848091125, |
|
"rewards/margins": 0.4326706528663635, |
|
"rewards/rejected": -1.2235145568847656, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 9.947452110574098e-08, |
|
"logits/chosen": -2.411748170852661, |
|
"logits/rejected": -2.3596882820129395, |
|
"logps/chosen": -426.4925842285156, |
|
"logps/rejected": -443.3196716308594, |
|
"loss": 0.5473, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.7338422536849976, |
|
"rewards/margins": 0.503951907157898, |
|
"rewards/rejected": -1.2377939224243164, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 9.721962412067519e-08, |
|
"logits/chosen": -2.401766061782837, |
|
"logits/rejected": -2.316685676574707, |
|
"logps/chosen": -408.12103271484375, |
|
"logps/rejected": -398.24017333984375, |
|
"loss": 0.5627, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.8616282343864441, |
|
"rewards/margins": 0.36459067463874817, |
|
"rewards/rejected": -1.2262189388275146, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 9.498439316796913e-08, |
|
"logits/chosen": -2.429500102996826, |
|
"logits/rejected": -2.341491937637329, |
|
"logps/chosen": -388.14208984375, |
|
"logps/rejected": -396.15423583984375, |
|
"loss": 0.551, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7477534413337708, |
|
"rewards/margins": 0.5198956727981567, |
|
"rewards/rejected": -1.2676490545272827, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 9.276911596863441e-08, |
|
"logits/chosen": -2.430539608001709, |
|
"logits/rejected": -2.3732991218566895, |
|
"logps/chosen": -408.09771728515625, |
|
"logps/rejected": -429.29705810546875, |
|
"loss": 0.5612, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.6935451626777649, |
|
"rewards/margins": 0.5118761658668518, |
|
"rewards/rejected": -1.2054214477539062, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 9.05740776752163e-08, |
|
"logits/chosen": -2.534486770629883, |
|
"logits/rejected": -2.425327777862549, |
|
"logps/chosen": -466.2666931152344, |
|
"logps/rejected": -450.0174255371094, |
|
"loss": 0.5509, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.763168215751648, |
|
"rewards/margins": 0.5992245674133301, |
|
"rewards/rejected": -1.362392783164978, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 8.839956083508959e-08, |
|
"logits/chosen": -2.4332027435302734, |
|
"logits/rejected": -2.4018168449401855, |
|
"logps/chosen": -428.01910400390625, |
|
"logps/rejected": -455.42889404296875, |
|
"loss": 0.5564, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.7803698182106018, |
|
"rewards/margins": 0.4613746106624603, |
|
"rewards/rejected": -1.2417443990707397, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_logits/chosen": -2.4696059226989746, |
|
"eval_logits/rejected": -2.394796133041382, |
|
"eval_logps/chosen": -426.54876708984375, |
|
"eval_logps/rejected": -430.72430419921875, |
|
"eval_loss": 0.587758481502533, |
|
"eval_rewards/accuracies": 0.726190447807312, |
|
"eval_rewards/chosen": -0.815041720867157, |
|
"eval_rewards/margins": 0.4651543200016022, |
|
"eval_rewards/rejected": -1.280196189880371, |
|
"eval_runtime": 249.102, |
|
"eval_samples_per_second": 8.029, |
|
"eval_steps_per_second": 0.253, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 8.624584535408836e-08, |
|
"logits/chosen": -2.467682361602783, |
|
"logits/rejected": -2.3958325386047363, |
|
"logps/chosen": -442.32879638671875, |
|
"logps/rejected": -444.9439392089844, |
|
"loss": 0.5491, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.7700357437133789, |
|
"rewards/margins": 0.5091968178749084, |
|
"rewards/rejected": -1.2792325019836426, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 8.411320846047637e-08, |
|
"logits/chosen": -2.4758048057556152, |
|
"logits/rejected": -2.452558755874634, |
|
"logps/chosen": -398.3210754394531, |
|
"logps/rejected": -397.4677734375, |
|
"loss": 0.5505, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.7616570591926575, |
|
"rewards/margins": 0.417407363653183, |
|
"rewards/rejected": -1.179064393043518, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 8.200192466926201e-08, |
|
"logits/chosen": -2.4519848823547363, |
|
"logits/rejected": -2.3826258182525635, |
|
"logps/chosen": -436.55859375, |
|
"logps/rejected": -458.65594482421875, |
|
"loss": 0.5332, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.8493996858596802, |
|
"rewards/margins": 0.5574203729629517, |
|
"rewards/rejected": -1.4068200588226318, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 7.991226574686241e-08, |
|
"logits/chosen": -2.4183828830718994, |
|
"logits/rejected": -2.3716251850128174, |
|
"logps/chosen": -374.54803466796875, |
|
"logps/rejected": -389.0216064453125, |
|
"loss": 0.5528, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.7416827082633972, |
|
"rewards/margins": 0.5108539462089539, |
|
"rewards/rejected": -1.252536654472351, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 7.784450067612138e-08, |
|
"logits/chosen": -2.4434866905212402, |
|
"logits/rejected": -2.3613460063934326, |
|
"logps/chosen": -435.7193298339844, |
|
"logps/rejected": -428.2972717285156, |
|
"loss": 0.5349, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.7578371167182922, |
|
"rewards/margins": 0.5514657497406006, |
|
"rewards/rejected": -1.3093029260635376, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 7.579889562168585e-08, |
|
"logits/chosen": -2.434720993041992, |
|
"logits/rejected": -2.359743595123291, |
|
"logps/chosen": -392.49005126953125, |
|
"logps/rejected": -418.412353515625, |
|
"loss": 0.532, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.7233065962791443, |
|
"rewards/margins": 0.6114121675491333, |
|
"rewards/rejected": -1.3347185850143433, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 7.377571389574474e-08, |
|
"logits/chosen": -2.4690604209899902, |
|
"logits/rejected": -2.412727117538452, |
|
"logps/chosen": -407.22943115234375, |
|
"logps/rejected": -429.3172302246094, |
|
"loss": 0.5137, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.8272277116775513, |
|
"rewards/margins": 0.5033684968948364, |
|
"rewards/rejected": -1.3305962085723877, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 7.177521592413505e-08, |
|
"logits/chosen": -2.4891464710235596, |
|
"logits/rejected": -2.4008851051330566, |
|
"logps/chosen": -416.02374267578125, |
|
"logps/rejected": -434.93853759765625, |
|
"loss": 0.5715, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.84009850025177, |
|
"rewards/margins": 0.4889064431190491, |
|
"rewards/rejected": -1.3290048837661743, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 6.979765921282021e-08, |
|
"logits/chosen": -2.4268863201141357, |
|
"logits/rejected": -2.3376641273498535, |
|
"logps/chosen": -441.4808044433594, |
|
"logps/rejected": -436.463134765625, |
|
"loss": 0.5379, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.8702206611633301, |
|
"rewards/margins": 0.5598399639129639, |
|
"rewards/rejected": -1.430060625076294, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 6.784329831474276e-08, |
|
"logits/chosen": -2.4219398498535156, |
|
"logits/rejected": -2.3306546211242676, |
|
"logps/chosen": -419.054443359375, |
|
"logps/rejected": -465.0423889160156, |
|
"loss": 0.5373, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.7490620613098145, |
|
"rewards/margins": 0.6311138272285461, |
|
"rewards/rejected": -1.3801758289337158, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"eval_logits/chosen": -2.4548380374908447, |
|
"eval_logits/rejected": -2.379462718963623, |
|
"eval_logps/chosen": -432.9532470703125, |
|
"eval_logps/rejected": -438.72894287109375, |
|
"eval_loss": 0.5864917039871216, |
|
"eval_rewards/accuracies": 0.7341269850730896, |
|
"eval_rewards/chosen": -0.879087507724762, |
|
"eval_rewards/margins": 0.481155127286911, |
|
"eval_rewards/rejected": -1.3602426052093506, |
|
"eval_runtime": 246.5339, |
|
"eval_samples_per_second": 8.112, |
|
"eval_steps_per_second": 0.256, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 6.591238479705901e-08, |
|
"logits/chosen": -2.487351179122925, |
|
"logits/rejected": -2.405980348587036, |
|
"logps/chosen": -408.71954345703125, |
|
"logps/rejected": -426.19744873046875, |
|
"loss": 0.5408, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.7479840517044067, |
|
"rewards/margins": 0.6013373136520386, |
|
"rewards/rejected": -1.3493213653564453, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 6.40051672087562e-08, |
|
"logits/chosen": -2.4331510066986084, |
|
"logits/rejected": -2.3278965950012207, |
|
"logps/chosen": -438.2916564941406, |
|
"logps/rejected": -428.78485107421875, |
|
"loss": 0.5201, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8200156092643738, |
|
"rewards/margins": 0.6146233677864075, |
|
"rewards/rejected": -1.4346389770507812, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 6.212189104865972e-08, |
|
"logits/chosen": -2.448960781097412, |
|
"logits/rejected": -2.387979030609131, |
|
"logps/chosen": -428.59039306640625, |
|
"logps/rejected": -447.049560546875, |
|
"loss": 0.5591, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.7488642930984497, |
|
"rewards/margins": 0.5563133955001831, |
|
"rewards/rejected": -1.3051776885986328, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 6.026279873383191e-08, |
|
"logits/chosen": -2.3341879844665527, |
|
"logits/rejected": -2.2485053539276123, |
|
"logps/chosen": -434.8133850097656, |
|
"logps/rejected": -454.1504821777344, |
|
"loss": 0.5177, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.8202449083328247, |
|
"rewards/margins": 0.660535454750061, |
|
"rewards/rejected": -1.4807803630828857, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 5.842812956836804e-08, |
|
"logits/chosen": -2.5089287757873535, |
|
"logits/rejected": -2.418996572494507, |
|
"logps/chosen": -466.69097900390625, |
|
"logps/rejected": -467.65948486328125, |
|
"loss": 0.5671, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8531349897384644, |
|
"rewards/margins": 0.5565911531448364, |
|
"rewards/rejected": -1.4097262620925903, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 5.661811971259284e-08, |
|
"logits/chosen": -2.4990134239196777, |
|
"logits/rejected": -2.427431106567383, |
|
"logps/chosen": -414.7474060058594, |
|
"logps/rejected": -434.27142333984375, |
|
"loss": 0.5575, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.7728979587554932, |
|
"rewards/margins": 0.4413650631904602, |
|
"rewards/rejected": -1.2142630815505981, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 5.483300215266168e-08, |
|
"logits/chosen": -2.392768621444702, |
|
"logits/rejected": -2.3579678535461426, |
|
"logps/chosen": -379.36358642578125, |
|
"logps/rejected": -460.744873046875, |
|
"loss": 0.5327, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.820690929889679, |
|
"rewards/margins": 0.571506679058075, |
|
"rewards/rejected": -1.392197608947754, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 5.307300667057049e-08, |
|
"logits/chosen": -2.4417078495025635, |
|
"logits/rejected": -2.350722551345825, |
|
"logps/chosen": -456.86138916015625, |
|
"logps/rejected": -441.4781188964844, |
|
"loss": 0.5599, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.7839853763580322, |
|
"rewards/margins": 0.4950861930847168, |
|
"rewards/rejected": -1.279071569442749, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 5.133835981457771e-08, |
|
"logits/chosen": -2.4283745288848877, |
|
"logits/rejected": -2.406078815460205, |
|
"logps/chosen": -382.7402038574219, |
|
"logps/rejected": -412.78192138671875, |
|
"loss": 0.5472, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.8303159475326538, |
|
"rewards/margins": 0.4645245671272278, |
|
"rewards/rejected": -1.2948405742645264, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 4.962928487004339e-08, |
|
"logits/chosen": -2.4081058502197266, |
|
"logits/rejected": -2.3385443687438965, |
|
"logps/chosen": -403.322509765625, |
|
"logps/rejected": -437.6986389160156, |
|
"loss": 0.5559, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7759246826171875, |
|
"rewards/margins": 0.6043592691421509, |
|
"rewards/rejected": -1.3802839517593384, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"eval_logits/chosen": -2.4495673179626465, |
|
"eval_logits/rejected": -2.374314069747925, |
|
"eval_logps/chosen": -429.79962158203125, |
|
"eval_logps/rejected": -435.3000793457031, |
|
"eval_loss": 0.587175726890564, |
|
"eval_rewards/accuracies": 0.7242063283920288, |
|
"eval_rewards/chosen": -0.8475508689880371, |
|
"eval_rewards/margins": 0.478402704000473, |
|
"eval_rewards/rejected": -1.325953483581543, |
|
"eval_runtime": 245.2318, |
|
"eval_samples_per_second": 8.156, |
|
"eval_steps_per_second": 0.257, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 4.794600183068687e-08, |
|
"logits/chosen": -2.4469664096832275, |
|
"logits/rejected": -2.386204719543457, |
|
"logps/chosen": -409.12469482421875, |
|
"logps/rejected": -439.278564453125, |
|
"loss": 0.5463, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8211742639541626, |
|
"rewards/margins": 0.6064980626106262, |
|
"rewards/rejected": -1.4276723861694336, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 4.628872737026984e-08, |
|
"logits/chosen": -2.4036548137664795, |
|
"logits/rejected": -2.3470802307128906, |
|
"logps/chosen": -398.6167907714844, |
|
"logps/rejected": -412.97027587890625, |
|
"loss": 0.554, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.8146356344223022, |
|
"rewards/margins": 0.4236365258693695, |
|
"rewards/rejected": -1.2382723093032837, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 4.4657674814705085e-08, |
|
"logits/chosen": -2.4584813117980957, |
|
"logits/rejected": -2.3649630546569824, |
|
"logps/chosen": -418.7437438964844, |
|
"logps/rejected": -406.3216857910156, |
|
"loss": 0.5604, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.7241760492324829, |
|
"rewards/margins": 0.540256917476654, |
|
"rewards/rejected": -1.2644331455230713, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 4.305305411459773e-08, |
|
"logits/chosen": -2.4563419818878174, |
|
"logits/rejected": -2.410745859146118, |
|
"logps/chosen": -436.936767578125, |
|
"logps/rejected": -442.15948486328125, |
|
"loss": 0.5533, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.7409111261367798, |
|
"rewards/margins": 0.5352380275726318, |
|
"rewards/rejected": -1.276149034500122, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 4.1475071818219466e-08, |
|
"logits/chosen": -2.3814117908477783, |
|
"logits/rejected": -2.3320162296295166, |
|
"logps/chosen": -450.9549865722656, |
|
"logps/rejected": -440.48974609375, |
|
"loss": 0.5545, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.8471084833145142, |
|
"rewards/margins": 0.5237026214599609, |
|
"rewards/rejected": -1.3708112239837646, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 3.992393104492209e-08, |
|
"logits/chosen": -2.4187827110290527, |
|
"logits/rejected": -2.3232665061950684, |
|
"logps/chosen": -408.9256286621094, |
|
"logps/rejected": -428.8360290527344, |
|
"loss": 0.5629, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.8053148984909058, |
|
"rewards/margins": 0.47992628812789917, |
|
"rewards/rejected": -1.2852413654327393, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 3.839983145899148e-08, |
|
"logits/chosen": -2.399820566177368, |
|
"logits/rejected": -2.2919247150421143, |
|
"logps/chosen": -429.00469970703125, |
|
"logps/rejected": -428.13018798828125, |
|
"loss": 0.5486, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.7831125259399414, |
|
"rewards/margins": 0.573869526386261, |
|
"rewards/rejected": -1.3569821119308472, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.690296924394659e-08, |
|
"logits/chosen": -2.3557441234588623, |
|
"logits/rejected": -2.333583354949951, |
|
"logps/chosen": -409.0820007324219, |
|
"logps/rejected": -417.942138671875, |
|
"loss": 0.5611, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.9311076402664185, |
|
"rewards/margins": 0.3103558421134949, |
|
"rewards/rejected": -1.2414636611938477, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 3.543353707728672e-08, |
|
"logits/chosen": -2.426609992980957, |
|
"logits/rejected": -2.3464412689208984, |
|
"logps/chosen": -406.8370666503906, |
|
"logps/rejected": -408.04913330078125, |
|
"loss": 0.5695, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.8274558186531067, |
|
"rewards/margins": 0.5477991700172424, |
|
"rewards/rejected": -1.3752549886703491, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 3.3991724105689736e-08, |
|
"logits/chosen": -2.3941025733947754, |
|
"logits/rejected": -2.2982020378112793, |
|
"logps/chosen": -423.27313232421875, |
|
"logps/rejected": -424.16973876953125, |
|
"loss": 0.5467, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.898272693157196, |
|
"rewards/margins": 0.496847003698349, |
|
"rewards/rejected": -1.3951194286346436, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_logits/chosen": -2.4452052116394043, |
|
"eval_logits/rejected": -2.3696937561035156, |
|
"eval_logps/chosen": -429.87860107421875, |
|
"eval_logps/rejected": -435.4400939941406, |
|
"eval_loss": 0.5867913961410522, |
|
"eval_rewards/accuracies": 0.7222222089767456, |
|
"eval_rewards/chosen": -0.8483405113220215, |
|
"eval_rewards/margins": 0.4790137708187103, |
|
"eval_rewards/rejected": -1.3273543119430542, |
|
"eval_runtime": 247.943, |
|
"eval_samples_per_second": 8.066, |
|
"eval_steps_per_second": 0.254, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 3.257771592066499e-08, |
|
"logits/chosen": -2.428584098815918, |
|
"logits/rejected": -2.3538498878479004, |
|
"logps/chosen": -428.6109924316406, |
|
"logps/rejected": -435.99542236328125, |
|
"loss": 0.5562, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.8293555378913879, |
|
"rewards/margins": 0.58681321144104, |
|
"rewards/rejected": -1.4161686897277832, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 3.119169453466367e-08, |
|
"logits/chosen": -2.5020382404327393, |
|
"logits/rejected": -2.42659330368042, |
|
"logps/chosen": -431.7511291503906, |
|
"logps/rejected": -445.3756408691406, |
|
"loss": 0.5214, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.7522695660591125, |
|
"rewards/margins": 0.48350271582603455, |
|
"rewards/rejected": -1.2357723712921143, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 2.983383835765038e-08, |
|
"logits/chosen": -2.4027044773101807, |
|
"logits/rejected": -2.3710248470306396, |
|
"logps/chosen": -436.78973388671875, |
|
"logps/rejected": -459.3680725097656, |
|
"loss": 0.5583, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.7804380655288696, |
|
"rewards/margins": 0.5590247511863708, |
|
"rewards/rejected": -1.3394627571105957, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 2.8504322174137452e-08, |
|
"logits/chosen": -2.419508695602417, |
|
"logits/rejected": -2.393357992172241, |
|
"logps/chosen": -372.4573059082031, |
|
"logps/rejected": -410.5888671875, |
|
"loss": 0.5364, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.818364143371582, |
|
"rewards/margins": 0.5231537818908691, |
|
"rewards/rejected": -1.341517686843872, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.7203317120687214e-08, |
|
"logits/chosen": -2.36185359954834, |
|
"logits/rejected": -2.3087821006774902, |
|
"logps/chosen": -438.8936462402344, |
|
"logps/rejected": -449.9029846191406, |
|
"loss": 0.5202, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.8277127146720886, |
|
"rewards/margins": 0.5720094442367554, |
|
"rewards/rejected": -1.3997222185134888, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 2.5930990663882298e-08, |
|
"logits/chosen": -2.5004594326019287, |
|
"logits/rejected": -2.4132115840911865, |
|
"logps/chosen": -463.64031982421875, |
|
"logps/rejected": -466.9752502441406, |
|
"loss": 0.53, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8853015899658203, |
|
"rewards/margins": 0.4785071015357971, |
|
"rewards/rejected": -1.3638086318969727, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 2.4687506578770195e-08, |
|
"logits/chosen": -2.4215641021728516, |
|
"logits/rejected": -2.373194456100464, |
|
"logps/chosen": -429.7237854003906, |
|
"logps/rejected": -465.56622314453125, |
|
"loss": 0.5514, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7883203029632568, |
|
"rewards/margins": 0.5758455991744995, |
|
"rewards/rejected": -1.3641657829284668, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.3473024927780888e-08, |
|
"logits/chosen": -2.3653247356414795, |
|
"logits/rejected": -2.373579502105713, |
|
"logps/chosen": -422.0316467285156, |
|
"logps/rejected": -431.6985778808594, |
|
"loss": 0.5492, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.880569577217102, |
|
"rewards/margins": 0.3722376823425293, |
|
"rewards/rejected": -1.252807378768921, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.228770204012448e-08, |
|
"logits/chosen": -2.4217820167541504, |
|
"logits/rejected": -2.3459651470184326, |
|
"logps/chosen": -400.4313049316406, |
|
"logps/rejected": -409.916259765625, |
|
"loss": 0.5498, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.8538491129875183, |
|
"rewards/margins": 0.4800568222999573, |
|
"rewards/rejected": -1.3339059352874756, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 2.1131690491667547e-08, |
|
"logits/chosen": -2.431408405303955, |
|
"logits/rejected": -2.384169578552246, |
|
"logps/chosen": -432.4293518066406, |
|
"logps/rejected": -427.60626220703125, |
|
"loss": 0.5666, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.9473799467086792, |
|
"rewards/margins": 0.34295937418937683, |
|
"rewards/rejected": -1.2903392314910889, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_logits/chosen": -2.439899444580078, |
|
"eval_logits/rejected": -2.364093780517578, |
|
"eval_logps/chosen": -432.5810852050781, |
|
"eval_logps/rejected": -438.9631042480469, |
|
"eval_loss": 0.585797131061554, |
|
"eval_rewards/accuracies": 0.7242063283920288, |
|
"eval_rewards/chosen": -0.8753649592399597, |
|
"eval_rewards/margins": 0.4872189164161682, |
|
"eval_rewards/rejected": -1.3625837564468384, |
|
"eval_runtime": 250.9047, |
|
"eval_samples_per_second": 7.971, |
|
"eval_steps_per_second": 0.251, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 2.0005139085293942e-08, |
|
"logits/chosen": -2.3748772144317627, |
|
"logits/rejected": -2.366490364074707, |
|
"logps/chosen": -394.9242248535156, |
|
"logps/rejected": -445.06317138671875, |
|
"loss": 0.5697, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9380524754524231, |
|
"rewards/margins": 0.4403650164604187, |
|
"rewards/rejected": -1.3784174919128418, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 1.8908192831750545e-08, |
|
"logits/chosen": -2.3916258811950684, |
|
"logits/rejected": -2.3020219802856445, |
|
"logps/chosen": -426.46990966796875, |
|
"logps/rejected": -416.8695373535156, |
|
"loss": 0.5335, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8564615249633789, |
|
"rewards/margins": 0.5280572175979614, |
|
"rewards/rejected": -1.3845187425613403, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 1.7840992930981345e-08, |
|
"logits/chosen": -2.4265730381011963, |
|
"logits/rejected": -2.38875150680542, |
|
"logps/chosen": -462.3692932128906, |
|
"logps/rejected": -460.39337158203125, |
|
"loss": 0.5537, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.8171685338020325, |
|
"rewards/margins": 0.4775725305080414, |
|
"rewards/rejected": -1.294741153717041, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 1.6803676753952138e-08, |
|
"logits/chosen": -2.3773293495178223, |
|
"logits/rejected": -2.3178646564483643, |
|
"logps/chosen": -409.1978759765625, |
|
"logps/rejected": -452.56549072265625, |
|
"loss": 0.529, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.7988417744636536, |
|
"rewards/margins": 0.6674584150314331, |
|
"rewards/rejected": -1.466300129890442, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 1.5796377824967788e-08, |
|
"logits/chosen": -2.4757285118103027, |
|
"logits/rejected": -2.3862550258636475, |
|
"logps/chosen": -461.76483154296875, |
|
"logps/rejected": -448.6534118652344, |
|
"loss": 0.5449, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.7839967012405396, |
|
"rewards/margins": 0.5081378817558289, |
|
"rewards/rejected": -1.2921345233917236, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 1.481922580448533e-08, |
|
"logits/chosen": -2.402846574783325, |
|
"logits/rejected": -2.3772921562194824, |
|
"logps/chosen": -418.25079345703125, |
|
"logps/rejected": -474.2286682128906, |
|
"loss": 0.5587, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.7962145805358887, |
|
"rewards/margins": 0.4984118938446045, |
|
"rewards/rejected": -1.2946264743804932, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 1.3872346472423246e-08, |
|
"logits/chosen": -2.4501101970672607, |
|
"logits/rejected": -2.356166362762451, |
|
"logps/chosen": -448.30303955078125, |
|
"logps/rejected": -447.73712158203125, |
|
"loss": 0.5517, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.8609638214111328, |
|
"rewards/margins": 0.46454888582229614, |
|
"rewards/rejected": -1.3255127668380737, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 1.2955861711971745e-08, |
|
"logits/chosen": -2.4125468730926514, |
|
"logits/rejected": -2.312474012374878, |
|
"logps/chosen": -454.7066955566406, |
|
"logps/rejected": -423.25390625, |
|
"loss": 0.5568, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8763197064399719, |
|
"rewards/margins": 0.5553014874458313, |
|
"rewards/rejected": -1.4316211938858032, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 1.2069889493903112e-08, |
|
"logits/chosen": -2.40596342086792, |
|
"logits/rejected": -2.3436505794525146, |
|
"logps/chosen": -426.20660400390625, |
|
"logps/rejected": -443.60552978515625, |
|
"loss": 0.5476, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.8504959344863892, |
|
"rewards/margins": 0.5777041912078857, |
|
"rewards/rejected": -1.4282000064849854, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 1.1214543861387039e-08, |
|
"logits/chosen": -2.3759148120880127, |
|
"logits/rejected": -2.3197531700134277, |
|
"logps/chosen": -403.3725280761719, |
|
"logps/rejected": -445.80767822265625, |
|
"loss": 0.5113, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.731840193271637, |
|
"rewards/margins": 0.7046986818313599, |
|
"rewards/rejected": -1.4365389347076416, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"eval_logits/chosen": -2.4361400604248047, |
|
"eval_logits/rejected": -2.3603618144989014, |
|
"eval_logps/chosen": -434.4620361328125, |
|
"eval_logps/rejected": -441.12109375, |
|
"eval_loss": 0.5855809450149536, |
|
"eval_rewards/accuracies": 0.7242063283920288, |
|
"eval_rewards/chosen": -0.894174337387085, |
|
"eval_rewards/margins": 0.4899892508983612, |
|
"eval_rewards/rejected": -1.384163737297058, |
|
"eval_runtime": 244.6014, |
|
"eval_samples_per_second": 8.177, |
|
"eval_steps_per_second": 0.258, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 1.0389934915310344e-08, |
|
"logits/chosen": -2.3394923210144043, |
|
"logits/rejected": -2.297569990158081, |
|
"logps/chosen": -414.2259826660156, |
|
"logps/rejected": -447.39239501953125, |
|
"loss": 0.5295, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.9291477203369141, |
|
"rewards/margins": 0.49646610021591187, |
|
"rewards/rejected": -1.4256137609481812, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 9.596168800105081e-09, |
|
"logits/chosen": -2.4024291038513184, |
|
"logits/rejected": -2.347120523452759, |
|
"logps/chosen": -436.79364013671875, |
|
"logps/rejected": -450.7452697753906, |
|
"loss": 0.5388, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8427282571792603, |
|
"rewards/margins": 0.5560713410377502, |
|
"rewards/rejected": -1.3987995386123657, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 8.833347690085258e-09, |
|
"logits/chosen": -2.4367868900299072, |
|
"logits/rejected": -2.39370059967041, |
|
"logps/chosen": -430.2493591308594, |
|
"logps/rejected": -459.7605895996094, |
|
"loss": 0.5325, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.816397488117218, |
|
"rewards/margins": 0.48605260252952576, |
|
"rewards/rejected": -1.3024499416351318, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 8.101569776295087e-09, |
|
"logits/chosen": -2.44547700881958, |
|
"logits/rejected": -2.3715062141418457, |
|
"logps/chosen": -451.3525390625, |
|
"logps/rejected": -472.49871826171875, |
|
"loss": 0.5547, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.8327264785766602, |
|
"rewards/margins": 0.5013109445571899, |
|
"rewards/rejected": -1.3340375423431396, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 7.400929253869537e-09, |
|
"logits/chosen": -2.386373519897461, |
|
"logits/rejected": -2.3548552989959717, |
|
"logps/chosen": -411.06036376953125, |
|
"logps/rejected": -401.76202392578125, |
|
"loss": 0.5401, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.7602349519729614, |
|
"rewards/margins": 0.544723391532898, |
|
"rewards/rejected": -1.3049582242965698, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 6.731516309909619e-09, |
|
"logits/chosen": -2.4245498180389404, |
|
"logits/rejected": -2.351013660430908, |
|
"logps/chosen": -413.696533203125, |
|
"logps/rejected": -431.78143310546875, |
|
"loss": 0.5556, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.8400930166244507, |
|
"rewards/margins": 0.5150105357170105, |
|
"rewards/rejected": -1.3551037311553955, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 6.093417111873306e-09, |
|
"logits/chosen": -2.405090808868408, |
|
"logits/rejected": -2.363133668899536, |
|
"logps/chosen": -446.4725036621094, |
|
"logps/rejected": -457.05291748046875, |
|
"loss": 0.5637, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.940416157245636, |
|
"rewards/margins": 0.46864986419677734, |
|
"rewards/rejected": -1.4090659618377686, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 5.486713796483966e-09, |
|
"logits/chosen": -2.4188191890716553, |
|
"logits/rejected": -2.4030518531799316, |
|
"logps/chosen": -426.7608947753906, |
|
"logps/rejected": -478.41424560546875, |
|
"loss": 0.5375, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9050353765487671, |
|
"rewards/margins": 0.4908295273780823, |
|
"rewards/rejected": -1.3958650827407837, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 4.911484459157844e-09, |
|
"logits/chosen": -2.355522632598877, |
|
"logits/rejected": -2.2627921104431152, |
|
"logps/chosen": -414.41058349609375, |
|
"logps/rejected": -408.3012390136719, |
|
"loss": 0.5264, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.8410286903381348, |
|
"rewards/margins": 0.5534576177597046, |
|
"rewards/rejected": -1.394486427307129, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 4.36780314395116e-09, |
|
"logits/chosen": -2.391401767730713, |
|
"logits/rejected": -2.301657199859619, |
|
"logps/chosen": -408.6781005859375, |
|
"logps/rejected": -399.43438720703125, |
|
"loss": 0.5601, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.82252436876297, |
|
"rewards/margins": 0.43992215394973755, |
|
"rewards/rejected": -1.262446403503418, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"eval_logits/chosen": -2.434546947479248, |
|
"eval_logits/rejected": -2.35845685005188, |
|
"eval_logps/chosen": -435.4488525390625, |
|
"eval_logps/rejected": -442.29296875, |
|
"eval_loss": 0.5855222344398499, |
|
"eval_rewards/accuracies": 0.726190447807312, |
|
"eval_rewards/chosen": -0.904042661190033, |
|
"eval_rewards/margins": 0.49184030294418335, |
|
"eval_rewards/rejected": -1.3958829641342163, |
|
"eval_runtime": 247.9608, |
|
"eval_samples_per_second": 8.066, |
|
"eval_steps_per_second": 0.254, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 3.8557398340296195e-09, |
|
"logits/chosen": -2.4358608722686768, |
|
"logits/rejected": -2.317959785461426, |
|
"logps/chosen": -421.12969970703125, |
|
"logps/rejected": -426.5562438964844, |
|
"loss": 0.5643, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.9019654393196106, |
|
"rewards/margins": 0.5431965589523315, |
|
"rewards/rejected": -1.445162057876587, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 3.3753604426595417e-09, |
|
"logits/chosen": -2.3855860233306885, |
|
"logits/rejected": -2.311525344848633, |
|
"logps/chosen": -406.04010009765625, |
|
"logps/rejected": -413.10186767578125, |
|
"loss": 0.5756, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.8721002340316772, |
|
"rewards/margins": 0.516608715057373, |
|
"rewards/rejected": -1.3887090682983398, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.926726804723917e-09, |
|
"logits/chosen": -2.3775479793548584, |
|
"logits/rejected": -2.3673160076141357, |
|
"logps/chosen": -443.3018493652344, |
|
"logps/rejected": -457.483154296875, |
|
"loss": 0.5549, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8438417315483093, |
|
"rewards/margins": 0.5027114748954773, |
|
"rewards/rejected": -1.3465534448623657, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.5098966687626954e-09, |
|
"logits/chosen": -2.4318108558654785, |
|
"logits/rejected": -2.3373677730560303, |
|
"logps/chosen": -429.6363830566406, |
|
"logps/rejected": -441.6180725097656, |
|
"loss": 0.5237, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.8596477508544922, |
|
"rewards/margins": 0.6358083486557007, |
|
"rewards/rejected": -1.4954560995101929, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.124923689539426e-09, |
|
"logits/chosen": -2.4223837852478027, |
|
"logits/rejected": -2.356449604034424, |
|
"logps/chosen": -418.0809631347656, |
|
"logps/rejected": -436.1025390625, |
|
"loss": 0.5379, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8189032673835754, |
|
"rewards/margins": 0.5849324464797974, |
|
"rewards/rejected": -1.4038358926773071, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.7718574211347537e-09, |
|
"logits/chosen": -2.3926260471343994, |
|
"logits/rejected": -2.3217663764953613, |
|
"logps/chosen": -391.73358154296875, |
|
"logps/rejected": -397.6258239746094, |
|
"loss": 0.5441, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8713921308517456, |
|
"rewards/margins": 0.4508208632469177, |
|
"rewards/rejected": -1.3222129344940186, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.4507433105677703e-09, |
|
"logits/chosen": -2.361290454864502, |
|
"logits/rejected": -2.2786500453948975, |
|
"logps/chosen": -435.0477600097656, |
|
"logps/rejected": -461.3599548339844, |
|
"loss": 0.5338, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.8669681549072266, |
|
"rewards/margins": 0.5835943222045898, |
|
"rewards/rejected": -1.4505623579025269, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.1616226919460015e-09, |
|
"logits/chosen": -2.339124917984009, |
|
"logits/rejected": -2.259683132171631, |
|
"logps/chosen": -381.10076904296875, |
|
"logps/rejected": -404.74615478515625, |
|
"loss": 0.5604, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.9128230214118958, |
|
"rewards/margins": 0.4604857563972473, |
|
"rewards/rejected": -1.3733086585998535, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 9.045327811449676e-10, |
|
"logits/chosen": -2.357779026031494, |
|
"logits/rejected": -2.281944990158081, |
|
"logps/chosen": -403.49346923828125, |
|
"logps/rejected": -422.0860290527344, |
|
"loss": 0.5393, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8404678106307983, |
|
"rewards/margins": 0.6726306676864624, |
|
"rewards/rejected": -1.5130985975265503, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 6.795066710175157e-10, |
|
"logits/chosen": -2.414121150970459, |
|
"logits/rejected": -2.334519863128662, |
|
"logps/chosen": -422.6380920410156, |
|
"logps/rejected": -431.3646545410156, |
|
"loss": 0.5303, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.8720327615737915, |
|
"rewards/margins": 0.5439731478691101, |
|
"rewards/rejected": -1.4160058498382568, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"eval_logits/chosen": -2.434152364730835, |
|
"eval_logits/rejected": -2.358067512512207, |
|
"eval_logps/chosen": -435.0786437988281, |
|
"eval_logps/rejected": -441.68048095703125, |
|
"eval_loss": 0.5856688618659973, |
|
"eval_rewards/accuracies": 0.7242063283920288, |
|
"eval_rewards/chosen": -0.9003406763076782, |
|
"eval_rewards/margins": 0.4894171357154846, |
|
"eval_rewards/rejected": -1.389757752418518, |
|
"eval_runtime": 249.9152, |
|
"eval_samples_per_second": 8.003, |
|
"eval_steps_per_second": 0.252, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 4.86573327134282e-10, |
|
"logits/chosen": -2.392698287963867, |
|
"logits/rejected": -2.3596174716949463, |
|
"logps/chosen": -470.7052307128906, |
|
"logps/rejected": -468.05133056640625, |
|
"loss": 0.5605, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.9614042043685913, |
|
"rewards/margins": 0.41344791650772095, |
|
"rewards/rejected": -1.374852180480957, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 3.2575758405506414e-10, |
|
"logits/chosen": -2.449122428894043, |
|
"logits/rejected": -2.3575618267059326, |
|
"logps/chosen": -462.04248046875, |
|
"logps/rejected": -493.18243408203125, |
|
"loss": 0.5312, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.8269790410995483, |
|
"rewards/margins": 0.6107988357543945, |
|
"rewards/rejected": -1.4377778768539429, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 1.9708014213221101e-10, |
|
"logits/chosen": -2.3595032691955566, |
|
"logits/rejected": -2.317110538482666, |
|
"logps/chosen": -446.1568298339844, |
|
"logps/rejected": -459.81341552734375, |
|
"loss": 0.5503, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.9512478709220886, |
|
"rewards/margins": 0.4871467649936676, |
|
"rewards/rejected": -1.4383947849273682, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 1.0055756484589339e-10, |
|
"logits/chosen": -2.350008249282837, |
|
"logits/rejected": -2.2946763038635254, |
|
"logps/chosen": -425.847412109375, |
|
"logps/rejected": -416.67535400390625, |
|
"loss": 0.5338, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7966060042381287, |
|
"rewards/margins": 0.5134121179580688, |
|
"rewards/rejected": -1.3100181818008423, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 3.620227667228137e-11, |
|
"logits/chosen": -2.44765043258667, |
|
"logits/rejected": -2.388314723968506, |
|
"logps/chosen": -457.08624267578125, |
|
"logps/rejected": -460.8662109375, |
|
"loss": 0.5446, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.860503077507019, |
|
"rewards/margins": 0.4813441336154938, |
|
"rewards/rejected": -1.3418471813201904, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 4.022561484018361e-12, |
|
"logits/chosen": -2.4590046405792236, |
|
"logits/rejected": -2.3513126373291016, |
|
"logps/chosen": -433.288330078125, |
|
"logps/rejected": -440.690673828125, |
|
"loss": 0.5286, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.8339791297912598, |
|
"rewards/margins": 0.594098687171936, |
|
"rewards/rejected": -1.4280778169631958, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 2865, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5928295368507478, |
|
"train_runtime": 47453.5759, |
|
"train_samples_per_second": 3.865, |
|
"train_steps_per_second": 0.06 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2865, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|