|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9984301412872841, |
|
"eval_steps": 100, |
|
"global_step": 477, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.0416666666666666e-08, |
|
"logits/chosen": -2.6597137451171875, |
|
"logits/rejected": -2.5902962684631348, |
|
"logps/chosen": -296.01092529296875, |
|
"logps/rejected": -290.09039306640625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -2.624401807785034, |
|
"logits/rejected": -2.58382511138916, |
|
"logps/chosen": -278.5157775878906, |
|
"logps/rejected": -242.1708984375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4618055522441864, |
|
"rewards/chosen": 0.00044631207128986716, |
|
"rewards/margins": 0.0005090843187645078, |
|
"rewards/rejected": -6.277219654293731e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -2.6496779918670654, |
|
"logits/rejected": -2.5882985591888428, |
|
"logps/chosen": -276.2167663574219, |
|
"logps/rejected": -246.029052734375, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0011385921388864517, |
|
"rewards/margins": 0.001740106614306569, |
|
"rewards/rejected": -0.0006015143590047956, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.6365559101104736, |
|
"logits/rejected": -2.5856800079345703, |
|
"logps/chosen": -291.7817077636719, |
|
"logps/rejected": -274.3924255371094, |
|
"loss": 0.6887, |
|
"rewards/accuracies": 0.6656249761581421, |
|
"rewards/chosen": 0.0060002789832651615, |
|
"rewards/margins": 0.009898515418171883, |
|
"rewards/rejected": -0.0038982369005680084, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -2.5962817668914795, |
|
"logits/rejected": -2.533491611480713, |
|
"logps/chosen": -304.540283203125, |
|
"logps/rejected": -277.9544677734375, |
|
"loss": 0.6757, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.02992216870188713, |
|
"rewards/margins": 0.03784631937742233, |
|
"rewards/rejected": -0.007924148812890053, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999731868769026e-07, |
|
"logits/chosen": -2.550187110900879, |
|
"logits/rejected": -2.5181498527526855, |
|
"logps/chosen": -298.8063049316406, |
|
"logps/rejected": -287.06878662109375, |
|
"loss": 0.6604, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -0.0016534685855731368, |
|
"rewards/margins": 0.07625629007816315, |
|
"rewards/rejected": -0.07790975272655487, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.990353313429303e-07, |
|
"logits/chosen": -2.5104401111602783, |
|
"logits/rejected": -2.409574031829834, |
|
"logps/chosen": -286.75982666015625, |
|
"logps/rejected": -255.2505340576172, |
|
"loss": 0.6393, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -0.038099173456430435, |
|
"rewards/margins": 0.16247370839118958, |
|
"rewards/rejected": -0.2005728781223297, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.967625656594781e-07, |
|
"logits/chosen": -2.5491867065429688, |
|
"logits/rejected": -2.4775824546813965, |
|
"logps/chosen": -305.6695556640625, |
|
"logps/rejected": -294.116455078125, |
|
"loss": 0.6222, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": -0.25629425048828125, |
|
"rewards/margins": 0.2100234478712082, |
|
"rewards/rejected": -0.466317743062973, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.93167072587771e-07, |
|
"logits/chosen": -2.512619972229004, |
|
"logits/rejected": -2.459233522415161, |
|
"logps/chosen": -313.1577453613281, |
|
"logps/rejected": -333.73199462890625, |
|
"loss": 0.6115, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": -0.26731860637664795, |
|
"rewards/margins": 0.19400887191295624, |
|
"rewards/rejected": -0.4613274931907654, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.882681251368548e-07, |
|
"logits/chosen": -2.453090190887451, |
|
"logits/rejected": -2.3753929138183594, |
|
"logps/chosen": -286.51220703125, |
|
"logps/rejected": -299.60174560546875, |
|
"loss": 0.5834, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -0.2537994682788849, |
|
"rewards/margins": 0.3461161255836487, |
|
"rewards/rejected": -0.5999155044555664, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.820919832540181e-07, |
|
"logits/chosen": -2.481619358062744, |
|
"logits/rejected": -2.3836333751678467, |
|
"logps/chosen": -316.19061279296875, |
|
"logps/rejected": -341.30596923828125, |
|
"loss": 0.5736, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.3343900740146637, |
|
"rewards/margins": 0.4206913113594055, |
|
"rewards/rejected": -0.755081295967102, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": -2.432966470718384, |
|
"eval_logits/rejected": -2.3509771823883057, |
|
"eval_logps/chosen": -322.4753723144531, |
|
"eval_logps/rejected": -346.4595947265625, |
|
"eval_loss": 0.584247350692749, |
|
"eval_rewards/accuracies": 0.7242063283920288, |
|
"eval_rewards/chosen": -0.3836560547351837, |
|
"eval_rewards/margins": 0.46315449476242065, |
|
"eval_rewards/rejected": -0.846810519695282, |
|
"eval_runtime": 210.4195, |
|
"eval_samples_per_second": 9.505, |
|
"eval_steps_per_second": 0.299, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7467175306295647e-07, |
|
"logits/chosen": -2.392451763153076, |
|
"logits/rejected": -2.2609496116638184, |
|
"logps/chosen": -329.49127197265625, |
|
"logps/rejected": -338.2959289550781, |
|
"loss": 0.5785, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.32086434960365295, |
|
"rewards/margins": 0.4109874665737152, |
|
"rewards/rejected": -0.7318518161773682, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.6604720940421207e-07, |
|
"logits/chosen": -1.5522091388702393, |
|
"logits/rejected": -1.2740141153335571, |
|
"logps/chosen": -365.6708984375, |
|
"logps/rejected": -378.64117431640625, |
|
"loss": 0.5441, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.8018746376037598, |
|
"rewards/margins": 0.6111066341400146, |
|
"rewards/rejected": -1.4129812717437744, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5626458262912735e-07, |
|
"logits/chosen": -1.4644163846969604, |
|
"logits/rejected": -1.1872146129608154, |
|
"logps/chosen": -324.7268371582031, |
|
"logps/rejected": -374.5579833984375, |
|
"loss": 0.549, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.4890199601650238, |
|
"rewards/margins": 0.5601330995559692, |
|
"rewards/rejected": -1.0491530895233154, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.453763107901675e-07, |
|
"logits/chosen": -0.9067952036857605, |
|
"logits/rejected": -0.45260438323020935, |
|
"logps/chosen": -365.0186462402344, |
|
"logps/rejected": -414.08526611328125, |
|
"loss": 0.5265, |
|
"rewards/accuracies": 0.784375011920929, |
|
"rewards/chosen": -0.6849763989448547, |
|
"rewards/margins": 0.7878872752189636, |
|
"rewards/rejected": -1.4728636741638184, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.3344075855595097e-07, |
|
"logits/chosen": -0.6669697761535645, |
|
"logits/rejected": -0.12073500454425812, |
|
"logps/chosen": -363.00494384765625, |
|
"logps/rejected": -401.21533203125, |
|
"loss": 0.5248, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.7877362966537476, |
|
"rewards/margins": 0.7080036401748657, |
|
"rewards/rejected": -1.4957398176193237, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.2052190435769554e-07, |
|
"logits/chosen": -0.41461697220802307, |
|
"logits/rejected": 0.06870967149734497, |
|
"logps/chosen": -365.81103515625, |
|
"logps/rejected": -428.4544982910156, |
|
"loss": 0.5073, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.8387645483016968, |
|
"rewards/margins": 0.7532976269721985, |
|
"rewards/rejected": -1.59206223487854, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.0668899744407567e-07, |
|
"logits/chosen": -0.7587008476257324, |
|
"logits/rejected": -0.3191295862197876, |
|
"logps/chosen": -342.67425537109375, |
|
"logps/rejected": -367.0728759765625, |
|
"loss": 0.5653, |
|
"rewards/accuracies": 0.6656249761581421, |
|
"rewards/chosen": -0.8000960350036621, |
|
"rewards/margins": 0.5834600329399109, |
|
"rewards/rejected": -1.3835561275482178, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.920161866827889e-07, |
|
"logits/chosen": -0.17186331748962402, |
|
"logits/rejected": 0.25430601835250854, |
|
"logps/chosen": -365.8907165527344, |
|
"logps/rejected": -429.11688232421875, |
|
"loss": 0.5225, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.884574294090271, |
|
"rewards/margins": 0.7979339361190796, |
|
"rewards/rejected": -1.6825082302093506, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.765821230985757e-07, |
|
"logits/chosen": 0.03502330183982849, |
|
"logits/rejected": 0.6186638474464417, |
|
"logps/chosen": -348.1022644042969, |
|
"logps/rejected": -428.8955993652344, |
|
"loss": 0.5105, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.7972752451896667, |
|
"rewards/margins": 0.9095140695571899, |
|
"rewards/rejected": -1.706789255142212, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.604695382782159e-07, |
|
"logits/chosen": -0.39503103494644165, |
|
"logits/rejected": 0.1367359161376953, |
|
"logps/chosen": -365.77545166015625, |
|
"logps/rejected": -400.91998291015625, |
|
"loss": 0.5116, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.7570338249206543, |
|
"rewards/margins": 0.7489927411079407, |
|
"rewards/rejected": -1.5060265064239502, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": 0.04591968283057213, |
|
"eval_logits/rejected": 0.5012260675430298, |
|
"eval_logps/chosen": -374.5287780761719, |
|
"eval_logps/rejected": -435.0859375, |
|
"eval_loss": 0.5307875871658325, |
|
"eval_rewards/accuracies": 0.7519841194152832, |
|
"eval_rewards/chosen": -0.904190182685852, |
|
"eval_rewards/margins": 0.8288835883140564, |
|
"eval_rewards/rejected": -1.7330738306045532, |
|
"eval_runtime": 211.7151, |
|
"eval_samples_per_second": 9.447, |
|
"eval_steps_per_second": 0.298, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4376480090239047e-07, |
|
"logits/chosen": 0.21950086951255798, |
|
"logits/rejected": 0.7312016487121582, |
|
"logps/chosen": -394.24078369140625, |
|
"logps/rejected": -439.376220703125, |
|
"loss": 0.5279, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -1.0525823831558228, |
|
"rewards/margins": 0.7364203929901123, |
|
"rewards/rejected": -1.789002776145935, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.265574537815398e-07, |
|
"logits/chosen": 0.034196797758340836, |
|
"logits/rejected": 0.35187411308288574, |
|
"logps/chosen": -332.6381530761719, |
|
"logps/rejected": -382.64434814453125, |
|
"loss": 0.5384, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.7204285860061646, |
|
"rewards/margins": 0.6646216511726379, |
|
"rewards/rejected": -1.3850500583648682, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.0893973387735683e-07, |
|
"logits/chosen": 0.3270489573478699, |
|
"logits/rejected": 0.8940129280090332, |
|
"logps/chosen": -341.1247253417969, |
|
"logps/rejected": -408.52337646484375, |
|
"loss": 0.5218, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.765308141708374, |
|
"rewards/margins": 0.8177844882011414, |
|
"rewards/rejected": -1.583092451095581, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.910060778827554e-07, |
|
"logits/chosen": 0.8036887049674988, |
|
"logits/rejected": 1.329053521156311, |
|
"logps/chosen": -384.66473388671875, |
|
"logps/rejected": -448.1686096191406, |
|
"loss": 0.5285, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.1919479370117188, |
|
"rewards/margins": 0.8430492281913757, |
|
"rewards/rejected": -2.0349972248077393, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7285261601056697e-07, |
|
"logits/chosen": 0.51897132396698, |
|
"logits/rejected": 1.1455881595611572, |
|
"logps/chosen": -373.64044189453125, |
|
"logps/rejected": -425.75494384765625, |
|
"loss": 0.519, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.9999877214431763, |
|
"rewards/margins": 0.7631025314331055, |
|
"rewards/rejected": -1.7630901336669922, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5457665670441937e-07, |
|
"logits/chosen": 0.5749747157096863, |
|
"logits/rejected": 1.220221757888794, |
|
"logps/chosen": -371.4618225097656, |
|
"logps/rejected": -424.69439697265625, |
|
"loss": 0.4969, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.9122276306152344, |
|
"rewards/margins": 0.8183482885360718, |
|
"rewards/rejected": -1.7305759191513062, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3627616503391812e-07, |
|
"logits/chosen": 0.8650910258293152, |
|
"logits/rejected": 1.3838642835617065, |
|
"logps/chosen": -398.350341796875, |
|
"logps/rejected": -484.79547119140625, |
|
"loss": 0.5051, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.091962456703186, |
|
"rewards/margins": 0.9698148965835571, |
|
"rewards/rejected": -2.061777353286743, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1804923757009882e-07, |
|
"logits/chosen": 0.9545858502388, |
|
"logits/rejected": 1.6210914850234985, |
|
"logps/chosen": -365.74053955078125, |
|
"logps/rejected": -422.39300537109375, |
|
"loss": 0.5248, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.9749383926391602, |
|
"rewards/margins": 0.841883659362793, |
|
"rewards/rejected": -1.8168220520019531, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9999357655598891e-07, |
|
"logits/chosen": 1.363952875137329, |
|
"logits/rejected": 2.0388360023498535, |
|
"logps/chosen": -385.5817565917969, |
|
"logps/rejected": -436.33209228515625, |
|
"loss": 0.5155, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -1.1055189371109009, |
|
"rewards/margins": 0.77390056848526, |
|
"rewards/rejected": -1.8794195652008057, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8220596619089573e-07, |
|
"logits/chosen": 1.3142567873001099, |
|
"logits/rejected": 1.9236018657684326, |
|
"logps/chosen": -394.31964111328125, |
|
"logps/rejected": -467.43756103515625, |
|
"loss": 0.5027, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.9480918049812317, |
|
"rewards/margins": 0.7692841291427612, |
|
"rewards/rejected": -1.7173759937286377, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": 1.1385366916656494, |
|
"eval_logits/rejected": 1.822389006614685, |
|
"eval_logps/chosen": -372.8834228515625, |
|
"eval_logps/rejected": -436.4477844238281, |
|
"eval_loss": 0.508425235748291, |
|
"eval_rewards/accuracies": 0.7638888955116272, |
|
"eval_rewards/chosen": -0.887736976146698, |
|
"eval_rewards/margins": 0.8589555621147156, |
|
"eval_rewards/rejected": -1.746692419052124, |
|
"eval_runtime": 211.1789, |
|
"eval_samples_per_second": 9.471, |
|
"eval_steps_per_second": 0.298, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.647817538357072e-07, |
|
"logits/chosen": 1.0226597785949707, |
|
"logits/rejected": 1.96248459815979, |
|
"logps/chosen": -367.84130859375, |
|
"logps/rejected": -404.55963134765625, |
|
"loss": 0.4985, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.9431735277175903, |
|
"rewards/margins": 0.8295876383781433, |
|
"rewards/rejected": -1.7727611064910889, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.478143389201113e-07, |
|
"logits/chosen": 1.6019757986068726, |
|
"logits/rejected": 2.321258068084717, |
|
"logps/chosen": -374.2918701171875, |
|
"logps/rejected": -456.11602783203125, |
|
"loss": 0.5035, |
|
"rewards/accuracies": 0.7718750238418579, |
|
"rewards/chosen": -1.0117809772491455, |
|
"rewards/margins": 0.9862319231033325, |
|
"rewards/rejected": -1.998012900352478, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3139467229135998e-07, |
|
"logits/chosen": 1.089303970336914, |
|
"logits/rejected": 1.724713921546936, |
|
"logps/chosen": -422.41314697265625, |
|
"logps/rejected": -462.42999267578125, |
|
"loss": 0.5162, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.184761881828308, |
|
"rewards/margins": 0.7901886701583862, |
|
"rewards/rejected": -1.9749505519866943, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1561076868822755e-07, |
|
"logits/chosen": 1.2003899812698364, |
|
"logits/rejected": 1.7042887210845947, |
|
"logps/chosen": -374.2175598144531, |
|
"logps/rejected": -423.61114501953125, |
|
"loss": 0.5141, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.0190467834472656, |
|
"rewards/margins": 0.6825306415557861, |
|
"rewards/rejected": -1.7015774250030518, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0054723495346482e-07, |
|
"logits/chosen": 1.183173418045044, |
|
"logits/rejected": 2.0356898307800293, |
|
"logps/chosen": -359.0115661621094, |
|
"logps/rejected": -438.88336181640625, |
|
"loss": 0.4766, |
|
"rewards/accuracies": 0.7718750238418579, |
|
"rewards/chosen": -1.0118539333343506, |
|
"rewards/margins": 0.9436109662055969, |
|
"rewards/rejected": -1.9554650783538818, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.628481651367875e-08, |
|
"logits/chosen": 1.1831741333007812, |
|
"logits/rejected": 1.9693758487701416, |
|
"logps/chosen": -408.16705322265625, |
|
"logps/rejected": -451.48370361328125, |
|
"loss": 0.4836, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -1.0999749898910522, |
|
"rewards/margins": 0.8795393705368042, |
|
"rewards/rejected": -1.9795143604278564, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.289996455765748e-08, |
|
"logits/chosen": 1.1307913064956665, |
|
"logits/rejected": 2.096768856048584, |
|
"logps/chosen": -413.69805908203125, |
|
"logps/rejected": -458.0303649902344, |
|
"loss": 0.499, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.0460116863250732, |
|
"rewards/margins": 0.9790540933609009, |
|
"rewards/rejected": -2.0250658988952637, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.046442623320145e-08, |
|
"logits/chosen": 0.9909412264823914, |
|
"logits/rejected": 1.6467933654785156, |
|
"logps/chosen": -381.5520324707031, |
|
"logps/rejected": -453.1272888183594, |
|
"loss": 0.4727, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0082350969314575, |
|
"rewards/margins": 0.8770313262939453, |
|
"rewards/rejected": -1.8852663040161133, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.904486005914027e-08, |
|
"logits/chosen": 1.356707215309143, |
|
"logits/rejected": 1.9564104080200195, |
|
"logps/chosen": -383.09393310546875, |
|
"logps/rejected": -455.9044494628906, |
|
"loss": 0.5061, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.0799481868743896, |
|
"rewards/margins": 0.7995551228523254, |
|
"rewards/rejected": -1.879503607749939, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.8702478614051345e-08, |
|
"logits/chosen": 1.2665964365005493, |
|
"logits/rejected": 1.9937832355499268, |
|
"logps/chosen": -441.0684509277344, |
|
"logps/rejected": -508.45269775390625, |
|
"loss": 0.4823, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.202941656112671, |
|
"rewards/margins": 1.0496307611465454, |
|
"rewards/rejected": -2.2525722980499268, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": 1.3074696063995361, |
|
"eval_logits/rejected": 1.9977563619613647, |
|
"eval_logps/chosen": -403.6374816894531, |
|
"eval_logps/rejected": -476.98516845703125, |
|
"eval_loss": 0.5037237405776978, |
|
"eval_rewards/accuracies": 0.761904776096344, |
|
"eval_rewards/chosen": -1.195277214050293, |
|
"eval_rewards/margins": 0.9567888975143433, |
|
"eval_rewards/rejected": -2.152066230773926, |
|
"eval_runtime": 212.3326, |
|
"eval_samples_per_second": 9.419, |
|
"eval_steps_per_second": 0.297, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.9492720416985e-08, |
|
"logits/chosen": 1.6189842224121094, |
|
"logits/rejected": 2.477853775024414, |
|
"logps/chosen": -386.3858947753906, |
|
"logps/rejected": -442.0225524902344, |
|
"loss": 0.5095, |
|
"rewards/accuracies": 0.7593749761581421, |
|
"rewards/chosen": -1.2031550407409668, |
|
"rewards/margins": 0.845988929271698, |
|
"rewards/rejected": -2.0491440296173096, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1464952759020856e-08, |
|
"logits/chosen": 1.4740936756134033, |
|
"logits/rejected": 1.9605319499969482, |
|
"logps/chosen": -398.62274169921875, |
|
"logps/rejected": -460.81201171875, |
|
"loss": 0.5112, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -1.1762058734893799, |
|
"rewards/margins": 0.8007314801216125, |
|
"rewards/rejected": -1.9769372940063477, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.4662207078575684e-08, |
|
"logits/chosen": 1.1320947408676147, |
|
"logits/rejected": 1.8564393520355225, |
|
"logps/chosen": -397.1152648925781, |
|
"logps/rejected": -444.3277282714844, |
|
"loss": 0.5032, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.07761549949646, |
|
"rewards/margins": 0.886116623878479, |
|
"rewards/rejected": -1.963732361793518, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.12094829893642e-09, |
|
"logits/chosen": 1.1670544147491455, |
|
"logits/rejected": 1.9005542993545532, |
|
"logps/chosen": -362.5390319824219, |
|
"logps/rejected": -448.8984375, |
|
"loss": 0.4901, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.019899606704712, |
|
"rewards/margins": 0.9710233807563782, |
|
"rewards/rejected": -1.9909226894378662, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.8708793644441086e-09, |
|
"logits/chosen": 1.165825605392456, |
|
"logits/rejected": 1.7851331233978271, |
|
"logps/chosen": -393.0291442871094, |
|
"logps/rejected": -466.2897033691406, |
|
"loss": 0.5112, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.1060134172439575, |
|
"rewards/margins": 0.8082348704338074, |
|
"rewards/rejected": -1.9142482280731201, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.9347820230782295e-09, |
|
"logits/chosen": 1.1381809711456299, |
|
"logits/rejected": 2.048168897628784, |
|
"logps/chosen": -380.725830078125, |
|
"logps/rejected": -454.0580139160156, |
|
"loss": 0.4878, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.0911556482315063, |
|
"rewards/margins": 0.9514607191085815, |
|
"rewards/rejected": -2.042616367340088, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.2839470889836627e-10, |
|
"logits/chosen": 0.9774567484855652, |
|
"logits/rejected": 1.967919111251831, |
|
"logps/chosen": -411.60504150390625, |
|
"logps/rejected": -474.35650634765625, |
|
"loss": 0.4835, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -1.1035670042037964, |
|
"rewards/margins": 0.9632620811462402, |
|
"rewards/rejected": -2.066829204559326, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 477, |
|
"total_flos": 0.0, |
|
"train_loss": 0.540120158305458, |
|
"train_runtime": 18392.9814, |
|
"train_samples_per_second": 3.324, |
|
"train_steps_per_second": 0.026 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 477, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|