|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9989071038251366, |
|
"eval_steps": 400, |
|
"global_step": 457, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01092896174863388, |
|
"grad_norm": 315.2451683922351, |
|
"learning_rate": 1.0869565217391303e-07, |
|
"logits/chosen": -1.0065257549285889, |
|
"logits/rejected": -1.0008176565170288, |
|
"logps/chosen": -0.28065255284309387, |
|
"logps/rejected": -0.28539329767227173, |
|
"loss": 3.4114, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.806525468826294, |
|
"rewards/margins": 0.04740738496184349, |
|
"rewards/rejected": -2.8539328575134277, |
|
"semantic_entropy": 0.7513969540596008, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02185792349726776, |
|
"grad_norm": 181.1484663719842, |
|
"learning_rate": 2.1739130434782607e-07, |
|
"logits/chosen": -1.0534369945526123, |
|
"logits/rejected": -1.0029994249343872, |
|
"logps/chosen": -0.2570807933807373, |
|
"logps/rejected": -0.27113229036331177, |
|
"loss": 3.3911, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.570807933807373, |
|
"rewards/margins": 0.14051488041877747, |
|
"rewards/rejected": -2.711322784423828, |
|
"semantic_entropy": 0.710273802280426, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03278688524590164, |
|
"grad_norm": 178.65067568018998, |
|
"learning_rate": 3.260869565217391e-07, |
|
"logits/chosen": -1.0082308053970337, |
|
"logits/rejected": -0.9609392285346985, |
|
"logps/chosen": -0.26744094491004944, |
|
"logps/rejected": -0.27332359552383423, |
|
"loss": 3.3533, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -2.6744089126586914, |
|
"rewards/margins": 0.05882669612765312, |
|
"rewards/rejected": -2.7332358360290527, |
|
"semantic_entropy": 0.7273439168930054, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04371584699453552, |
|
"grad_norm": 258.46792679447157, |
|
"learning_rate": 4.3478260869565214e-07, |
|
"logits/chosen": -0.9462105631828308, |
|
"logits/rejected": -0.8957524299621582, |
|
"logps/chosen": -0.27257752418518066, |
|
"logps/rejected": -0.2848864197731018, |
|
"loss": 3.3976, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.7257750034332275, |
|
"rewards/margins": 0.12308906018733978, |
|
"rewards/rejected": -2.8488640785217285, |
|
"semantic_entropy": 0.7455072999000549, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0546448087431694, |
|
"grad_norm": 273.0706988791117, |
|
"learning_rate": 5.434782608695652e-07, |
|
"logits/chosen": -0.9422909617424011, |
|
"logits/rejected": -0.8697713613510132, |
|
"logps/chosen": -0.2761459946632385, |
|
"logps/rejected": -0.2941877543926239, |
|
"loss": 3.346, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.761460065841675, |
|
"rewards/margins": 0.18041765689849854, |
|
"rewards/rejected": -2.9418773651123047, |
|
"semantic_entropy": 0.7553174495697021, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06557377049180328, |
|
"grad_norm": 260.6158887162735, |
|
"learning_rate": 6.521739130434782e-07, |
|
"logits/chosen": -1.0548616647720337, |
|
"logits/rejected": -0.9892600774765015, |
|
"logps/chosen": -0.2682558596134186, |
|
"logps/rejected": -0.284037709236145, |
|
"loss": 3.4058, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -2.682558536529541, |
|
"rewards/margins": 0.1578185111284256, |
|
"rewards/rejected": -2.8403773307800293, |
|
"semantic_entropy": 0.7250551581382751, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07650273224043716, |
|
"grad_norm": 124.4121289092538, |
|
"learning_rate": 7.608695652173913e-07, |
|
"logits/chosen": -1.0096337795257568, |
|
"logits/rejected": -0.9423675537109375, |
|
"logps/chosen": -0.2600244879722595, |
|
"logps/rejected": -0.27900081872940063, |
|
"loss": 3.313, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -2.6002449989318848, |
|
"rewards/margins": 0.18976299464702606, |
|
"rewards/rejected": -2.790008068084717, |
|
"semantic_entropy": 0.7209498882293701, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08743169398907104, |
|
"grad_norm": 147.2873491379567, |
|
"learning_rate": 8.695652173913043e-07, |
|
"logits/chosen": -0.9600120782852173, |
|
"logits/rejected": -0.8983286619186401, |
|
"logps/chosen": -0.2835314869880676, |
|
"logps/rejected": -0.2980334460735321, |
|
"loss": 3.454, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -2.835314989089966, |
|
"rewards/margins": 0.1450195610523224, |
|
"rewards/rejected": -2.980334520339966, |
|
"semantic_entropy": 0.7609063386917114, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09836065573770492, |
|
"grad_norm": 93.94573860721647, |
|
"learning_rate": 9.782608695652173e-07, |
|
"logits/chosen": -1.0171349048614502, |
|
"logits/rejected": -0.9333709478378296, |
|
"logps/chosen": -0.28658169507980347, |
|
"logps/rejected": -0.3058907389640808, |
|
"loss": 3.1741, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -2.865816831588745, |
|
"rewards/margins": 0.1930905282497406, |
|
"rewards/rejected": -3.0589072704315186, |
|
"semantic_entropy": 0.7610034346580505, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.1092896174863388, |
|
"grad_norm": 286.4609615603791, |
|
"learning_rate": 9.997663088532014e-07, |
|
"logits/chosen": -0.9543835520744324, |
|
"logits/rejected": -0.8730956315994263, |
|
"logps/chosen": -0.2823755145072937, |
|
"logps/rejected": -0.2902544140815735, |
|
"loss": 3.3278, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -2.8237555027008057, |
|
"rewards/margins": 0.07878823578357697, |
|
"rewards/rejected": -2.902543544769287, |
|
"semantic_entropy": 0.7529318332672119, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.12021857923497267, |
|
"grad_norm": 122.74091082377944, |
|
"learning_rate": 9.98817312944725e-07, |
|
"logits/chosen": -0.9809161424636841, |
|
"logits/rejected": -0.8649328947067261, |
|
"logps/chosen": -0.2803560495376587, |
|
"logps/rejected": -0.3149644732475281, |
|
"loss": 3.1758, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.803560256958008, |
|
"rewards/margins": 0.3460845947265625, |
|
"rewards/rejected": -3.1496450901031494, |
|
"semantic_entropy": 0.7631497979164124, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.13114754098360656, |
|
"grad_norm": 87.3444735452583, |
|
"learning_rate": 9.971397915250336e-07, |
|
"logits/chosen": -1.033070683479309, |
|
"logits/rejected": -0.9856836199760437, |
|
"logps/chosen": -0.2708079218864441, |
|
"logps/rejected": -0.31113672256469727, |
|
"loss": 3.0283, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.7080790996551514, |
|
"rewards/margins": 0.40328770875930786, |
|
"rewards/rejected": -3.1113669872283936, |
|
"semantic_entropy": 0.7593907713890076, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.14207650273224043, |
|
"grad_norm": 162.53486819318002, |
|
"learning_rate": 9.94736194623663e-07, |
|
"logits/chosen": -1.0021493434906006, |
|
"logits/rejected": -0.9318512082099915, |
|
"logps/chosen": -0.31085288524627686, |
|
"logps/rejected": -0.3405633866786957, |
|
"loss": 3.2559, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -3.1085288524627686, |
|
"rewards/margins": 0.29710477590560913, |
|
"rewards/rejected": -3.4056334495544434, |
|
"semantic_entropy": 0.8082467317581177, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.15300546448087432, |
|
"grad_norm": 348.65729079716573, |
|
"learning_rate": 9.916100327075037e-07, |
|
"logits/chosen": -0.9440506100654602, |
|
"logits/rejected": -0.9250672459602356, |
|
"logps/chosen": -0.29765281081199646, |
|
"logps/rejected": -0.3226909935474396, |
|
"loss": 3.0703, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.9765281677246094, |
|
"rewards/margins": 0.2503815293312073, |
|
"rewards/rejected": -3.22691011428833, |
|
"semantic_entropy": 0.7739163041114807, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16393442622950818, |
|
"grad_norm": 78.90080101900976, |
|
"learning_rate": 9.877658715537428e-07, |
|
"logits/chosen": -0.9282974004745483, |
|
"logits/rejected": -0.912223219871521, |
|
"logps/chosen": -0.3213742971420288, |
|
"logps/rejected": -0.351571649312973, |
|
"loss": 3.1657, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -3.213742733001709, |
|
"rewards/margins": 0.30197376012802124, |
|
"rewards/rejected": -3.515717029571533, |
|
"semantic_entropy": 0.8054312467575073, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.17486338797814208, |
|
"grad_norm": 86.95229439852206, |
|
"learning_rate": 9.832093255815216e-07, |
|
"logits/chosen": -0.9330040812492371, |
|
"logits/rejected": -0.8699474334716797, |
|
"logps/chosen": -0.3141597807407379, |
|
"logps/rejected": -0.3350343406200409, |
|
"loss": 3.1277, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -3.1415977478027344, |
|
"rewards/margins": 0.20874571800231934, |
|
"rewards/rejected": -3.350343704223633, |
|
"semantic_entropy": 0.7814024686813354, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.18579234972677597, |
|
"grad_norm": 129.31831333568232, |
|
"learning_rate": 9.779470496520441e-07, |
|
"logits/chosen": -0.9329907298088074, |
|
"logits/rejected": -0.8826324343681335, |
|
"logps/chosen": -0.31158381700515747, |
|
"logps/rejected": -0.3667066693305969, |
|
"loss": 2.9946, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -3.1158382892608643, |
|
"rewards/margins": 0.5512284636497498, |
|
"rewards/rejected": -3.667067050933838, |
|
"semantic_entropy": 0.802832305431366, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.19672131147540983, |
|
"grad_norm": 101.69393552819945, |
|
"learning_rate": 9.719867293491144e-07, |
|
"logits/chosen": -1.0058772563934326, |
|
"logits/rejected": -0.9232236742973328, |
|
"logps/chosen": -0.33966144919395447, |
|
"logps/rejected": -0.37590381503105164, |
|
"loss": 3.0893, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -3.3966145515441895, |
|
"rewards/margins": 0.3624236285686493, |
|
"rewards/rejected": -3.759038209915161, |
|
"semantic_entropy": 0.8507563471794128, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20765027322404372, |
|
"grad_norm": 91.30230852825771, |
|
"learning_rate": 9.653370697542987e-07, |
|
"logits/chosen": -0.9487398862838745, |
|
"logits/rejected": -0.9483828544616699, |
|
"logps/chosen": -0.3381520211696625, |
|
"logps/rejected": -0.362968385219574, |
|
"loss": 2.9005, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -3.3815205097198486, |
|
"rewards/margins": 0.24816343188285828, |
|
"rewards/rejected": -3.62968373298645, |
|
"semantic_entropy": 0.8359481692314148, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.2185792349726776, |
|
"grad_norm": 108.89056387914384, |
|
"learning_rate": 9.580077827331037e-07, |
|
"logits/chosen": -0.9571771621704102, |
|
"logits/rejected": -0.9095252752304077, |
|
"logps/chosen": -0.3725859522819519, |
|
"logps/rejected": -0.4279399514198303, |
|
"loss": 2.8907, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -3.7258598804473877, |
|
"rewards/margins": 0.553540050983429, |
|
"rewards/rejected": -4.2793989181518555, |
|
"semantic_entropy": 0.8787097930908203, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22950819672131148, |
|
"grad_norm": 127.65723295420393, |
|
"learning_rate": 9.500095727507419e-07, |
|
"logits/chosen": -1.008998155593872, |
|
"logits/rejected": -0.9757212400436401, |
|
"logps/chosen": -0.3544849753379822, |
|
"logps/rejected": -0.3977915942668915, |
|
"loss": 2.9862, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -3.5448498725891113, |
|
"rewards/margins": 0.4330664277076721, |
|
"rewards/rejected": -3.9779160022735596, |
|
"semantic_entropy": 0.8548823595046997, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.24043715846994534, |
|
"grad_norm": 114.56272891566377, |
|
"learning_rate": 9.413541212382004e-07, |
|
"logits/chosen": -0.9939772486686707, |
|
"logits/rejected": -0.9762369990348816, |
|
"logps/chosen": -0.3617566227912903, |
|
"logps/rejected": -0.4416491985321045, |
|
"loss": 2.8457, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -3.617565870285034, |
|
"rewards/margins": 0.7989261746406555, |
|
"rewards/rejected": -4.416492462158203, |
|
"semantic_entropy": 0.8938226699829102, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.25136612021857924, |
|
"grad_norm": 93.3647181422965, |
|
"learning_rate": 9.320540695314438e-07, |
|
"logits/chosen": -1.007943868637085, |
|
"logits/rejected": -0.9657400846481323, |
|
"logps/chosen": -0.3646220564842224, |
|
"logps/rejected": -0.46302324533462524, |
|
"loss": 2.7542, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -3.6462206840515137, |
|
"rewards/margins": 0.9840116500854492, |
|
"rewards/rejected": -4.630232334136963, |
|
"semantic_entropy": 0.878681480884552, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.26229508196721313, |
|
"grad_norm": 83.92874813803249, |
|
"learning_rate": 9.221230004086721e-07, |
|
"logits/chosen": -1.0344518423080444, |
|
"logits/rejected": -0.9601195454597473, |
|
"logps/chosen": -0.37165606021881104, |
|
"logps/rejected": -0.42726248502731323, |
|
"loss": 2.8688, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -3.7165608406066895, |
|
"rewards/margins": 0.5560643076896667, |
|
"rewards/rejected": -4.272624969482422, |
|
"semantic_entropy": 0.8952873945236206, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.273224043715847, |
|
"grad_norm": 190.904470995411, |
|
"learning_rate": 9.11575418252596e-07, |
|
"logits/chosen": -0.9347244501113892, |
|
"logits/rejected": -0.8975458145141602, |
|
"logps/chosen": -0.38535335659980774, |
|
"logps/rejected": -0.4541945457458496, |
|
"loss": 2.7207, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -3.853533983230591, |
|
"rewards/margins": 0.68841153383255, |
|
"rewards/rejected": -4.541945457458496, |
|
"semantic_entropy": 0.9004859924316406, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.28415300546448086, |
|
"grad_norm": 92.76045922249655, |
|
"learning_rate": 9.004267278667031e-07, |
|
"logits/chosen": -0.9624107480049133, |
|
"logits/rejected": -0.9534618258476257, |
|
"logps/chosen": -0.4056780934333801, |
|
"logps/rejected": -0.5245551466941833, |
|
"loss": 2.7139, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -4.056780815124512, |
|
"rewards/margins": 1.1887714862823486, |
|
"rewards/rejected": -5.245552062988281, |
|
"semantic_entropy": 0.8837997317314148, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.29508196721311475, |
|
"grad_norm": 73.9902335755549, |
|
"learning_rate": 8.886932119764565e-07, |
|
"logits/chosen": -1.0003821849822998, |
|
"logits/rejected": -0.9125338792800903, |
|
"logps/chosen": -0.3864729106426239, |
|
"logps/rejected": -0.4857531189918518, |
|
"loss": 2.6137, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -3.8647284507751465, |
|
"rewards/margins": 0.9928020238876343, |
|
"rewards/rejected": -4.8575310707092285, |
|
"semantic_entropy": 0.9038194417953491, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.30601092896174864, |
|
"grad_norm": 86.17327659708472, |
|
"learning_rate": 8.763920074482809e-07, |
|
"logits/chosen": -0.9963301420211792, |
|
"logits/rejected": -0.9396141767501831, |
|
"logps/chosen": -0.4139133095741272, |
|
"logps/rejected": -0.5436104536056519, |
|
"loss": 2.4158, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -4.139132976531982, |
|
"rewards/margins": 1.296971082687378, |
|
"rewards/rejected": -5.4361042976379395, |
|
"semantic_entropy": 0.9314233064651489, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.31693989071038253, |
|
"grad_norm": 95.91548705026597, |
|
"learning_rate": 8.635410802610723e-07, |
|
"logits/chosen": -0.9837471842765808, |
|
"logits/rejected": -0.96197909116745, |
|
"logps/chosen": -0.3892672657966614, |
|
"logps/rejected": -0.4463191032409668, |
|
"loss": 2.5469, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.8926727771759033, |
|
"rewards/margins": 0.5705188512802124, |
|
"rewards/rejected": -4.463191986083984, |
|
"semantic_entropy": 0.9131715893745422, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.32786885245901637, |
|
"grad_norm": 62.831854072245996, |
|
"learning_rate": 8.501591992667849e-07, |
|
"logits/chosen": -1.0432965755462646, |
|
"logits/rejected": -1.0063092708587646, |
|
"logps/chosen": -0.42201298475265503, |
|
"logps/rejected": -0.5824503302574158, |
|
"loss": 2.4081, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -4.22012996673584, |
|
"rewards/margins": 1.6043736934661865, |
|
"rewards/rejected": -5.8245038986206055, |
|
"semantic_entropy": 0.9204033613204956, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33879781420765026, |
|
"grad_norm": 71.34642946039108, |
|
"learning_rate": 8.362659087784152e-07, |
|
"logits/chosen": -1.0033342838287354, |
|
"logits/rejected": -0.943057656288147, |
|
"logps/chosen": -0.4163185656070709, |
|
"logps/rejected": -0.5151209831237793, |
|
"loss": 2.5079, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -4.163184642791748, |
|
"rewards/margins": 0.9880247116088867, |
|
"rewards/rejected": -5.151209831237793, |
|
"semantic_entropy": 0.9188691973686218, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.34972677595628415, |
|
"grad_norm": 95.49736374278223, |
|
"learning_rate": 8.218815000254231e-07, |
|
"logits/chosen": -1.036727786064148, |
|
"logits/rejected": -0.9749704599380493, |
|
"logps/chosen": -0.46870869398117065, |
|
"logps/rejected": -0.5485578775405884, |
|
"loss": 2.5679, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -4.68708610534668, |
|
"rewards/margins": 0.7984916567802429, |
|
"rewards/rejected": -5.4855780601501465, |
|
"semantic_entropy": 0.9369996786117554, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.36065573770491804, |
|
"grad_norm": 90.06304805222751, |
|
"learning_rate": 8.07026981518276e-07, |
|
"logits/chosen": -1.0219743251800537, |
|
"logits/rejected": -0.9637954831123352, |
|
"logps/chosen": -0.4483868181705475, |
|
"logps/rejected": -0.5347827076911926, |
|
"loss": 2.5189, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -4.48386812210083, |
|
"rewards/margins": 0.8639583587646484, |
|
"rewards/rejected": -5.3478264808654785, |
|
"semantic_entropy": 0.9511811137199402, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.37158469945355194, |
|
"grad_norm": 99.59126068255084, |
|
"learning_rate": 7.917240483654e-07, |
|
"logits/chosen": -1.01731276512146, |
|
"logits/rejected": -0.9495924115180969, |
|
"logps/chosen": -0.44470348954200745, |
|
"logps/rejected": -0.5198506712913513, |
|
"loss": 2.6168, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -4.447035312652588, |
|
"rewards/margins": 0.7514716386795044, |
|
"rewards/rejected": -5.198506832122803, |
|
"semantic_entropy": 0.9501636624336243, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.3825136612021858, |
|
"grad_norm": 91.44599232569668, |
|
"learning_rate": 7.759950505873521e-07, |
|
"logits/chosen": -1.067455768585205, |
|
"logits/rejected": -1.031198263168335, |
|
"logps/chosen": -0.4639251232147217, |
|
"logps/rejected": -0.5280762910842896, |
|
"loss": 2.4665, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -4.639250755310059, |
|
"rewards/margins": 0.6415112614631653, |
|
"rewards/rejected": -5.280762672424316, |
|
"semantic_entropy": 0.9261935949325562, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.39344262295081966, |
|
"grad_norm": 90.82885753738844, |
|
"learning_rate": 7.598629604744872e-07, |
|
"logits/chosen": -1.0707954168319702, |
|
"logits/rejected": -1.0595567226409912, |
|
"logps/chosen": -0.43981847167015076, |
|
"logps/rejected": -0.5758017897605896, |
|
"loss": 2.3437, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -4.398184776306152, |
|
"rewards/margins": 1.3598332405090332, |
|
"rewards/rejected": -5.7580180168151855, |
|
"semantic_entropy": 0.966151237487793, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.40437158469945356, |
|
"grad_norm": 67.92403731887116, |
|
"learning_rate": 7.433513390357989e-07, |
|
"logits/chosen": -1.108884572982788, |
|
"logits/rejected": -1.1143901348114014, |
|
"logps/chosen": -0.46474918723106384, |
|
"logps/rejected": -0.5912537574768066, |
|
"loss": 2.3628, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -4.647492408752441, |
|
"rewards/margins": 1.265044927597046, |
|
"rewards/rejected": -5.912537097930908, |
|
"semantic_entropy": 0.9750612378120422, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.41530054644808745, |
|
"grad_norm": 108.44092763175198, |
|
"learning_rate": 7.264843015879321e-07, |
|
"logits/chosen": -1.1020928621292114, |
|
"logits/rejected": -1.0545780658721924, |
|
"logps/chosen": -0.4519892632961273, |
|
"logps/rejected": -0.6003154516220093, |
|
"loss": 2.4166, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -4.51989221572876, |
|
"rewards/margins": 1.483262062072754, |
|
"rewards/rejected": -6.003154754638672, |
|
"semantic_entropy": 0.9666361808776855, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.4262295081967213, |
|
"grad_norm": 83.4733675057699, |
|
"learning_rate": 7.092864825346266e-07, |
|
"logits/chosen": -1.129482626914978, |
|
"logits/rejected": -1.0993843078613281, |
|
"logps/chosen": -0.5358282327651978, |
|
"logps/rejected": -0.7053772211074829, |
|
"loss": 2.467, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -5.358282566070557, |
|
"rewards/margins": 1.6954904794692993, |
|
"rewards/rejected": -7.053772926330566, |
|
"semantic_entropy": 0.9725033044815063, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.4371584699453552, |
|
"grad_norm": 84.36629950168863, |
|
"learning_rate": 6.917829993880302e-07, |
|
"logits/chosen": -1.1204617023468018, |
|
"logits/rejected": -1.0279228687286377, |
|
"logps/chosen": -0.5025330185890198, |
|
"logps/rejected": -0.6288330554962158, |
|
"loss": 2.3371, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -5.025330543518066, |
|
"rewards/margins": 1.263000249862671, |
|
"rewards/rejected": -6.288330078125, |
|
"semantic_entropy": 0.9786258935928345, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.44808743169398907, |
|
"grad_norm": 69.08292746057649, |
|
"learning_rate": 6.739994160844309e-07, |
|
"logits/chosen": -1.0733792781829834, |
|
"logits/rejected": -1.0833173990249634, |
|
"logps/chosen": -0.4674602448940277, |
|
"logps/rejected": -0.6061697006225586, |
|
"loss": 2.2298, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -4.67460298538208, |
|
"rewards/margins": 1.3870941400527954, |
|
"rewards/rejected": -6.061697006225586, |
|
"semantic_entropy": 0.9808717966079712, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.45901639344262296, |
|
"grad_norm": 173.88772431903314, |
|
"learning_rate": 6.559617056479827e-07, |
|
"logits/chosen": -1.1001962423324585, |
|
"logits/rejected": -1.0926573276519775, |
|
"logps/chosen": -0.5027323961257935, |
|
"logps/rejected": -0.6716328263282776, |
|
"loss": 2.2974, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -5.027324676513672, |
|
"rewards/margins": 1.6890045404434204, |
|
"rewards/rejected": -6.7163286209106445, |
|
"semantic_entropy": 0.9590319395065308, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.46994535519125685, |
|
"grad_norm": 94.73792447265116, |
|
"learning_rate": 6.376962122569567e-07, |
|
"logits/chosen": -1.1140978336334229, |
|
"logits/rejected": -1.0545861721038818, |
|
"logps/chosen": -0.5114679336547852, |
|
"logps/rejected": -0.6812509298324585, |
|
"loss": 2.4425, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -5.114679336547852, |
|
"rewards/margins": 1.6978304386138916, |
|
"rewards/rejected": -6.812510013580322, |
|
"semantic_entropy": 1.0018761157989502, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.4808743169398907, |
|
"grad_norm": 92.5570818396132, |
|
"learning_rate": 6.192296127679192e-07, |
|
"logits/chosen": -1.1659886837005615, |
|
"logits/rejected": -1.1348073482513428, |
|
"logps/chosen": -0.5338795781135559, |
|
"logps/rejected": -0.6664601564407349, |
|
"loss": 2.2908, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -5.338796138763428, |
|
"rewards/margins": 1.325805425643921, |
|
"rewards/rejected": -6.664601802825928, |
|
"semantic_entropy": 0.9806681871414185, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.4918032786885246, |
|
"grad_norm": 69.72669762595787, |
|
"learning_rate": 6.005888777540319e-07, |
|
"logits/chosen": -1.1951611042022705, |
|
"logits/rejected": -1.1498881578445435, |
|
"logps/chosen": -0.5304981470108032, |
|
"logps/rejected": -0.6832343935966492, |
|
"loss": 2.335, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -5.304981708526611, |
|
"rewards/margins": 1.5273630619049072, |
|
"rewards/rejected": -6.832344055175781, |
|
"semantic_entropy": 1.0007470846176147, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.5027322404371585, |
|
"grad_norm": 75.21829123168051, |
|
"learning_rate": 5.818012321143773e-07, |
|
"logits/chosen": -1.0969598293304443, |
|
"logits/rejected": -1.0912028551101685, |
|
"logps/chosen": -0.5201154351234436, |
|
"logps/rejected": -0.7016697525978088, |
|
"loss": 2.2528, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -5.2011542320251465, |
|
"rewards/margins": 1.8155431747436523, |
|
"rewards/rejected": -7.016697883605957, |
|
"semantic_entropy": 0.9929972887039185, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5136612021857924, |
|
"grad_norm": 83.71823549926938, |
|
"learning_rate": 5.628941153118388e-07, |
|
"logits/chosen": -1.1018104553222656, |
|
"logits/rejected": -1.0575555562973022, |
|
"logps/chosen": -0.5212110280990601, |
|
"logps/rejected": -0.664139986038208, |
|
"loss": 2.2375, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -5.21211051940918, |
|
"rewards/margins": 1.4292891025543213, |
|
"rewards/rejected": -6.641399383544922, |
|
"semantic_entropy": 0.9852234125137329, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.5245901639344263, |
|
"grad_norm": 69.35272919599275, |
|
"learning_rate": 5.438951412976098e-07, |
|
"logits/chosen": -1.1364176273345947, |
|
"logits/rejected": -1.141788125038147, |
|
"logps/chosen": -0.49681615829467773, |
|
"logps/rejected": -0.6832265853881836, |
|
"loss": 2.0805, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -4.9681620597839355, |
|
"rewards/margins": 1.8641045093536377, |
|
"rewards/rejected": -6.832266330718994, |
|
"semantic_entropy": 1.0116462707519531, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5355191256830601, |
|
"grad_norm": 71.19254935234711, |
|
"learning_rate": 5.248320581808619e-07, |
|
"logits/chosen": -1.0613957643508911, |
|
"logits/rejected": -1.0091025829315186, |
|
"logps/chosen": -0.5141640901565552, |
|
"logps/rejected": -0.6968377828598022, |
|
"loss": 2.1843, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -5.141640663146973, |
|
"rewards/margins": 1.8267381191253662, |
|
"rewards/rejected": -6.96837854385376, |
|
"semantic_entropy": 0.9718330502510071, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.546448087431694, |
|
"grad_norm": 70.83927823916859, |
|
"learning_rate": 5.057327077024744e-07, |
|
"logits/chosen": -1.146533727645874, |
|
"logits/rejected": -1.1047497987747192, |
|
"logps/chosen": -0.5028788447380066, |
|
"logps/rejected": -0.6331702470779419, |
|
"loss": 2.2718, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -5.028789043426514, |
|
"rewards/margins": 1.3029136657714844, |
|
"rewards/rejected": -6.331702709197998, |
|
"semantic_entropy": 0.9849420785903931, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5573770491803278, |
|
"grad_norm": 78.29453714448445, |
|
"learning_rate": 4.866249845720132e-07, |
|
"logits/chosen": -1.1301579475402832, |
|
"logits/rejected": -1.091973900794983, |
|
"logps/chosen": -0.555388331413269, |
|
"logps/rejected": -0.7187477946281433, |
|
"loss": 2.1692, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -5.553883075714111, |
|
"rewards/margins": 1.633594274520874, |
|
"rewards/rejected": -7.187478065490723, |
|
"semantic_entropy": 0.9999436140060425, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5683060109289617, |
|
"grad_norm": 74.66530559540921, |
|
"learning_rate": 4.675367957273505e-07, |
|
"logits/chosen": -1.096861720085144, |
|
"logits/rejected": -1.0846450328826904, |
|
"logps/chosen": -0.5131552815437317, |
|
"logps/rejected": -0.671288251876831, |
|
"loss": 2.1911, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -5.131552219390869, |
|
"rewards/margins": 1.5813300609588623, |
|
"rewards/rejected": -6.712882041931152, |
|
"semantic_entropy": 0.9927156567573547, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5792349726775956, |
|
"grad_norm": 122.60471760919329, |
|
"learning_rate": 4.4849601957642285e-07, |
|
"logits/chosen": -1.124089002609253, |
|
"logits/rejected": -1.0828189849853516, |
|
"logps/chosen": -0.5186060070991516, |
|
"logps/rejected": -0.6825847625732422, |
|
"loss": 2.1964, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -5.186059474945068, |
|
"rewards/margins": 1.6397874355316162, |
|
"rewards/rejected": -6.8258466720581055, |
|
"semantic_entropy": 0.9880490303039551, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5901639344262295, |
|
"grad_norm": 86.34568213442027, |
|
"learning_rate": 4.295304652806592e-07, |
|
"logits/chosen": -1.1392979621887207, |
|
"logits/rejected": -1.1078673601150513, |
|
"logps/chosen": -0.5172940492630005, |
|
"logps/rejected": -0.699386477470398, |
|
"loss": 2.0791, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -5.172940254211426, |
|
"rewards/margins": 1.8209247589111328, |
|
"rewards/rejected": -6.993865013122559, |
|
"semantic_entropy": 0.9866276979446411, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6010928961748634, |
|
"grad_norm": 58.20062582439532, |
|
"learning_rate": 4.106678321395433e-07, |
|
"logits/chosen": -1.1032135486602783, |
|
"logits/rejected": -1.0302824974060059, |
|
"logps/chosen": -0.5297619104385376, |
|
"logps/rejected": -0.627161979675293, |
|
"loss": 2.1916, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -5.2976179122924805, |
|
"rewards/margins": 0.9740018844604492, |
|
"rewards/rejected": -6.271620750427246, |
|
"semantic_entropy": 0.9879854917526245, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.6120218579234973, |
|
"grad_norm": 84.89751844734043, |
|
"learning_rate": 3.9193566913562915e-07, |
|
"logits/chosen": -1.0617036819458008, |
|
"logits/rejected": -1.0624239444732666, |
|
"logps/chosen": -0.5223734378814697, |
|
"logps/rejected": -0.7275804281234741, |
|
"loss": 2.1763, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -5.223733901977539, |
|
"rewards/margins": 2.0520694255828857, |
|
"rewards/rejected": -7.275804042816162, |
|
"semantic_entropy": 0.986528754234314, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6229508196721312, |
|
"grad_norm": 72.19485427611927, |
|
"learning_rate": 3.7336133469909623e-07, |
|
"logits/chosen": -1.1958709955215454, |
|
"logits/rejected": -1.1594369411468506, |
|
"logps/chosen": -0.5087668895721436, |
|
"logps/rejected": -0.702479898929596, |
|
"loss": 2.1358, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -5.0876688957214355, |
|
"rewards/margins": 1.9371296167373657, |
|
"rewards/rejected": -7.024799346923828, |
|
"semantic_entropy": 0.9978361129760742, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.6338797814207651, |
|
"grad_norm": 81.98995443073827, |
|
"learning_rate": 3.549719567506076e-07, |
|
"logits/chosen": -1.1317315101623535, |
|
"logits/rejected": -1.0870287418365479, |
|
"logps/chosen": -0.5346897840499878, |
|
"logps/rejected": -0.6969183087348938, |
|
"loss": 2.1523, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -5.346898078918457, |
|
"rewards/margins": 1.62228524684906, |
|
"rewards/rejected": -6.969183444976807, |
|
"semantic_entropy": 1.0123668909072876, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.644808743169399, |
|
"grad_norm": 67.90034025619751, |
|
"learning_rate": 3.3679439308082774e-07, |
|
"logits/chosen": -1.115994930267334, |
|
"logits/rejected": -1.1152690649032593, |
|
"logps/chosen": -0.5274439454078674, |
|
"logps/rejected": -0.7239035367965698, |
|
"loss": 1.9278, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -5.274438381195068, |
|
"rewards/margins": 1.9645967483520508, |
|
"rewards/rejected": -7.239035606384277, |
|
"semantic_entropy": 1.0061827898025513, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.6557377049180327, |
|
"grad_norm": 70.85439265034472, |
|
"learning_rate": 3.1885519212446716e-07, |
|
"logits/chosen": -1.144639253616333, |
|
"logits/rejected": -1.1228580474853516, |
|
"logps/chosen": -0.542576253414154, |
|
"logps/rejected": -0.7291213274002075, |
|
"loss": 2.0159, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -5.42576265335083, |
|
"rewards/margins": 1.865450143814087, |
|
"rewards/rejected": -7.291213035583496, |
|
"semantic_entropy": 0.9855409860610962, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 76.04305212768787, |
|
"learning_rate": 3.0118055418614295e-07, |
|
"logits/chosen": -1.1450592279434204, |
|
"logits/rejected": -1.0869606733322144, |
|
"logps/chosen": -0.5319762229919434, |
|
"logps/rejected": -0.7148723006248474, |
|
"loss": 2.1436, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -5.319762706756592, |
|
"rewards/margins": 1.8289600610733032, |
|
"rewards/rejected": -7.1487226486206055, |
|
"semantic_entropy": 1.007246732711792, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6775956284153005, |
|
"grad_norm": 108.42660481521786, |
|
"learning_rate": 2.83796293174686e-07, |
|
"logits/chosen": -1.0885827541351318, |
|
"logits/rejected": -1.092543363571167, |
|
"logps/chosen": -0.5401273369789124, |
|
"logps/rejected": -0.748576283454895, |
|
"loss": 2.2592, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -5.401273250579834, |
|
"rewards/margins": 2.084489107131958, |
|
"rewards/rejected": -7.485762119293213, |
|
"semantic_entropy": 0.9948571920394897, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6885245901639344, |
|
"grad_norm": 70.86094179855972, |
|
"learning_rate": 2.6672779890178046e-07, |
|
"logits/chosen": -1.1491663455963135, |
|
"logits/rejected": -1.1490873098373413, |
|
"logps/chosen": -0.5631974935531616, |
|
"logps/rejected": -0.6903260946273804, |
|
"loss": 2.1308, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -5.631974697113037, |
|
"rewards/margins": 1.2712849378585815, |
|
"rewards/rejected": -6.903260231018066, |
|
"semantic_entropy": 0.9926439523696899, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6994535519125683, |
|
"grad_norm": 57.13767602235213, |
|
"learning_rate": 2.500000000000001e-07, |
|
"logits/chosen": -1.2022249698638916, |
|
"logits/rejected": -1.1517468690872192, |
|
"logps/chosen": -0.5420448184013367, |
|
"logps/rejected": -0.747357964515686, |
|
"loss": 2.0999, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -5.42044734954834, |
|
"rewards/margins": 2.053131580352783, |
|
"rewards/rejected": -7.473579406738281, |
|
"semantic_entropy": 1.00899338722229, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.7103825136612022, |
|
"grad_norm": 82.09122654285451, |
|
"learning_rate": 2.3363732751439923e-07, |
|
"logits/chosen": -1.1618945598602295, |
|
"logits/rejected": -1.143754243850708, |
|
"logps/chosen": -0.5291402339935303, |
|
"logps/rejected": -0.7228410243988037, |
|
"loss": 2.0999, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -5.291402339935303, |
|
"rewards/margins": 1.9370079040527344, |
|
"rewards/rejected": -7.228410243988037, |
|
"semantic_entropy": 1.0088245868682861, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.7213114754098361, |
|
"grad_norm": 62.18032792736575, |
|
"learning_rate": 2.1766367922083283e-07, |
|
"logits/chosen": -1.112157940864563, |
|
"logits/rejected": -1.0798307657241821, |
|
"logps/chosen": -0.4986172318458557, |
|
"logps/rejected": -0.7466678023338318, |
|
"loss": 2.0623, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -4.986172676086426, |
|
"rewards/margins": 2.4805047512054443, |
|
"rewards/rejected": -7.466677188873291, |
|
"semantic_entropy": 0.98463374376297, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.73224043715847, |
|
"grad_norm": 77.93323322539872, |
|
"learning_rate": 2.021023847231202e-07, |
|
"logits/chosen": -1.1002051830291748, |
|
"logits/rejected": -1.0612647533416748, |
|
"logps/chosen": -0.5647180080413818, |
|
"logps/rejected": -0.758574366569519, |
|
"loss": 2.0578, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -5.647180557250977, |
|
"rewards/margins": 1.938563346862793, |
|
"rewards/rejected": -7.5857439041137695, |
|
"semantic_entropy": 0.9851423501968384, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.7431693989071039, |
|
"grad_norm": 86.17026425537334, |
|
"learning_rate": 1.869761713800254e-07, |
|
"logits/chosen": -1.1088799238204956, |
|
"logits/rejected": -1.064263939857483, |
|
"logps/chosen": -0.5516521334648132, |
|
"logps/rejected": -0.7183089256286621, |
|
"loss": 2.1191, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -5.516521453857422, |
|
"rewards/margins": 1.6665668487548828, |
|
"rewards/rejected": -7.183088779449463, |
|
"semantic_entropy": 0.9953676462173462, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7540983606557377, |
|
"grad_norm": 80.58633018790611, |
|
"learning_rate": 1.7230713111182164e-07, |
|
"logits/chosen": -1.156589150428772, |
|
"logits/rejected": -1.1543285846710205, |
|
"logps/chosen": -0.5463498830795288, |
|
"logps/rejected": -0.7534428238868713, |
|
"loss": 2.1553, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -5.463499546051025, |
|
"rewards/margins": 2.0709292888641357, |
|
"rewards/rejected": -7.534428596496582, |
|
"semantic_entropy": 0.9916456341743469, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.7650273224043715, |
|
"grad_norm": 89.6142113445473, |
|
"learning_rate": 1.5811668813491696e-07, |
|
"logits/chosen": -1.1436890363693237, |
|
"logits/rejected": -1.124874234199524, |
|
"logps/chosen": -0.5143482685089111, |
|
"logps/rejected": -0.6786841154098511, |
|
"loss": 2.0898, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -5.143482685089111, |
|
"rewards/margins": 1.643358588218689, |
|
"rewards/rejected": -6.786840915679932, |
|
"semantic_entropy": 0.9892457127571106, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7759562841530054, |
|
"grad_norm": 66.31246023221092, |
|
"learning_rate": 1.4442556767166369e-07, |
|
"logits/chosen": -1.1231715679168701, |
|
"logits/rejected": -1.0935585498809814, |
|
"logps/chosen": -0.5266932845115662, |
|
"logps/rejected": -0.6977185010910034, |
|
"loss": 2.0776, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -5.266932487487793, |
|
"rewards/margins": 1.7102525234222412, |
|
"rewards/rejected": -6.9771857261657715, |
|
"semantic_entropy": 1.0020959377288818, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7868852459016393, |
|
"grad_norm": 83.110534751007, |
|
"learning_rate": 1.312537656810549e-07, |
|
"logits/chosen": -1.0739078521728516, |
|
"logits/rejected": -1.0743262767791748, |
|
"logps/chosen": -0.5362976789474487, |
|
"logps/rejected": -0.714411735534668, |
|
"loss": 2.1365, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -5.362977504730225, |
|
"rewards/margins": 1.7811400890350342, |
|
"rewards/rejected": -7.144117832183838, |
|
"semantic_entropy": 0.9827717542648315, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7978142076502732, |
|
"grad_norm": 141.91877356203636, |
|
"learning_rate": 1.1862051965451214e-07, |
|
"logits/chosen": -1.1579176187515259, |
|
"logits/rejected": -1.1566094160079956, |
|
"logps/chosen": -0.5423863530158997, |
|
"logps/rejected": -0.7343495488166809, |
|
"loss": 2.054, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -5.423863410949707, |
|
"rewards/margins": 1.9196319580078125, |
|
"rewards/rejected": -7.3434953689575195, |
|
"semantic_entropy": 1.011482834815979, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.8087431693989071, |
|
"grad_norm": 78.59703524378126, |
|
"learning_rate": 1.0654428051942138e-07, |
|
"logits/chosen": -1.165038824081421, |
|
"logits/rejected": -1.1290335655212402, |
|
"logps/chosen": -0.5577388405799866, |
|
"logps/rejected": -0.7825115919113159, |
|
"loss": 2.1829, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -5.577388286590576, |
|
"rewards/margins": 2.247727632522583, |
|
"rewards/rejected": -7.825116157531738, |
|
"semantic_entropy": 1.0018393993377686, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.819672131147541, |
|
"grad_norm": 63.6353735816688, |
|
"learning_rate": 9.504268569144763e-08, |
|
"logits/chosen": -1.2013657093048096, |
|
"logits/rejected": -1.1338837146759033, |
|
"logps/chosen": -0.5316141843795776, |
|
"logps/rejected": -0.7204681038856506, |
|
"loss": 2.0911, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -5.316141605377197, |
|
"rewards/margins": 1.8885393142700195, |
|
"rewards/rejected": -7.204681396484375, |
|
"semantic_entropy": 1.0054622888565063, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.8306010928961749, |
|
"grad_norm": 64.71607446988344, |
|
"learning_rate": 8.413253331499049e-08, |
|
"logits/chosen": -1.0807088613510132, |
|
"logits/rejected": -1.102399230003357, |
|
"logps/chosen": -0.549530029296875, |
|
"logps/rejected": -0.7361005544662476, |
|
"loss": 2.0374, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -5.49530029296875, |
|
"rewards/margins": 1.8657052516937256, |
|
"rewards/rejected": -7.361004829406738, |
|
"semantic_entropy": 1.0044524669647217, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8415300546448088, |
|
"grad_norm": 67.03443291040514, |
|
"learning_rate": 7.382975772939865e-08, |
|
"logits/chosen": -1.1790930032730103, |
|
"logits/rejected": -1.1615774631500244, |
|
"logps/chosen": -0.590388834476471, |
|
"logps/rejected": -0.7754439115524292, |
|
"loss": 2.1706, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -5.903887748718262, |
|
"rewards/margins": 1.850551962852478, |
|
"rewards/rejected": -7.754439353942871, |
|
"semantic_entropy": 1.0115418434143066, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.8524590163934426, |
|
"grad_norm": 103.88519596336293, |
|
"learning_rate": 6.414940619677734e-08, |
|
"logits/chosen": -1.166526436805725, |
|
"logits/rejected": -1.1425046920776367, |
|
"logps/chosen": -0.5350316762924194, |
|
"logps/rejected": -0.7514439821243286, |
|
"loss": 2.1188, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -5.350315570831299, |
|
"rewards/margins": 2.164124011993408, |
|
"rewards/rejected": -7.514439582824707, |
|
"semantic_entropy": 1.0120559930801392, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8633879781420765, |
|
"grad_norm": 71.31872002268815, |
|
"learning_rate": 5.5105616925376296e-08, |
|
"logits/chosen": -1.1460245847702026, |
|
"logits/rejected": -1.1267726421356201, |
|
"logps/chosen": -0.5442631244659424, |
|
"logps/rejected": -0.6925864815711975, |
|
"loss": 2.0494, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -5.442631721496582, |
|
"rewards/margins": 1.4832336902618408, |
|
"rewards/rejected": -6.925864219665527, |
|
"semantic_entropy": 1.0200846195220947, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8743169398907104, |
|
"grad_norm": 75.02341041681119, |
|
"learning_rate": 4.6711598420656976e-08, |
|
"logits/chosen": -1.0774163007736206, |
|
"logits/rejected": -1.0491969585418701, |
|
"logps/chosen": -0.5711244344711304, |
|
"logps/rejected": -0.7721945643424988, |
|
"loss": 2.0424, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -5.711243629455566, |
|
"rewards/margins": 2.010702133178711, |
|
"rewards/rejected": -7.721946716308594, |
|
"semantic_entropy": 0.9843025207519531, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8743169398907104, |
|
"eval_logits/chosen": -1.373081088066101, |
|
"eval_logits/rejected": -1.332649827003479, |
|
"eval_logps/chosen": -0.5314387083053589, |
|
"eval_logps/rejected": -0.7141891121864319, |
|
"eval_loss": 2.0935795307159424, |
|
"eval_rewards/accuracies": 0.7771084308624268, |
|
"eval_rewards/chosen": -5.314386367797852, |
|
"eval_rewards/margins": 1.8275047540664673, |
|
"eval_rewards/rejected": -7.1418914794921875, |
|
"eval_runtime": 37.9946, |
|
"eval_samples_per_second": 34.689, |
|
"eval_semantic_entropy": 0.9976296424865723, |
|
"eval_steps_per_second": 2.185, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8852459016393442, |
|
"grad_norm": 68.55432141696805, |
|
"learning_rate": 3.897961019419516e-08, |
|
"logits/chosen": -1.1141546964645386, |
|
"logits/rejected": -1.046690583229065, |
|
"logps/chosen": -0.5127943754196167, |
|
"logps/rejected": -0.656581699848175, |
|
"loss": 1.9895, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -5.127943992614746, |
|
"rewards/margins": 1.4378730058670044, |
|
"rewards/rejected": -6.565816402435303, |
|
"semantic_entropy": 1.010331630706787, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8961748633879781, |
|
"grad_norm": 78.80129315841073, |
|
"learning_rate": 3.192094485859526e-08, |
|
"logits/chosen": -1.1211316585540771, |
|
"logits/rejected": -1.1407296657562256, |
|
"logps/chosen": -0.5510164499282837, |
|
"logps/rejected": -0.7787143588066101, |
|
"loss": 2.0771, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -5.5101637840271, |
|
"rewards/margins": 2.2769789695739746, |
|
"rewards/rejected": -7.787143707275391, |
|
"semantic_entropy": 0.9897591471672058, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.907103825136612, |
|
"grad_norm": 63.07603955603986, |
|
"learning_rate": 2.5545911634565265e-08, |
|
"logits/chosen": -1.1598929166793823, |
|
"logits/rejected": -1.1571664810180664, |
|
"logps/chosen": -0.5616727471351624, |
|
"logps/rejected": -0.7835390567779541, |
|
"loss": 2.1028, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -5.616726875305176, |
|
"rewards/margins": 2.2186641693115234, |
|
"rewards/rejected": -7.835390567779541, |
|
"semantic_entropy": 0.9976503252983093, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.9180327868852459, |
|
"grad_norm": 89.74277499235377, |
|
"learning_rate": 1.9863821294241522e-08, |
|
"logits/chosen": -1.1581684350967407, |
|
"logits/rejected": -1.1270772218704224, |
|
"logps/chosen": -0.5167144536972046, |
|
"logps/rejected": -0.7354345321655273, |
|
"loss": 1.9853, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -5.167144298553467, |
|
"rewards/margins": 2.1872007846832275, |
|
"rewards/rejected": -7.354344844818115, |
|
"semantic_entropy": 1.002636194229126, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9289617486338798, |
|
"grad_norm": 71.82508706026904, |
|
"learning_rate": 1.4882972562753615e-08, |
|
"logits/chosen": -1.1459519863128662, |
|
"logits/rejected": -1.1357475519180298, |
|
"logps/chosen": -0.5768141746520996, |
|
"logps/rejected": -0.7887662053108215, |
|
"loss": 2.1228, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -5.768141746520996, |
|
"rewards/margins": 2.1195199489593506, |
|
"rewards/rejected": -7.887660980224609, |
|
"semantic_entropy": 0.9999387860298157, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.9398907103825137, |
|
"grad_norm": 91.24129023810345, |
|
"learning_rate": 1.0610639997888915e-08, |
|
"logits/chosen": -1.0857610702514648, |
|
"logits/rejected": -1.0863049030303955, |
|
"logps/chosen": -0.520858883857727, |
|
"logps/rejected": -0.7221606969833374, |
|
"loss": 1.9229, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -5.208588600158691, |
|
"rewards/margins": 2.0130181312561035, |
|
"rewards/rejected": -7.221606254577637, |
|
"semantic_entropy": 1.0139762163162231, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9508196721311475, |
|
"grad_norm": 88.53510645254667, |
|
"learning_rate": 7.053063365559997e-09, |
|
"logits/chosen": -1.147918939590454, |
|
"logits/rejected": -1.1737738847732544, |
|
"logps/chosen": -0.5408393740653992, |
|
"logps/rejected": -0.7648274898529053, |
|
"loss": 2.0597, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -5.408394813537598, |
|
"rewards/margins": 2.2398805618286133, |
|
"rewards/rejected": -7.6482744216918945, |
|
"semantic_entropy": 1.0074278116226196, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.9617486338797814, |
|
"grad_norm": 77.47275941246323, |
|
"learning_rate": 4.215438526591064e-09, |
|
"logits/chosen": -1.08914053440094, |
|
"logits/rejected": -1.0488555431365967, |
|
"logps/chosen": -0.5592411160469055, |
|
"logps/rejected": -0.7156537175178528, |
|
"loss": 2.0346, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -5.592411518096924, |
|
"rewards/margins": 1.5641257762908936, |
|
"rewards/rejected": -7.156537055969238, |
|
"semantic_entropy": 0.9892482757568359, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9726775956284153, |
|
"grad_norm": 85.37542604306078, |
|
"learning_rate": 2.1019098481337426e-09, |
|
"logits/chosen": -1.1617168188095093, |
|
"logits/rejected": -1.1342122554779053, |
|
"logps/chosen": -0.5354763865470886, |
|
"logps/rejected": -0.7130267024040222, |
|
"loss": 2.0073, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -5.354763984680176, |
|
"rewards/margins": 1.7755035161972046, |
|
"rewards/rejected": -7.130267143249512, |
|
"semantic_entropy": 1.0182139873504639, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.9836065573770492, |
|
"grad_norm": 92.55545564226749, |
|
"learning_rate": 7.155641507955445e-10, |
|
"logits/chosen": -1.0736119747161865, |
|
"logits/rejected": -1.0667097568511963, |
|
"logps/chosen": -0.5805756449699402, |
|
"logps/rejected": -0.7572126388549805, |
|
"loss": 2.146, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -5.805756568908691, |
|
"rewards/margins": 1.7663694620132446, |
|
"rewards/rejected": -7.5721259117126465, |
|
"semantic_entropy": 0.9835384488105774, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.994535519125683, |
|
"grad_norm": 79.92510322372067, |
|
"learning_rate": 5.842620032053824e-11, |
|
"logits/chosen": -1.0938892364501953, |
|
"logits/rejected": -1.0882636308670044, |
|
"logps/chosen": -0.5764094591140747, |
|
"logps/rejected": -0.7290435433387756, |
|
"loss": 2.187, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -5.764094829559326, |
|
"rewards/margins": 1.5263407230377197, |
|
"rewards/rejected": -7.290434837341309, |
|
"semantic_entropy": 1.0177193880081177, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.9989071038251366, |
|
"step": 457, |
|
"total_flos": 0.0, |
|
"train_loss": 2.4655840506438875, |
|
"train_runtime": 5955.1851, |
|
"train_samples_per_second": 9.833, |
|
"train_steps_per_second": 0.077 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 457, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|