|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9984591679506933, |
|
"eval_steps": 100, |
|
"global_step": 324, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 6.101982836222645, |
|
"learning_rate": 1.5151515151515152e-07, |
|
"logits/chosen": -0.362821102142334, |
|
"logits/rejected": -0.6466645002365112, |
|
"logps/chosen": -1025.3448486328125, |
|
"logps/rejected": -1304.718017578125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 6.0068256381560765, |
|
"learning_rate": 1.5151515151515152e-06, |
|
"logits/chosen": -0.6083016991615295, |
|
"logits/rejected": -0.6111394166946411, |
|
"logps/chosen": -990.301025390625, |
|
"logps/rejected": -1385.5863037109375, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.4583333432674408, |
|
"rewards/chosen": -0.00793336983770132, |
|
"rewards/margins": 0.0015673839952796698, |
|
"rewards/rejected": -0.009500754997134209, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 6.64385894959999, |
|
"learning_rate": 3.0303030303030305e-06, |
|
"logits/chosen": -0.39747971296310425, |
|
"logits/rejected": -0.5266290903091431, |
|
"logps/chosen": -1019.9202270507812, |
|
"logps/rejected": -1275.5029296875, |
|
"loss": 0.6306, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.17681096494197845, |
|
"rewards/margins": 0.19123901426792145, |
|
"rewards/rejected": -0.3680500090122223, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 5.146938095650329, |
|
"learning_rate": 4.5454545454545455e-06, |
|
"logits/chosen": -0.3289431631565094, |
|
"logits/rejected": -0.3537369966506958, |
|
"logps/chosen": -914.8097534179688, |
|
"logps/rejected": -1425.679443359375, |
|
"loss": 0.5081, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.23936215043067932, |
|
"rewards/margins": 0.8084670305252075, |
|
"rewards/rejected": -1.0478291511535645, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 11.382612181193535, |
|
"learning_rate": 4.9928646847826494e-06, |
|
"logits/chosen": -0.27268069982528687, |
|
"logits/rejected": -0.3392156958580017, |
|
"logps/chosen": -1024.892578125, |
|
"logps/rejected": -1513.9617919921875, |
|
"loss": 0.4356, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.20300360023975372, |
|
"rewards/margins": 1.8095756769180298, |
|
"rewards/rejected": -2.0125787258148193, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 4.485452143975147, |
|
"learning_rate": 4.958014217656855e-06, |
|
"logits/chosen": -0.23044054210186005, |
|
"logits/rejected": -0.25221356749534607, |
|
"logps/chosen": -967.2037353515625, |
|
"logps/rejected": -1537.017333984375, |
|
"loss": 0.3801, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -0.12485456466674805, |
|
"rewards/margins": 2.005478620529175, |
|
"rewards/rejected": -2.130333185195923, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 3.8070457431642892, |
|
"learning_rate": 4.894543310469968e-06, |
|
"logits/chosen": -0.19517004489898682, |
|
"logits/rejected": -0.22597365081310272, |
|
"logps/chosen": -916.4852294921875, |
|
"logps/rejected": -1595.1839599609375, |
|
"loss": 0.3655, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.2139274626970291, |
|
"rewards/margins": 2.360320568084717, |
|
"rewards/rejected": -2.5742483139038086, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 3.6141979805383726, |
|
"learning_rate": 4.803191000971128e-06, |
|
"logits/chosen": -0.17929306626319885, |
|
"logits/rejected": -0.18331752717494965, |
|
"logps/chosen": -965.8648681640625, |
|
"logps/rejected": -1572.9818115234375, |
|
"loss": 0.3243, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -0.4429554343223572, |
|
"rewards/margins": 2.501615047454834, |
|
"rewards/rejected": -2.944570541381836, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 3.9698745789179712, |
|
"learning_rate": 4.68502097027319e-06, |
|
"logits/chosen": -0.18549516797065735, |
|
"logits/rejected": -0.30454546213150024, |
|
"logps/chosen": -881.955078125, |
|
"logps/rejected": -1555.6883544921875, |
|
"loss": 0.284, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.10575978457927704, |
|
"rewards/margins": 2.606116771697998, |
|
"rewards/rejected": -2.711876392364502, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 4.915459926093003, |
|
"learning_rate": 4.541409157643027e-06, |
|
"logits/chosen": -0.2555353045463562, |
|
"logits/rejected": -0.3517759442329407, |
|
"logps/chosen": -1006.6803588867188, |
|
"logps/rejected": -1699.8876953125, |
|
"loss": 0.2626, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.24506595730781555, |
|
"rewards/margins": 3.7898712158203125, |
|
"rewards/rejected": -4.034937381744385, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 5.579746591418778, |
|
"learning_rate": 4.374027739443953e-06, |
|
"logits/chosen": -0.2530584931373596, |
|
"logits/rejected": -0.39778950810432434, |
|
"logps/chosen": -1006.4603271484375, |
|
"logps/rejected": -1830.1383056640625, |
|
"loss": 0.249, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -0.23174142837524414, |
|
"rewards/margins": 4.588972091674805, |
|
"rewards/rejected": -4.820713996887207, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_logits/chosen": -0.2358812391757965, |
|
"eval_logits/rejected": -0.266615092754364, |
|
"eval_logps/chosen": -535.3107299804688, |
|
"eval_logps/rejected": -1504.041259765625, |
|
"eval_loss": 0.3604305684566498, |
|
"eval_rewards/accuracies": 0.8942307829856873, |
|
"eval_rewards/chosen": -0.7724042534828186, |
|
"eval_rewards/margins": 7.1227898597717285, |
|
"eval_rewards/rejected": -7.8951945304870605, |
|
"eval_runtime": 41.33, |
|
"eval_samples_per_second": 9.581, |
|
"eval_steps_per_second": 0.315, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 13.962571315802977, |
|
"learning_rate": 4.184825658775027e-06, |
|
"logits/chosen": -0.35490721464157104, |
|
"logits/rejected": -0.3757438659667969, |
|
"logps/chosen": -973.4483642578125, |
|
"logps/rejected": -1818.024658203125, |
|
"loss": 0.2291, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.5153377056121826, |
|
"rewards/margins": 3.8287353515625, |
|
"rewards/rejected": -4.344073295593262, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 3.793575374162859, |
|
"learning_rate": 3.976005932514807e-06, |
|
"logits/chosen": -0.3033773601055145, |
|
"logits/rejected": -0.33670344948768616, |
|
"logps/chosen": -1026.07373046875, |
|
"logps/rejected": -1623.307861328125, |
|
"loss": 0.1906, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.40073472261428833, |
|
"rewards/margins": 3.711804151535034, |
|
"rewards/rejected": -4.112539768218994, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 3.3455045057377073, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"logits/chosen": -0.22906668484210968, |
|
"logits/rejected": -0.30051860213279724, |
|
"logps/chosen": -947.1951904296875, |
|
"logps/rejected": -1786.0419921875, |
|
"loss": 0.1972, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.5512313842773438, |
|
"rewards/margins": 3.6224727630615234, |
|
"rewards/rejected": -4.173704147338867, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 5.442047168951181, |
|
"learning_rate": 3.5094394120160047e-06, |
|
"logits/chosen": -0.2941485047340393, |
|
"logits/rejected": -0.324366956949234, |
|
"logps/chosen": -1009.9542236328125, |
|
"logps/rejected": -1759.7135009765625, |
|
"loss": 0.2106, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.6786917448043823, |
|
"rewards/margins": 3.91167950630188, |
|
"rewards/rejected": -4.590371608734131, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 4.215335437797857, |
|
"learning_rate": 3.257125189744877e-06, |
|
"logits/chosen": -0.32115620374679565, |
|
"logits/rejected": -0.36864355206489563, |
|
"logps/chosen": -954.0335693359375, |
|
"logps/rejected": -1671.050537109375, |
|
"loss": 0.1917, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.24637527763843536, |
|
"rewards/margins": 3.504626750946045, |
|
"rewards/rejected": -3.751002073287964, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 2.836600464080731, |
|
"learning_rate": 2.9959952104467247e-06, |
|
"logits/chosen": -0.3462420105934143, |
|
"logits/rejected": -0.37651991844177246, |
|
"logps/chosen": -1160.772216796875, |
|
"logps/rejected": -1859.069091796875, |
|
"loss": 0.1688, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.4739566743373871, |
|
"rewards/margins": 4.305468559265137, |
|
"rewards/rejected": -4.779424667358398, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 7.01626911931463, |
|
"learning_rate": 2.729089999626637e-06, |
|
"logits/chosen": -0.323803573846817, |
|
"logits/rejected": -0.38482701778411865, |
|
"logps/chosen": -950.5234375, |
|
"logps/rejected": -1763.620849609375, |
|
"loss": 0.1733, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.5791029930114746, |
|
"rewards/margins": 4.474118709564209, |
|
"rewards/rejected": -5.053222179412842, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 2.8279150301129055, |
|
"learning_rate": 2.4595173279937464e-06, |
|
"logits/chosen": -0.373486191034317, |
|
"logits/rejected": -0.42519837617874146, |
|
"logps/chosen": -935.4544677734375, |
|
"logps/rejected": -1869.3843994140625, |
|
"loss": 0.126, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.6261727809906006, |
|
"rewards/margins": 5.2783613204956055, |
|
"rewards/rejected": -5.904534339904785, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 5.49355443422865, |
|
"learning_rate": 2.190416025435675e-06, |
|
"logits/chosen": -0.40133827924728394, |
|
"logits/rejected": -0.4126282334327698, |
|
"logps/chosen": -1012.4228515625, |
|
"logps/rejected": -1692.1015625, |
|
"loss": 0.1903, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.6535183191299438, |
|
"rewards/margins": 4.786801815032959, |
|
"rewards/rejected": -5.440320014953613, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 3.3031942791417244, |
|
"learning_rate": 1.9249194333484567e-06, |
|
"logits/chosen": -0.32231295108795166, |
|
"logits/rejected": -0.42156219482421875, |
|
"logps/chosen": -821.6759643554688, |
|
"logps/rejected": -1748.581787109375, |
|
"loss": 0.1374, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.3118464946746826, |
|
"rewards/margins": 4.042534828186035, |
|
"rewards/rejected": -4.354381561279297, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_logits/chosen": -0.2821931540966034, |
|
"eval_logits/rejected": -0.1753174513578415, |
|
"eval_logps/chosen": -550.3823852539062, |
|
"eval_logps/rejected": -1521.086181640625, |
|
"eval_loss": 0.23887068033218384, |
|
"eval_rewards/accuracies": 0.9038461446762085, |
|
"eval_rewards/chosen": -0.9231204390525818, |
|
"eval_rewards/margins": 7.142522811889648, |
|
"eval_rewards/rejected": -8.065644264221191, |
|
"eval_runtime": 41.3932, |
|
"eval_samples_per_second": 9.567, |
|
"eval_steps_per_second": 0.314, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 5.527965146477785, |
|
"learning_rate": 1.6661189208729492e-06, |
|
"logits/chosen": -0.38927820324897766, |
|
"logits/rejected": -0.5214006304740906, |
|
"logps/chosen": -1015.1365356445312, |
|
"logps/rejected": -1966.6363525390625, |
|
"loss": 0.1308, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.6492040157318115, |
|
"rewards/margins": 5.716192722320557, |
|
"rewards/rejected": -6.365396022796631, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 3.602893800956427, |
|
"learning_rate": 1.4170278898446176e-06, |
|
"logits/chosen": -0.4857853055000305, |
|
"logits/rejected": -0.529462993144989, |
|
"logps/chosen": -1030.688232421875, |
|
"logps/rejected": -1932.1002197265625, |
|
"loss": 0.1249, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.7277423143386841, |
|
"rewards/margins": 5.351422309875488, |
|
"rewards/rejected": -6.079164028167725, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 4.7188705667925674, |
|
"learning_rate": 1.1805466875731277e-06, |
|
"logits/chosen": -0.49866923689842224, |
|
"logits/rejected": -0.6342719793319702, |
|
"logps/chosen": -1055.165771484375, |
|
"logps/rejected": -1956.5159912109375, |
|
"loss": 0.1241, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.6889677047729492, |
|
"rewards/margins": 5.961886405944824, |
|
"rewards/rejected": -6.650854587554932, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 3.655545564406198, |
|
"learning_rate": 9.594288359976817e-07, |
|
"logits/chosen": -0.48687905073165894, |
|
"logits/rejected": -0.5934125185012817, |
|
"logps/chosen": -927.3426513671875, |
|
"logps/rejected": -1977.37890625, |
|
"loss": 0.1424, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.49007558822631836, |
|
"rewards/margins": 5.872694969177246, |
|
"rewards/rejected": -6.362771034240723, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 3.6147182029800553, |
|
"learning_rate": 7.56248970436493e-07, |
|
"logits/chosen": -0.4635826647281647, |
|
"logits/rejected": -0.5487635135650635, |
|
"logps/chosen": -992.0515747070312, |
|
"logps/rejected": -1830.902587890625, |
|
"loss": 0.133, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.31789669394493103, |
|
"rewards/margins": 5.5414719581604, |
|
"rewards/rejected": -5.859368801116943, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 7.0194030888446655, |
|
"learning_rate": 5.733728612427772e-07, |
|
"logits/chosen": -0.48970723152160645, |
|
"logits/rejected": -0.5219728350639343, |
|
"logps/chosen": -932.720703125, |
|
"logps/rejected": -1843.7044677734375, |
|
"loss": 0.1156, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.4680628776550293, |
|
"rewards/margins": 5.688261032104492, |
|
"rewards/rejected": -6.1563239097595215, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 3.4254663130723073, |
|
"learning_rate": 4.129298674268226e-07, |
|
"logits/chosen": -0.47387346625328064, |
|
"logits/rejected": -0.5744868516921997, |
|
"logps/chosen": -919.7806396484375, |
|
"logps/rejected": -2094.39404296875, |
|
"loss": 0.1271, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.5868527293205261, |
|
"rewards/margins": 6.565216064453125, |
|
"rewards/rejected": -7.152068138122559, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 2.915035180237974, |
|
"learning_rate": 2.7678814298657735e-07, |
|
"logits/chosen": -0.48424941301345825, |
|
"logits/rejected": -0.5429133176803589, |
|
"logps/chosen": -989.7317504882812, |
|
"logps/rejected": -2131.172607421875, |
|
"loss": 0.112, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.640870213508606, |
|
"rewards/margins": 6.7747802734375, |
|
"rewards/rejected": -7.415650844573975, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 4.035709496896224, |
|
"learning_rate": 1.6653288463741064e-07, |
|
"logits/chosen": -0.5001234412193298, |
|
"logits/rejected": -0.5271893739700317, |
|
"logps/chosen": -983.5861206054688, |
|
"logps/rejected": -2070.129150390625, |
|
"loss": 0.119, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.6213029623031616, |
|
"rewards/margins": 7.315102577209473, |
|
"rewards/rejected": -7.936405181884766, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 3.63054147536607, |
|
"learning_rate": 8.344787421847216e-08, |
|
"logits/chosen": -0.4631820619106293, |
|
"logits/rejected": -0.5495749711990356, |
|
"logps/chosen": -926.2537231445312, |
|
"logps/rejected": -1872.173095703125, |
|
"loss": 0.0982, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.8281421661376953, |
|
"rewards/margins": 5.6122145652771, |
|
"rewards/rejected": -6.440356254577637, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_logits/chosen": -0.3568806052207947, |
|
"eval_logits/rejected": -0.21113936603069305, |
|
"eval_logps/chosen": -567.682861328125, |
|
"eval_logps/rejected": -2033.01416015625, |
|
"eval_loss": 0.24133986234664917, |
|
"eval_rewards/accuracies": 0.8942307829856873, |
|
"eval_rewards/chosen": -1.0961254835128784, |
|
"eval_rewards/margins": 12.088796615600586, |
|
"eval_rewards/rejected": -13.184922218322754, |
|
"eval_runtime": 41.398, |
|
"eval_samples_per_second": 9.566, |
|
"eval_steps_per_second": 0.314, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 7.390559743925079, |
|
"learning_rate": 2.850053069080344e-08, |
|
"logits/chosen": -0.4385458827018738, |
|
"logits/rejected": -0.5586596131324768, |
|
"logps/chosen": -981.732421875, |
|
"logps/rejected": -2004.222900390625, |
|
"loss": 0.1192, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.7575939893722534, |
|
"rewards/margins": 6.195023536682129, |
|
"rewards/rejected": -6.952617645263672, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 7.4456834695369585, |
|
"learning_rate": 2.330645777598173e-09, |
|
"logits/chosen": -0.5341562032699585, |
|
"logits/rejected": -0.5692285299301147, |
|
"logps/chosen": -944.4517822265625, |
|
"logps/rejected": -1914.8863525390625, |
|
"loss": 0.1173, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.5554883480072021, |
|
"rewards/margins": 6.5892229080200195, |
|
"rewards/rejected": -7.144711494445801, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 324, |
|
"total_flos": 0.0, |
|
"train_loss": 0.23018195102980107, |
|
"train_runtime": 4792.5755, |
|
"train_samples_per_second": 4.331, |
|
"train_steps_per_second": 0.068 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 324, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|