|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 2942, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.694915254237288e-09, |
|
"logits/chosen": -3.241600751876831, |
|
"logits/rejected": -2.8775925636291504, |
|
"logps/chosen": -233.8565673828125, |
|
"logps/rejected": -768.6746215820312, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.6949152542372882e-08, |
|
"logits/chosen": -3.183110475540161, |
|
"logits/rejected": -3.2940425872802734, |
|
"logps/chosen": -479.7237548828125, |
|
"logps/rejected": -508.1022644042969, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.4305555522441864, |
|
"rewards/chosen": -0.0011810138821601868, |
|
"rewards/margins": -0.001814355025999248, |
|
"rewards/rejected": 0.0006333404453471303, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.3898305084745764e-08, |
|
"logits/chosen": -3.196739912033081, |
|
"logits/rejected": -3.3269875049591064, |
|
"logps/chosen": -235.5388946533203, |
|
"logps/rejected": -412.62353515625, |
|
"loss": 0.6541, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.04803984612226486, |
|
"rewards/margins": 0.09699604660272598, |
|
"rewards/rejected": -0.048956211656332016, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.0847457627118645e-08, |
|
"logits/chosen": -3.134932041168213, |
|
"logits/rejected": -3.2440593242645264, |
|
"logps/chosen": -389.2142639160156, |
|
"logps/rejected": -413.16461181640625, |
|
"loss": 0.5485, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.0791018083691597, |
|
"rewards/margins": 0.25357091426849365, |
|
"rewards/rejected": -0.17446911334991455, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.779661016949153e-08, |
|
"logits/chosen": -3.1792006492614746, |
|
"logits/rejected": -3.3407816886901855, |
|
"logps/chosen": -251.45498657226562, |
|
"logps/rejected": -452.0169982910156, |
|
"loss": 0.4281, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.3247135281562805, |
|
"rewards/margins": 0.7740644812583923, |
|
"rewards/rejected": -0.4493509829044342, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.47457627118644e-08, |
|
"logits/chosen": -3.2240090370178223, |
|
"logits/rejected": -3.144906520843506, |
|
"logps/chosen": -191.65354919433594, |
|
"logps/rejected": -543.9041748046875, |
|
"loss": 0.3076, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.7691439986228943, |
|
"rewards/margins": 1.466748595237732, |
|
"rewards/rejected": -0.6976046562194824, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0169491525423729e-07, |
|
"logits/chosen": -3.2373671531677246, |
|
"logits/rejected": -3.348659038543701, |
|
"logps/chosen": -271.8299560546875, |
|
"logps/rejected": -320.3260803222656, |
|
"loss": 0.2537, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.8796433210372925, |
|
"rewards/margins": 1.8711017370224, |
|
"rewards/rejected": -0.9914585947990417, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.1864406779661017e-07, |
|
"logits/chosen": -3.1753342151641846, |
|
"logits/rejected": -3.235198974609375, |
|
"logps/chosen": -256.61114501953125, |
|
"logps/rejected": -491.931640625, |
|
"loss": 0.2074, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.8632518649101257, |
|
"rewards/margins": 2.171854019165039, |
|
"rewards/rejected": -1.3086023330688477, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.3559322033898305e-07, |
|
"logits/chosen": -3.1588549613952637, |
|
"logits/rejected": -3.2258479595184326, |
|
"logps/chosen": -386.23126220703125, |
|
"logps/rejected": -396.27880859375, |
|
"loss": 0.1891, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.9385740160942078, |
|
"rewards/margins": 2.8416147232055664, |
|
"rewards/rejected": -1.9030405282974243, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5254237288135593e-07, |
|
"logits/chosen": -3.170820951461792, |
|
"logits/rejected": -3.3037006855010986, |
|
"logps/chosen": -204.9710235595703, |
|
"logps/rejected": -283.68133544921875, |
|
"loss": 0.1613, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.398224115371704, |
|
"rewards/margins": 3.5001823902130127, |
|
"rewards/rejected": -2.1019580364227295, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.694915254237288e-07, |
|
"logits/chosen": -3.1509833335876465, |
|
"logits/rejected": -3.2727856636047363, |
|
"logps/chosen": -182.78797912597656, |
|
"logps/rejected": -422.30426025390625, |
|
"loss": 0.1608, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.3794167041778564, |
|
"rewards/margins": 4.1482086181640625, |
|
"rewards/rejected": -2.768791913986206, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_logits/chosen": -3.2044522762298584, |
|
"eval_logits/rejected": -3.2204079627990723, |
|
"eval_logps/chosen": -243.43142700195312, |
|
"eval_logps/rejected": -450.3221740722656, |
|
"eval_loss": 0.16541637480258942, |
|
"eval_rewards/accuracies": 0.9570707082748413, |
|
"eval_rewards/chosen": 1.2374151945114136, |
|
"eval_rewards/margins": 3.846338987350464, |
|
"eval_rewards/rejected": -2.6089231967926025, |
|
"eval_runtime": 452.6529, |
|
"eval_samples_per_second": 20.987, |
|
"eval_steps_per_second": 0.656, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.8644067796610168e-07, |
|
"logits/chosen": -3.1778359413146973, |
|
"logits/rejected": -3.0348751544952393, |
|
"logps/chosen": -191.72665405273438, |
|
"logps/rejected": -667.9417724609375, |
|
"loss": 0.1325, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.5171259641647339, |
|
"rewards/margins": 3.9871017932891846, |
|
"rewards/rejected": -2.4699759483337402, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0338983050847458e-07, |
|
"logits/chosen": -3.21209979057312, |
|
"logits/rejected": -3.245016574859619, |
|
"logps/chosen": -206.03567504882812, |
|
"logps/rejected": -441.95758056640625, |
|
"loss": 0.1188, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.7967685461044312, |
|
"rewards/margins": 4.877519607543945, |
|
"rewards/rejected": -3.0807507038116455, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.2033898305084743e-07, |
|
"logits/chosen": -3.121410369873047, |
|
"logits/rejected": -3.2283928394317627, |
|
"logps/chosen": -255.36746215820312, |
|
"logps/rejected": -485.18017578125, |
|
"loss": 0.2426, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.4470430612564087, |
|
"rewards/margins": 5.143087387084961, |
|
"rewards/rejected": -3.6960442066192627, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.3728813559322033e-07, |
|
"logits/chosen": -3.1555418968200684, |
|
"logits/rejected": -3.1503801345825195, |
|
"logps/chosen": -167.48965454101562, |
|
"logps/rejected": -516.3311767578125, |
|
"loss": 0.1002, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.1309149265289307, |
|
"rewards/margins": 4.941633701324463, |
|
"rewards/rejected": -3.810718059539795, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.542372881355932e-07, |
|
"logits/chosen": -3.143369674682617, |
|
"logits/rejected": -3.172342538833618, |
|
"logps/chosen": -197.7274169921875, |
|
"logps/rejected": -469.4510803222656, |
|
"loss": 0.1209, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.3236812353134155, |
|
"rewards/margins": 5.159438133239746, |
|
"rewards/rejected": -3.8357574939727783, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.711864406779661e-07, |
|
"logits/chosen": -3.113320827484131, |
|
"logits/rejected": -3.179581642150879, |
|
"logps/chosen": -190.3457794189453, |
|
"logps/rejected": -422.5477600097656, |
|
"loss": 0.1031, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.2049860954284668, |
|
"rewards/margins": 5.337760925292969, |
|
"rewards/rejected": -4.132774353027344, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.88135593220339e-07, |
|
"logits/chosen": -3.1512656211853027, |
|
"logits/rejected": -3.2015151977539062, |
|
"logps/chosen": -291.2518615722656, |
|
"logps/rejected": -427.808837890625, |
|
"loss": 0.0989, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.2022926807403564, |
|
"rewards/margins": 6.1538920402526855, |
|
"rewards/rejected": -4.951599597930908, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.0508474576271186e-07, |
|
"logits/chosen": -3.0925116539001465, |
|
"logits/rejected": -3.1885197162628174, |
|
"logps/chosen": -316.8929138183594, |
|
"logps/rejected": -568.46435546875, |
|
"loss": 0.1677, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.3121305704116821, |
|
"rewards/margins": 6.406019687652588, |
|
"rewards/rejected": -5.093889236450195, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.220338983050847e-07, |
|
"logits/chosen": -3.191131830215454, |
|
"logits/rejected": -3.2551910877227783, |
|
"logps/chosen": -200.42120361328125, |
|
"logps/rejected": -473.00506591796875, |
|
"loss": 0.0712, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.504188060760498, |
|
"rewards/margins": 6.774598598480225, |
|
"rewards/rejected": -5.270411491394043, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.389830508474576e-07, |
|
"logits/chosen": -3.1266510486602783, |
|
"logits/rejected": -3.2319884300231934, |
|
"logps/chosen": -254.8800811767578, |
|
"logps/rejected": -386.7709045410156, |
|
"loss": 0.1349, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.716153621673584, |
|
"rewards/margins": 6.651679992675781, |
|
"rewards/rejected": -5.935526371002197, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_logits/chosen": -3.2216391563415527, |
|
"eval_logits/rejected": -3.1898040771484375, |
|
"eval_logps/chosen": -246.39938354492188, |
|
"eval_logps/rejected": -487.68365478515625, |
|
"eval_loss": 0.09607043862342834, |
|
"eval_rewards/accuracies": 0.9755892157554626, |
|
"eval_rewards/chosen": 0.9406206011772156, |
|
"eval_rewards/margins": 7.285686492919922, |
|
"eval_rewards/rejected": -6.345065593719482, |
|
"eval_runtime": 451.6853, |
|
"eval_samples_per_second": 21.032, |
|
"eval_steps_per_second": 0.658, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.559322033898305e-07, |
|
"logits/chosen": -3.162513256072998, |
|
"logits/rejected": -3.0620617866516113, |
|
"logps/chosen": -185.5023651123047, |
|
"logps/rejected": -624.7681274414062, |
|
"loss": 0.1602, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.2032901048660278, |
|
"rewards/margins": 7.2529778480529785, |
|
"rewards/rejected": -6.04968786239624, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.7288135593220336e-07, |
|
"logits/chosen": -3.16162109375, |
|
"logits/rejected": -3.2060294151306152, |
|
"logps/chosen": -306.1747131347656, |
|
"logps/rejected": -416.8641662597656, |
|
"loss": 0.0542, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0064268112182617, |
|
"rewards/margins": 7.897356986999512, |
|
"rewards/rejected": -6.890931129455566, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.898305084745763e-07, |
|
"logits/chosen": -3.1517322063446045, |
|
"logits/rejected": -3.153977394104004, |
|
"logps/chosen": -265.21539306640625, |
|
"logps/rejected": -504.4422912597656, |
|
"loss": 0.0677, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.7571786642074585, |
|
"rewards/margins": 7.3733367919921875, |
|
"rewards/rejected": -6.616157531738281, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.0677966101694916e-07, |
|
"logits/chosen": -3.0925240516662598, |
|
"logits/rejected": -3.102187395095825, |
|
"logps/chosen": -250.0133514404297, |
|
"logps/rejected": -633.9852294921875, |
|
"loss": 0.0745, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.48636823892593384, |
|
"rewards/margins": 9.285165786743164, |
|
"rewards/rejected": -8.798797607421875, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.23728813559322e-07, |
|
"logits/chosen": -3.176499843597412, |
|
"logits/rejected": -3.1582164764404297, |
|
"logps/chosen": -195.40415954589844, |
|
"logps/rejected": -477.47833251953125, |
|
"loss": 0.3177, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.0647075176239014, |
|
"rewards/margins": 8.443204879760742, |
|
"rewards/rejected": -7.378498077392578, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.4067796610169486e-07, |
|
"logits/chosen": -3.1202080249786377, |
|
"logits/rejected": -3.175307273864746, |
|
"logps/chosen": -255.7227783203125, |
|
"logps/rejected": -521.5419311523438, |
|
"loss": 0.0741, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.017443299293518, |
|
"rewards/margins": 9.245269775390625, |
|
"rewards/rejected": -8.227825164794922, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.576271186440678e-07, |
|
"logits/chosen": -3.1279916763305664, |
|
"logits/rejected": -3.19228196144104, |
|
"logps/chosen": -238.5277862548828, |
|
"logps/rejected": -373.4498596191406, |
|
"loss": 0.0622, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.6001161336898804, |
|
"rewards/margins": 8.970643043518066, |
|
"rewards/rejected": -8.370526313781738, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.7457627118644066e-07, |
|
"logits/chosen": -3.114872694015503, |
|
"logits/rejected": -3.2416419982910156, |
|
"logps/chosen": -214.206298828125, |
|
"logps/rejected": -393.0050354003906, |
|
"loss": 0.0953, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.1830778867006302, |
|
"rewards/margins": 10.744420051574707, |
|
"rewards/rejected": -10.561342239379883, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.915254237288136e-07, |
|
"logits/chosen": -3.1324026584625244, |
|
"logits/rejected": -3.228682041168213, |
|
"logps/chosen": -186.04457092285156, |
|
"logps/rejected": -486.37286376953125, |
|
"loss": 0.0347, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.23065343499183655, |
|
"rewards/margins": 7.208334445953369, |
|
"rewards/rejected": -6.9776811599731445, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.990555345674349e-07, |
|
"logits/chosen": -3.194873094558716, |
|
"logits/rejected": -3.2273032665252686, |
|
"logps/chosen": -205.6195831298828, |
|
"logps/rejected": -589.7122192382812, |
|
"loss": 0.1065, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19616279006004333, |
|
"rewards/margins": 8.628320693969727, |
|
"rewards/rejected": -8.432156562805176, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_logits/chosen": -3.2283244132995605, |
|
"eval_logits/rejected": -3.1999356746673584, |
|
"eval_logps/chosen": -258.00885009765625, |
|
"eval_logps/rejected": -516.9434204101562, |
|
"eval_loss": 0.10145324468612671, |
|
"eval_rewards/accuracies": 0.9840067625045776, |
|
"eval_rewards/chosen": -0.22032807767391205, |
|
"eval_rewards/margins": 9.050712585449219, |
|
"eval_rewards/rejected": -9.271040916442871, |
|
"eval_runtime": 451.8175, |
|
"eval_samples_per_second": 21.026, |
|
"eval_steps_per_second": 0.657, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.971666037023044e-07, |
|
"logits/chosen": -3.2539610862731934, |
|
"logits/rejected": -3.189645290374756, |
|
"logps/chosen": -218.11282348632812, |
|
"logps/rejected": -482.13238525390625, |
|
"loss": 0.1651, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.5336198210716248, |
|
"rewards/margins": 8.224891662597656, |
|
"rewards/rejected": -8.758512496948242, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.952776728371742e-07, |
|
"logits/chosen": -3.1139378547668457, |
|
"logits/rejected": -3.1811156272888184, |
|
"logps/chosen": -395.83807373046875, |
|
"logps/rejected": -422.86126708984375, |
|
"loss": 0.0746, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.5153623819351196, |
|
"rewards/margins": 8.82536506652832, |
|
"rewards/rejected": -9.340726852416992, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.933887419720438e-07, |
|
"logits/chosen": -3.185594320297241, |
|
"logits/rejected": -3.141575574874878, |
|
"logps/chosen": -274.31475830078125, |
|
"logps/rejected": -605.5130004882812, |
|
"loss": 0.0585, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.5278395414352417, |
|
"rewards/margins": 9.545147895812988, |
|
"rewards/rejected": -10.07298755645752, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.914998111069135e-07, |
|
"logits/chosen": -3.2110514640808105, |
|
"logits/rejected": -3.252934217453003, |
|
"logps/chosen": -197.74810791015625, |
|
"logps/rejected": -527.8907470703125, |
|
"loss": 0.0403, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.15357597172260284, |
|
"rewards/margins": 10.004459381103516, |
|
"rewards/rejected": -10.158035278320312, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.896108802417831e-07, |
|
"logits/chosen": -3.2196593284606934, |
|
"logits/rejected": -3.116806745529175, |
|
"logps/chosen": -252.00607299804688, |
|
"logps/rejected": -586.9210205078125, |
|
"loss": 0.0643, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3348214626312256, |
|
"rewards/margins": 10.310604095458984, |
|
"rewards/rejected": -10.645425796508789, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.877219493766528e-07, |
|
"logits/chosen": -3.2220287322998047, |
|
"logits/rejected": -3.3167202472686768, |
|
"logps/chosen": -195.5267333984375, |
|
"logps/rejected": -458.151123046875, |
|
"loss": 0.0394, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.5834547877311707, |
|
"rewards/margins": 13.117718696594238, |
|
"rewards/rejected": -13.701173782348633, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.858330185115224e-07, |
|
"logits/chosen": -3.207420825958252, |
|
"logits/rejected": -3.1515700817108154, |
|
"logps/chosen": -204.42086791992188, |
|
"logps/rejected": -775.6949462890625, |
|
"loss": 0.1166, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.6016790270805359, |
|
"rewards/margins": 10.775850296020508, |
|
"rewards/rejected": -11.377527236938477, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.839440876463921e-07, |
|
"logits/chosen": -3.261585235595703, |
|
"logits/rejected": -3.1481096744537354, |
|
"logps/chosen": -224.3874053955078, |
|
"logps/rejected": -682.3482055664062, |
|
"loss": 0.0561, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.30562129616737366, |
|
"rewards/margins": 11.442914962768555, |
|
"rewards/rejected": -11.748537063598633, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.820551567812618e-07, |
|
"logits/chosen": -3.1784658432006836, |
|
"logits/rejected": -3.128221273422241, |
|
"logps/chosen": -276.6926574707031, |
|
"logps/rejected": -459.2041931152344, |
|
"loss": 0.0602, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.6345301866531372, |
|
"rewards/margins": 11.814035415649414, |
|
"rewards/rejected": -12.448564529418945, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.801662259161314e-07, |
|
"logits/chosen": -3.2648723125457764, |
|
"logits/rejected": -3.161604404449463, |
|
"logps/chosen": -217.42861938476562, |
|
"logps/rejected": -682.0848388671875, |
|
"loss": 0.0876, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.6267936825752258, |
|
"rewards/margins": 13.1732177734375, |
|
"rewards/rejected": -13.800012588500977, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_logits/chosen": -3.275331974029541, |
|
"eval_logits/rejected": -3.206566095352173, |
|
"eval_logps/chosen": -270.2173767089844, |
|
"eval_logps/rejected": -561.2250366210938, |
|
"eval_loss": 0.059712644666433334, |
|
"eval_rewards/accuracies": 0.9865319728851318, |
|
"eval_rewards/chosen": -1.4411805868148804, |
|
"eval_rewards/margins": 12.258034706115723, |
|
"eval_rewards/rejected": -13.699216842651367, |
|
"eval_runtime": 451.4664, |
|
"eval_samples_per_second": 21.043, |
|
"eval_steps_per_second": 0.658, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.782772950510011e-07, |
|
"logits/chosen": -3.2593846321105957, |
|
"logits/rejected": -3.095431327819824, |
|
"logps/chosen": -216.7010498046875, |
|
"logps/rejected": -542.0743408203125, |
|
"loss": 0.0473, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.9579986333847046, |
|
"rewards/margins": 10.643548965454102, |
|
"rewards/rejected": -11.601548194885254, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.7638836418587073e-07, |
|
"logits/chosen": -3.1919872760772705, |
|
"logits/rejected": -3.1519837379455566, |
|
"logps/chosen": -222.6879425048828, |
|
"logps/rejected": -716.9598388671875, |
|
"loss": 0.0867, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.237994909286499, |
|
"rewards/margins": 13.0872220993042, |
|
"rewards/rejected": -14.325218200683594, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7449943332074044e-07, |
|
"logits/chosen": -3.1929614543914795, |
|
"logits/rejected": -3.092634677886963, |
|
"logps/chosen": -203.4615020751953, |
|
"logps/rejected": -651.5323486328125, |
|
"loss": 0.0356, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.7688810229301453, |
|
"rewards/margins": 12.568059921264648, |
|
"rewards/rejected": -13.336939811706543, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7261050245561014e-07, |
|
"logits/chosen": -3.2217884063720703, |
|
"logits/rejected": -3.214984178543091, |
|
"logps/chosen": -209.8181915283203, |
|
"logps/rejected": -500.721923828125, |
|
"loss": 0.039, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.114246129989624, |
|
"rewards/margins": 12.425156593322754, |
|
"rewards/rejected": -13.539402961730957, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7072157159047975e-07, |
|
"logits/chosen": -3.191771984100342, |
|
"logits/rejected": -3.340414524078369, |
|
"logps/chosen": -251.2989501953125, |
|
"logps/rejected": -453.38812255859375, |
|
"loss": 0.0678, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.3671858310699463, |
|
"rewards/margins": 13.762568473815918, |
|
"rewards/rejected": -15.129753112792969, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.6883264072534946e-07, |
|
"logits/chosen": -3.229691982269287, |
|
"logits/rejected": -3.2845585346221924, |
|
"logps/chosen": -275.047119140625, |
|
"logps/rejected": -483.4437561035156, |
|
"loss": 0.0405, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8710290789604187, |
|
"rewards/margins": 13.83751106262207, |
|
"rewards/rejected": -14.708539962768555, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.6694370986021906e-07, |
|
"logits/chosen": -3.261676073074341, |
|
"logits/rejected": -3.308655261993408, |
|
"logps/chosen": -364.6744689941406, |
|
"logps/rejected": -348.94195556640625, |
|
"loss": 0.2232, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.5191086530685425, |
|
"rewards/margins": 12.976631164550781, |
|
"rewards/rejected": -14.49573802947998, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.6505477899508877e-07, |
|
"logits/chosen": -3.34987211227417, |
|
"logits/rejected": -3.1941416263580322, |
|
"logps/chosen": -215.4343719482422, |
|
"logps/rejected": -669.6368408203125, |
|
"loss": 0.1063, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.3562166690826416, |
|
"rewards/margins": 15.658292770385742, |
|
"rewards/rejected": -17.014511108398438, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.631658481299584e-07, |
|
"logits/chosen": -3.346367359161377, |
|
"logits/rejected": -3.2884230613708496, |
|
"logps/chosen": -229.24880981445312, |
|
"logps/rejected": -600.4979858398438, |
|
"loss": 0.0584, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6670745015144348, |
|
"rewards/margins": 18.63577651977539, |
|
"rewards/rejected": -19.3028507232666, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.612769172648281e-07, |
|
"logits/chosen": -3.2286019325256348, |
|
"logits/rejected": -3.272526264190674, |
|
"logps/chosen": -331.59771728515625, |
|
"logps/rejected": -384.60089111328125, |
|
"loss": 0.304, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.12641265988349915, |
|
"rewards/margins": 16.35049819946289, |
|
"rewards/rejected": -16.224084854125977, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_logits/chosen": -3.2601242065429688, |
|
"eval_logits/rejected": -3.20926833152771, |
|
"eval_logps/chosen": -258.48223876953125, |
|
"eval_logps/rejected": -596.7301635742188, |
|
"eval_loss": 0.08744455128908157, |
|
"eval_rewards/accuracies": 0.9890572428703308, |
|
"eval_rewards/chosen": -0.26766717433929443, |
|
"eval_rewards/margins": 16.982051849365234, |
|
"eval_rewards/rejected": -17.249719619750977, |
|
"eval_runtime": 452.2491, |
|
"eval_samples_per_second": 21.006, |
|
"eval_steps_per_second": 0.657, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.5938798639969773e-07, |
|
"logits/chosen": -3.1900811195373535, |
|
"logits/rejected": -3.165273904800415, |
|
"logps/chosen": -310.04840087890625, |
|
"logps/rejected": -706.6512451171875, |
|
"loss": 0.0779, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.08271221071481705, |
|
"rewards/margins": 16.163110733032227, |
|
"rewards/rejected": -16.24582290649414, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.574990555345674e-07, |
|
"logits/chosen": -3.233689546585083, |
|
"logits/rejected": -3.1376094818115234, |
|
"logps/chosen": -227.1941680908203, |
|
"logps/rejected": -826.2154541015625, |
|
"loss": 0.3105, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.272916555404663, |
|
"rewards/margins": 36.48918151855469, |
|
"rewards/rejected": -37.76210021972656, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.556101246694371e-07, |
|
"logits/chosen": -3.3540146350860596, |
|
"logits/rejected": -3.1351585388183594, |
|
"logps/chosen": -206.82461547851562, |
|
"logps/rejected": -887.9490966796875, |
|
"loss": 0.1344, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.5983909368515015, |
|
"rewards/margins": 37.151283264160156, |
|
"rewards/rejected": -37.749671936035156, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.5372119380430675e-07, |
|
"logits/chosen": -3.2952301502227783, |
|
"logits/rejected": -3.2790439128875732, |
|
"logps/chosen": -196.64913940429688, |
|
"logps/rejected": -452.2648010253906, |
|
"loss": 0.0358, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.035662006586790085, |
|
"rewards/margins": 12.354232788085938, |
|
"rewards/rejected": -12.389894485473633, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.518322629391764e-07, |
|
"logits/chosen": -3.339197874069214, |
|
"logits/rejected": -3.2826008796691895, |
|
"logps/chosen": -215.4700469970703, |
|
"logps/rejected": -526.360595703125, |
|
"loss": 0.0365, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.8227304220199585, |
|
"rewards/margins": 13.043925285339355, |
|
"rewards/rejected": -13.866655349731445, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.4994333207404607e-07, |
|
"logits/chosen": -3.238922119140625, |
|
"logits/rejected": -3.2724907398223877, |
|
"logps/chosen": -336.5367126464844, |
|
"logps/rejected": -491.58416748046875, |
|
"loss": 0.0928, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.3954570293426514, |
|
"rewards/margins": 12.810078620910645, |
|
"rewards/rejected": -14.205537796020508, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.480544012089157e-07, |
|
"logits/chosen": -3.1199750900268555, |
|
"logits/rejected": -3.2144436836242676, |
|
"logps/chosen": -372.39630126953125, |
|
"logps/rejected": -409.29168701171875, |
|
"loss": 0.0147, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8765771985054016, |
|
"rewards/margins": 15.976984977722168, |
|
"rewards/rejected": -16.85356330871582, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.461654703437854e-07, |
|
"logits/chosen": -3.1464171409606934, |
|
"logits/rejected": -3.1977782249450684, |
|
"logps/chosen": -329.33721923828125, |
|
"logps/rejected": -579.863525390625, |
|
"loss": 0.0306, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.042082421481609344, |
|
"rewards/margins": 18.116785049438477, |
|
"rewards/rejected": -18.158864974975586, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.442765394786551e-07, |
|
"logits/chosen": -3.2647204399108887, |
|
"logits/rejected": -3.173513889312744, |
|
"logps/chosen": -210.3753662109375, |
|
"logps/rejected": -446.36175537109375, |
|
"loss": 0.1479, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.5811902284622192, |
|
"rewards/margins": 14.033243179321289, |
|
"rewards/rejected": -14.614431381225586, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.423876086135247e-07, |
|
"logits/chosen": -3.1431689262390137, |
|
"logits/rejected": -3.110055685043335, |
|
"logps/chosen": -322.7793884277344, |
|
"logps/rejected": -717.8176879882812, |
|
"loss": 0.1206, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.4665941298007965, |
|
"rewards/margins": 16.126140594482422, |
|
"rewards/rejected": -16.592737197875977, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_logits/chosen": -3.2024269104003906, |
|
"eval_logits/rejected": -3.168919801712036, |
|
"eval_logps/chosen": -260.0578308105469, |
|
"eval_logps/rejected": -580.7472534179688, |
|
"eval_loss": 0.06855383515357971, |
|
"eval_rewards/accuracies": 0.9890572428703308, |
|
"eval_rewards/chosen": -0.42522314190864563, |
|
"eval_rewards/margins": 15.226205825805664, |
|
"eval_rewards/rejected": -15.65142822265625, |
|
"eval_runtime": 462.2117, |
|
"eval_samples_per_second": 20.553, |
|
"eval_steps_per_second": 0.643, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.404986777483944e-07, |
|
"logits/chosen": -3.0911104679107666, |
|
"logits/rejected": -3.2177834510803223, |
|
"logps/chosen": -253.5253448486328, |
|
"logps/rejected": -505.5723571777344, |
|
"loss": 0.0857, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.4803188443183899, |
|
"rewards/margins": 14.278889656066895, |
|
"rewards/rejected": -14.759210586547852, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.3860974688326405e-07, |
|
"logits/chosen": -3.17213773727417, |
|
"logits/rejected": -3.153881072998047, |
|
"logps/chosen": -341.3065185546875, |
|
"logps/rejected": -633.9520874023438, |
|
"loss": 0.0714, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.8739015460014343, |
|
"rewards/margins": 14.758657455444336, |
|
"rewards/rejected": -15.632558822631836, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.367208160181337e-07, |
|
"logits/chosen": -3.1356098651885986, |
|
"logits/rejected": -3.269990921020508, |
|
"logps/chosen": -300.65411376953125, |
|
"logps/rejected": -361.2610778808594, |
|
"loss": 0.114, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.36743250489234924, |
|
"rewards/margins": 14.380537033081055, |
|
"rewards/rejected": -14.747968673706055, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.348318851530034e-07, |
|
"logits/chosen": -3.2255866527557373, |
|
"logits/rejected": -3.2463996410369873, |
|
"logps/chosen": -273.51776123046875, |
|
"logps/rejected": -633.8323364257812, |
|
"loss": 0.4839, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.413738876581192, |
|
"rewards/margins": 17.93083381652832, |
|
"rewards/rejected": -18.344573974609375, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.32942954287873e-07, |
|
"logits/chosen": -3.2475905418395996, |
|
"logits/rejected": -3.3233604431152344, |
|
"logps/chosen": -264.77996826171875, |
|
"logps/rejected": -494.20855712890625, |
|
"loss": 0.0405, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.034877061843872, |
|
"rewards/margins": 19.03714942932129, |
|
"rewards/rejected": -20.072025299072266, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.3105402342274273e-07, |
|
"logits/chosen": -3.2771410942077637, |
|
"logits/rejected": -3.288753032684326, |
|
"logps/chosen": -217.06277465820312, |
|
"logps/rejected": -617.0843505859375, |
|
"loss": 0.0595, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.8464560508728027, |
|
"rewards/margins": 17.9014949798584, |
|
"rewards/rejected": -18.74795150756836, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.2916509255761233e-07, |
|
"logits/chosen": -3.221287488937378, |
|
"logits/rejected": -3.186659812927246, |
|
"logps/chosen": -258.8490295410156, |
|
"logps/rejected": -667.2969970703125, |
|
"loss": 0.0473, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.5467805862426758, |
|
"rewards/margins": 15.2776460647583, |
|
"rewards/rejected": -16.82442855834961, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.2727616169248204e-07, |
|
"logits/chosen": -3.259873151779175, |
|
"logits/rejected": -3.214630126953125, |
|
"logps/chosen": -230.55038452148438, |
|
"logps/rejected": -636.394287109375, |
|
"loss": 0.0119, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.225334644317627, |
|
"rewards/margins": 16.479169845581055, |
|
"rewards/rejected": -17.70450210571289, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.253872308273517e-07, |
|
"logits/chosen": -3.2281653881073, |
|
"logits/rejected": -3.0544328689575195, |
|
"logps/chosen": -301.4876403808594, |
|
"logps/rejected": -634.31689453125, |
|
"loss": 0.0151, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.3511894941329956, |
|
"rewards/margins": 17.707965850830078, |
|
"rewards/rejected": -18.05915641784668, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.2349829996222135e-07, |
|
"logits/chosen": -3.2615528106689453, |
|
"logits/rejected": -3.146155834197998, |
|
"logps/chosen": -257.6978759765625, |
|
"logps/rejected": -651.7713012695312, |
|
"loss": 0.0176, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4299692213535309, |
|
"rewards/margins": 16.86044692993164, |
|
"rewards/rejected": -17.290416717529297, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_logits/chosen": -3.2957923412323, |
|
"eval_logits/rejected": -3.2304515838623047, |
|
"eval_logps/chosen": -262.88763427734375, |
|
"eval_logps/rejected": -599.524169921875, |
|
"eval_loss": 0.06300165504217148, |
|
"eval_rewards/accuracies": 0.9932659864425659, |
|
"eval_rewards/chosen": -0.7082026600837708, |
|
"eval_rewards/margins": 16.8209228515625, |
|
"eval_rewards/rejected": -17.529123306274414, |
|
"eval_runtime": 459.6358, |
|
"eval_samples_per_second": 20.669, |
|
"eval_steps_per_second": 0.646, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.2160936909709106e-07, |
|
"logits/chosen": -3.19771409034729, |
|
"logits/rejected": -3.223461866378784, |
|
"logps/chosen": -260.47796630859375, |
|
"logps/rejected": -475.68341064453125, |
|
"loss": 0.0625, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.8059867024421692, |
|
"rewards/margins": 14.349054336547852, |
|
"rewards/rejected": -15.155041694641113, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.1972043823196066e-07, |
|
"logits/chosen": -3.252122402191162, |
|
"logits/rejected": -3.2310726642608643, |
|
"logps/chosen": -230.65432739257812, |
|
"logps/rejected": -538.1376953125, |
|
"loss": 0.0743, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.229381799697876, |
|
"rewards/margins": 19.127460479736328, |
|
"rewards/rejected": -20.356840133666992, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.1783150736683037e-07, |
|
"logits/chosen": -3.29115629196167, |
|
"logits/rejected": -3.1870217323303223, |
|
"logps/chosen": -289.22088623046875, |
|
"logps/rejected": -615.3138427734375, |
|
"loss": 0.0194, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.0578917264938354, |
|
"rewards/margins": 16.45914649963379, |
|
"rewards/rejected": -17.51703643798828, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.1594257650170003e-07, |
|
"logits/chosen": -3.2585701942443848, |
|
"logits/rejected": -3.274698257446289, |
|
"logps/chosen": -291.2822570800781, |
|
"logps/rejected": -534.2890625, |
|
"loss": 0.0079, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3135229349136353, |
|
"rewards/margins": 16.959497451782227, |
|
"rewards/rejected": -18.273021697998047, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.140536456365697e-07, |
|
"logits/chosen": -3.2825417518615723, |
|
"logits/rejected": -3.2054946422576904, |
|
"logps/chosen": -204.35067749023438, |
|
"logps/rejected": -632.234375, |
|
"loss": 0.0207, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6541246771812439, |
|
"rewards/margins": 18.069482803344727, |
|
"rewards/rejected": -18.723609924316406, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.1216471477143934e-07, |
|
"logits/chosen": -3.3405938148498535, |
|
"logits/rejected": -3.2676734924316406, |
|
"logps/chosen": -214.34793090820312, |
|
"logps/rejected": -491.17803955078125, |
|
"loss": 0.0314, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9016032218933105, |
|
"rewards/margins": 17.351411819458008, |
|
"rewards/rejected": -18.253013610839844, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.10275783906309e-07, |
|
"logits/chosen": -3.272359848022461, |
|
"logits/rejected": -3.3219470977783203, |
|
"logps/chosen": -323.8666687011719, |
|
"logps/rejected": -581.1951904296875, |
|
"loss": 0.009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8133894205093384, |
|
"rewards/margins": 15.973344802856445, |
|
"rewards/rejected": -16.786733627319336, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.0838685304117865e-07, |
|
"logits/chosen": -3.353651762008667, |
|
"logits/rejected": -3.259639263153076, |
|
"logps/chosen": -336.8185729980469, |
|
"logps/rejected": -695.9039306640625, |
|
"loss": 0.2987, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.0787861347198486, |
|
"rewards/margins": 17.26241683959961, |
|
"rewards/rejected": -18.34119987487793, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.0649792217604836e-07, |
|
"logits/chosen": -3.4104390144348145, |
|
"logits/rejected": -3.359205961227417, |
|
"logps/chosen": -263.760498046875, |
|
"logps/rejected": -435.3912658691406, |
|
"loss": 0.0309, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8803040385246277, |
|
"rewards/margins": 15.400744438171387, |
|
"rewards/rejected": -16.281049728393555, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.04608991310918e-07, |
|
"logits/chosen": -3.4024837017059326, |
|
"logits/rejected": -3.3290324211120605, |
|
"logps/chosen": -274.2511291503906, |
|
"logps/rejected": -561.3245239257812, |
|
"loss": 0.0461, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.1979191303253174, |
|
"rewards/margins": 17.333105087280273, |
|
"rewards/rejected": -18.531024932861328, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_logits/chosen": -3.5158255100250244, |
|
"eval_logits/rejected": -3.3936259746551514, |
|
"eval_logps/chosen": -268.3476867675781, |
|
"eval_logps/rejected": -636.7913818359375, |
|
"eval_loss": 0.034081265330314636, |
|
"eval_rewards/accuracies": 0.9932659864425659, |
|
"eval_rewards/chosen": -1.2542104721069336, |
|
"eval_rewards/margins": 20.001632690429688, |
|
"eval_rewards/rejected": -21.255842208862305, |
|
"eval_runtime": 459.1675, |
|
"eval_samples_per_second": 20.69, |
|
"eval_steps_per_second": 0.647, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.0272006044578767e-07, |
|
"logits/chosen": -3.4222965240478516, |
|
"logits/rejected": -3.3901965618133545, |
|
"logps/chosen": -271.127685546875, |
|
"logps/rejected": -702.6803588867188, |
|
"loss": 0.0138, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3194767236709595, |
|
"rewards/margins": 20.698434829711914, |
|
"rewards/rejected": -21.01791000366211, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.0083112958065733e-07, |
|
"logits/chosen": -3.3571228981018066, |
|
"logits/rejected": -3.378098964691162, |
|
"logps/chosen": -354.65338134765625, |
|
"logps/rejected": -460.4593811035156, |
|
"loss": 0.0081, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2477954924106598, |
|
"rewards/margins": 16.9534854888916, |
|
"rewards/rejected": -16.705690383911133, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.98942198715527e-07, |
|
"logits/chosen": -3.4696907997131348, |
|
"logits/rejected": -3.3343029022216797, |
|
"logps/chosen": -200.86773681640625, |
|
"logps/rejected": -668.7745361328125, |
|
"loss": 0.0249, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.9370819926261902, |
|
"rewards/margins": 18.672664642333984, |
|
"rewards/rejected": -19.609745025634766, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.970532678503967e-07, |
|
"logits/chosen": -3.428400754928589, |
|
"logits/rejected": -3.392636775970459, |
|
"logps/chosen": -278.60614013671875, |
|
"logps/rejected": -547.3959350585938, |
|
"loss": 0.0232, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5795613527297974, |
|
"rewards/margins": 18.447797775268555, |
|
"rewards/rejected": -19.027359008789062, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.951643369852663e-07, |
|
"logits/chosen": -3.3655319213867188, |
|
"logits/rejected": -3.2096476554870605, |
|
"logps/chosen": -277.06854248046875, |
|
"logps/rejected": -800.4194946289062, |
|
"loss": 0.0132, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.36317068338394165, |
|
"rewards/margins": 18.841623306274414, |
|
"rewards/rejected": -19.204795837402344, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.93275406120136e-07, |
|
"logits/chosen": -3.3958353996276855, |
|
"logits/rejected": -3.364926815032959, |
|
"logps/chosen": -260.45867919921875, |
|
"logps/rejected": -638.11328125, |
|
"loss": 0.0255, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.29275327920913696, |
|
"rewards/margins": 20.899669647216797, |
|
"rewards/rejected": -21.192420959472656, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.913864752550056e-07, |
|
"logits/chosen": -3.555651903152466, |
|
"logits/rejected": -3.362936019897461, |
|
"logps/chosen": -336.64990234375, |
|
"logps/rejected": -678.8165283203125, |
|
"loss": 0.0281, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.29712626338005066, |
|
"rewards/margins": 20.055971145629883, |
|
"rewards/rejected": -19.758846282958984, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.894975443898753e-07, |
|
"logits/chosen": -3.5221924781799316, |
|
"logits/rejected": -3.4192261695861816, |
|
"logps/chosen": -184.4869384765625, |
|
"logps/rejected": -517.7220458984375, |
|
"loss": 0.0357, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2702915668487549, |
|
"rewards/margins": 17.328258514404297, |
|
"rewards/rejected": -17.05797004699707, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.87608613524745e-07, |
|
"logits/chosen": -3.3060905933380127, |
|
"logits/rejected": -3.3240628242492676, |
|
"logps/chosen": -249.1829833984375, |
|
"logps/rejected": -423.19183349609375, |
|
"loss": 0.0244, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4387660622596741, |
|
"rewards/margins": 15.609105110168457, |
|
"rewards/rejected": -15.17033863067627, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.857196826596146e-07, |
|
"logits/chosen": -3.432100296020508, |
|
"logits/rejected": -3.3346171379089355, |
|
"logps/chosen": -213.72622680664062, |
|
"logps/rejected": -522.6224365234375, |
|
"loss": 0.0185, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.36038094758987427, |
|
"rewards/margins": 17.75735855102539, |
|
"rewards/rejected": -17.39698028564453, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_logits/chosen": -3.494093418121338, |
|
"eval_logits/rejected": -3.3745272159576416, |
|
"eval_logps/chosen": -252.02415466308594, |
|
"eval_logps/rejected": -596.7079467773438, |
|
"eval_loss": 0.029099902138113976, |
|
"eval_rewards/accuracies": 0.996632993221283, |
|
"eval_rewards/chosen": 0.3781438171863556, |
|
"eval_rewards/margins": 17.625646591186523, |
|
"eval_rewards/rejected": -17.24750328063965, |
|
"eval_runtime": 458.9428, |
|
"eval_samples_per_second": 20.7, |
|
"eval_steps_per_second": 0.647, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.8383075179448433e-07, |
|
"logits/chosen": -3.4732635021209717, |
|
"logits/rejected": -3.32080340385437, |
|
"logps/chosen": -299.28045654296875, |
|
"logps/rejected": -412.11737060546875, |
|
"loss": 0.0154, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5949827432632446, |
|
"rewards/margins": 14.9349365234375, |
|
"rewards/rejected": -14.339953422546387, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.8194182092935394e-07, |
|
"logits/chosen": -3.5183627605438232, |
|
"logits/rejected": -3.418959856033325, |
|
"logps/chosen": -206.648193359375, |
|
"logps/rejected": -540.2952880859375, |
|
"loss": 0.0068, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.14846962690353394, |
|
"rewards/margins": 18.112279891967773, |
|
"rewards/rejected": -17.963809967041016, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.8005289006422365e-07, |
|
"logits/chosen": -3.4474635124206543, |
|
"logits/rejected": -3.4216742515563965, |
|
"logps/chosen": -245.8963165283203, |
|
"logps/rejected": -637.1114501953125, |
|
"loss": 0.0291, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.09156160801649094, |
|
"rewards/margins": 20.223173141479492, |
|
"rewards/rejected": -20.13161277770996, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.7816395919909325e-07, |
|
"logits/chosen": -3.43304443359375, |
|
"logits/rejected": -3.3371405601501465, |
|
"logps/chosen": -205.36312866210938, |
|
"logps/rejected": -577.2255249023438, |
|
"loss": 0.0663, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.1940477043390274, |
|
"rewards/margins": 17.12152099609375, |
|
"rewards/rejected": -17.31557273864746, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.7627502833396296e-07, |
|
"logits/chosen": -3.4164633750915527, |
|
"logits/rejected": -3.334111452102661, |
|
"logps/chosen": -252.3224334716797, |
|
"logps/rejected": -522.1593017578125, |
|
"loss": 0.0247, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.06414094567298889, |
|
"rewards/margins": 16.263051986694336, |
|
"rewards/rejected": -16.327190399169922, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.7438609746883267e-07, |
|
"logits/chosen": -3.3646836280822754, |
|
"logits/rejected": -3.2790799140930176, |
|
"logps/chosen": -303.45623779296875, |
|
"logps/rejected": -564.6026611328125, |
|
"loss": 0.138, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.1330849528312683, |
|
"rewards/margins": 15.452998161315918, |
|
"rewards/rejected": -15.319913864135742, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.7249716660370227e-07, |
|
"logits/chosen": -3.5046660900115967, |
|
"logits/rejected": -3.309417724609375, |
|
"logps/chosen": -193.89120483398438, |
|
"logps/rejected": -745.4686279296875, |
|
"loss": 0.0171, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.28295058012008667, |
|
"rewards/margins": 19.531200408935547, |
|
"rewards/rejected": -19.814151763916016, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.70608235738572e-07, |
|
"logits/chosen": -3.4429755210876465, |
|
"logits/rejected": -3.297156572341919, |
|
"logps/chosen": -311.4560852050781, |
|
"logps/rejected": -706.6754150390625, |
|
"loss": 0.0115, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6654003858566284, |
|
"rewards/margins": 20.311656951904297, |
|
"rewards/rejected": -19.646259307861328, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.687193048734416e-07, |
|
"logits/chosen": -3.448244094848633, |
|
"logits/rejected": -3.379380464553833, |
|
"logps/chosen": -186.96884155273438, |
|
"logps/rejected": -682.4005126953125, |
|
"loss": 0.038, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3271837532520294, |
|
"rewards/margins": 18.818578720092773, |
|
"rewards/rejected": -18.491397857666016, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.668303740083113e-07, |
|
"logits/chosen": -3.448310136795044, |
|
"logits/rejected": -3.360579013824463, |
|
"logps/chosen": -269.36297607421875, |
|
"logps/rejected": -513.7430419921875, |
|
"loss": 0.0219, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5543234944343567, |
|
"rewards/margins": 17.900325775146484, |
|
"rewards/rejected": -17.34600067138672, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_logits/chosen": -3.4703125953674316, |
|
"eval_logits/rejected": -3.3235578536987305, |
|
"eval_logps/chosen": -256.8191223144531, |
|
"eval_logps/rejected": -620.40966796875, |
|
"eval_loss": 0.024830101057887077, |
|
"eval_rewards/accuracies": 0.9957912564277649, |
|
"eval_rewards/chosen": -0.10135477781295776, |
|
"eval_rewards/margins": 19.51631736755371, |
|
"eval_rewards/rejected": -19.6176700592041, |
|
"eval_runtime": 459.9759, |
|
"eval_samples_per_second": 20.653, |
|
"eval_steps_per_second": 0.646, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.6494144314318094e-07, |
|
"logits/chosen": -3.414768695831299, |
|
"logits/rejected": -3.3657734394073486, |
|
"logps/chosen": -279.57891845703125, |
|
"logps/rejected": -491.0535583496094, |
|
"loss": 0.0365, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.23943224549293518, |
|
"rewards/margins": 14.393826484680176, |
|
"rewards/rejected": -14.154393196105957, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.630525122780506e-07, |
|
"logits/chosen": -3.4718196392059326, |
|
"logits/rejected": -3.3277339935302734, |
|
"logps/chosen": -190.89041137695312, |
|
"logps/rejected": -588.8604736328125, |
|
"loss": 0.0648, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5905485153198242, |
|
"rewards/margins": 20.05153465270996, |
|
"rewards/rejected": -20.642086029052734, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.6116358141292026e-07, |
|
"logits/chosen": -3.4760546684265137, |
|
"logits/rejected": -3.3435845375061035, |
|
"logps/chosen": -203.86050415039062, |
|
"logps/rejected": -592.036865234375, |
|
"loss": 0.0302, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.7946874499320984, |
|
"rewards/margins": 24.075565338134766, |
|
"rewards/rejected": -24.87025260925293, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.592746505477899e-07, |
|
"logits/chosen": -3.3917622566223145, |
|
"logits/rejected": -3.2945053577423096, |
|
"logps/chosen": -364.39324951171875, |
|
"logps/rejected": -807.2418212890625, |
|
"loss": 0.0895, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.8276251554489136, |
|
"rewards/margins": 28.354557037353516, |
|
"rewards/rejected": -29.182178497314453, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.573857196826596e-07, |
|
"logits/chosen": -3.579265594482422, |
|
"logits/rejected": -3.4756500720977783, |
|
"logps/chosen": -352.31451416015625, |
|
"logps/rejected": -720.6471557617188, |
|
"loss": 0.1426, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.192226529121399, |
|
"rewards/margins": 35.108890533447266, |
|
"rewards/rejected": -36.301116943359375, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.554967888175293e-07, |
|
"logits/chosen": -3.6324493885040283, |
|
"logits/rejected": -3.4453964233398438, |
|
"logps/chosen": -275.65631103515625, |
|
"logps/rejected": -759.9483642578125, |
|
"loss": 0.0578, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.29346764087677, |
|
"rewards/margins": 34.53205490112305, |
|
"rewards/rejected": -35.825523376464844, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.5360785795239893e-07, |
|
"logits/chosen": -3.4921135902404785, |
|
"logits/rejected": -3.409318208694458, |
|
"logps/chosen": -326.6520080566406, |
|
"logps/rejected": -557.5526733398438, |
|
"loss": 0.0153, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.002267599105835, |
|
"rewards/margins": 22.072717666625977, |
|
"rewards/rejected": -23.07498550415039, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.517189270872686e-07, |
|
"logits/chosen": -3.4556102752685547, |
|
"logits/rejected": -3.42388916015625, |
|
"logps/chosen": -266.01983642578125, |
|
"logps/rejected": -539.4820556640625, |
|
"loss": 0.1283, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.8836309313774109, |
|
"rewards/margins": 21.98915672302246, |
|
"rewards/rejected": -22.872791290283203, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.4982999622213824e-07, |
|
"logits/chosen": -3.318305253982544, |
|
"logits/rejected": -3.3859639167785645, |
|
"logps/chosen": -262.29730224609375, |
|
"logps/rejected": -423.01531982421875, |
|
"loss": 0.0238, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.06474297493696213, |
|
"rewards/margins": 20.654415130615234, |
|
"rewards/rejected": -20.719158172607422, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.479410653570079e-07, |
|
"logits/chosen": -3.4146296977996826, |
|
"logits/rejected": -3.3967928886413574, |
|
"logps/chosen": -263.7212219238281, |
|
"logps/rejected": -663.2433471679688, |
|
"loss": 0.0193, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18020686507225037, |
|
"rewards/margins": 23.35451889038086, |
|
"rewards/rejected": -23.174312591552734, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_logits/chosen": -3.512702226638794, |
|
"eval_logits/rejected": -3.369966745376587, |
|
"eval_logps/chosen": -253.3647918701172, |
|
"eval_logps/rejected": -652.9178466796875, |
|
"eval_loss": 0.04759080708026886, |
|
"eval_rewards/accuracies": 0.9949495196342468, |
|
"eval_rewards/chosen": 0.24408026039600372, |
|
"eval_rewards/margins": 23.112565994262695, |
|
"eval_rewards/rejected": -22.868486404418945, |
|
"eval_runtime": 459.4583, |
|
"eval_samples_per_second": 20.677, |
|
"eval_steps_per_second": 0.646, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.460521344918776e-07, |
|
"logits/chosen": -3.4741005897521973, |
|
"logits/rejected": -3.3692924976348877, |
|
"logps/chosen": -177.2493896484375, |
|
"logps/rejected": -684.059326171875, |
|
"loss": 0.0304, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.10438014566898346, |
|
"rewards/margins": 19.24637794494629, |
|
"rewards/rejected": -19.141998291015625, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.441632036267472e-07, |
|
"logits/chosen": -3.519453525543213, |
|
"logits/rejected": -3.450646162033081, |
|
"logps/chosen": -206.3206787109375, |
|
"logps/rejected": -591.7532348632812, |
|
"loss": 0.1613, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19403085112571716, |
|
"rewards/margins": 18.250011444091797, |
|
"rewards/rejected": -18.055980682373047, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.422742727616169e-07, |
|
"logits/chosen": -3.4171173572540283, |
|
"logits/rejected": -3.402754545211792, |
|
"logps/chosen": -285.92596435546875, |
|
"logps/rejected": -420.0677795410156, |
|
"loss": 0.0851, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.29394811391830444, |
|
"rewards/margins": 17.166831970214844, |
|
"rewards/rejected": -16.872886657714844, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.403853418964866e-07, |
|
"logits/chosen": -3.395599842071533, |
|
"logits/rejected": -3.352531909942627, |
|
"logps/chosen": -267.9078674316406, |
|
"logps/rejected": -602.1204833984375, |
|
"loss": 0.0109, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.25522318482398987, |
|
"rewards/margins": 24.411819458007812, |
|
"rewards/rejected": -24.667041778564453, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.3849641103135623e-07, |
|
"logits/chosen": -3.4090888500213623, |
|
"logits/rejected": -3.3088366985321045, |
|
"logps/chosen": -302.02484130859375, |
|
"logps/rejected": -429.8524475097656, |
|
"loss": 0.078, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.5110098123550415, |
|
"rewards/margins": 17.73603057861328, |
|
"rewards/rejected": -18.247039794921875, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.3660748016622594e-07, |
|
"logits/chosen": -3.284252882003784, |
|
"logits/rejected": -3.364304304122925, |
|
"logps/chosen": -329.56622314453125, |
|
"logps/rejected": -570.0607299804688, |
|
"loss": 0.0307, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.14421747624874115, |
|
"rewards/margins": 23.145597457885742, |
|
"rewards/rejected": -23.00138282775879, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.3471854930109554e-07, |
|
"logits/chosen": -3.2979743480682373, |
|
"logits/rejected": -3.361307144165039, |
|
"logps/chosen": -186.98870849609375, |
|
"logps/rejected": -614.3134765625, |
|
"loss": 0.074, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.3126475214958191, |
|
"rewards/margins": 21.26107406616211, |
|
"rewards/rejected": -20.948427200317383, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.3282961843596525e-07, |
|
"logits/chosen": -3.3583598136901855, |
|
"logits/rejected": -3.2492566108703613, |
|
"logps/chosen": -241.7067108154297, |
|
"logps/rejected": -792.7431640625, |
|
"loss": 0.0112, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.38423070311546326, |
|
"rewards/margins": 24.471080780029297, |
|
"rewards/rejected": -24.086851119995117, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.3094068757083485e-07, |
|
"logits/chosen": -3.342761516571045, |
|
"logits/rejected": -3.278207778930664, |
|
"logps/chosen": -275.2469482421875, |
|
"logps/rejected": -528.4226684570312, |
|
"loss": 0.0103, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5847635269165039, |
|
"rewards/margins": 23.485641479492188, |
|
"rewards/rejected": -22.900880813598633, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.2905175670570456e-07, |
|
"logits/chosen": -3.3539364337921143, |
|
"logits/rejected": -3.304426670074463, |
|
"logps/chosen": -338.57464599609375, |
|
"logps/rejected": -586.469970703125, |
|
"loss": 0.0153, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.03903156518936157, |
|
"rewards/margins": 19.734745025634766, |
|
"rewards/rejected": -19.77377700805664, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_logits/chosen": -3.443300485610962, |
|
"eval_logits/rejected": -3.3281137943267822, |
|
"eval_logps/chosen": -253.46900939941406, |
|
"eval_logps/rejected": -634.9552612304688, |
|
"eval_loss": 0.03437602147459984, |
|
"eval_rewards/accuracies": 0.9957912564277649, |
|
"eval_rewards/chosen": 0.23365840315818787, |
|
"eval_rewards/margins": 21.305892944335938, |
|
"eval_rewards/rejected": -21.072233200073242, |
|
"eval_runtime": 460.0736, |
|
"eval_samples_per_second": 20.649, |
|
"eval_steps_per_second": 0.646, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.271628258405742e-07, |
|
"logits/chosen": -3.3404109477996826, |
|
"logits/rejected": -3.3278732299804688, |
|
"logps/chosen": -266.605712890625, |
|
"logps/rejected": -432.56243896484375, |
|
"loss": 0.0312, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.16924318671226501, |
|
"rewards/margins": 20.001480102539062, |
|
"rewards/rejected": -19.832239151000977, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.252738949754439e-07, |
|
"logits/chosen": -3.325308322906494, |
|
"logits/rejected": -3.2718875408172607, |
|
"logps/chosen": -239.7605743408203, |
|
"logps/rejected": -689.46240234375, |
|
"loss": 0.0095, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.257117748260498, |
|
"rewards/margins": 22.868335723876953, |
|
"rewards/rejected": -21.611217498779297, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.233849641103136e-07, |
|
"logits/chosen": -3.409510850906372, |
|
"logits/rejected": -3.2908267974853516, |
|
"logps/chosen": -186.55523681640625, |
|
"logps/rejected": -838.28271484375, |
|
"loss": 0.0083, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19115009903907776, |
|
"rewards/margins": 21.021907806396484, |
|
"rewards/rejected": -20.830759048461914, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.214960332451832e-07, |
|
"logits/chosen": -3.3691024780273438, |
|
"logits/rejected": -3.2960734367370605, |
|
"logps/chosen": -276.49310302734375, |
|
"logps/rejected": -673.3426513671875, |
|
"loss": 0.027, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.20441074669361115, |
|
"rewards/margins": 21.827564239501953, |
|
"rewards/rejected": -22.03197479248047, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.196071023800529e-07, |
|
"logits/chosen": -3.3508994579315186, |
|
"logits/rejected": -3.2464585304260254, |
|
"logps/chosen": -262.32366943359375, |
|
"logps/rejected": -801.7515869140625, |
|
"loss": 0.0091, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.30456364154815674, |
|
"rewards/margins": 18.644428253173828, |
|
"rewards/rejected": -18.948993682861328, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.1771817151492255e-07, |
|
"logits/chosen": -3.3253204822540283, |
|
"logits/rejected": -3.182976245880127, |
|
"logps/chosen": -271.783203125, |
|
"logps/rejected": -792.1323852539062, |
|
"loss": 0.0606, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4825393557548523, |
|
"rewards/margins": 23.283037185668945, |
|
"rewards/rejected": -22.800498962402344, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.158292406497922e-07, |
|
"logits/chosen": -3.281102418899536, |
|
"logits/rejected": -3.305457353591919, |
|
"logps/chosen": -303.48016357421875, |
|
"logps/rejected": -653.7716674804688, |
|
"loss": 0.0389, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.34954649209976196, |
|
"rewards/margins": 18.922035217285156, |
|
"rewards/rejected": -18.57248306274414, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.1394030978466186e-07, |
|
"logits/chosen": -3.4600696563720703, |
|
"logits/rejected": -3.3820204734802246, |
|
"logps/chosen": -226.12631225585938, |
|
"logps/rejected": -523.9305419921875, |
|
"loss": 0.034, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.39240407943725586, |
|
"rewards/margins": 21.62295913696289, |
|
"rewards/rejected": -22.015361785888672, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.120513789195315e-07, |
|
"logits/chosen": -3.4715068340301514, |
|
"logits/rejected": -3.343583583831787, |
|
"logps/chosen": -208.66445922851562, |
|
"logps/rejected": -671.6092529296875, |
|
"loss": 0.0211, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2728857100009918, |
|
"rewards/margins": 23.407129287719727, |
|
"rewards/rejected": -23.68001365661621, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.1016244805440117e-07, |
|
"logits/chosen": -3.3316524028778076, |
|
"logits/rejected": -3.259505033493042, |
|
"logps/chosen": -252.8136749267578, |
|
"logps/rejected": -625.1239013671875, |
|
"loss": 0.1011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.11937759071588516, |
|
"rewards/margins": 17.959081649780273, |
|
"rewards/rejected": -18.078458786010742, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_logits/chosen": -3.2942728996276855, |
|
"eval_logits/rejected": -3.2086024284362793, |
|
"eval_logps/chosen": -251.94058227539062, |
|
"eval_logps/rejected": -619.3322143554688, |
|
"eval_loss": 0.03203802928328514, |
|
"eval_rewards/accuracies": 0.994107723236084, |
|
"eval_rewards/chosen": 0.3865027129650116, |
|
"eval_rewards/margins": 19.89643096923828, |
|
"eval_rewards/rejected": -19.50992774963379, |
|
"eval_runtime": 459.9792, |
|
"eval_samples_per_second": 20.653, |
|
"eval_steps_per_second": 0.646, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.082735171892709e-07, |
|
"logits/chosen": -3.283964157104492, |
|
"logits/rejected": -3.3076889514923096, |
|
"logps/chosen": -200.6337432861328, |
|
"logps/rejected": -526.4237060546875, |
|
"loss": 0.1087, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0861460343003273, |
|
"rewards/margins": 19.039958953857422, |
|
"rewards/rejected": -19.126102447509766, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.0638458632414054e-07, |
|
"logits/chosen": -3.3101184368133545, |
|
"logits/rejected": -3.2691454887390137, |
|
"logps/chosen": -248.6476593017578, |
|
"logps/rejected": -730.8521728515625, |
|
"loss": 0.0054, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3345467150211334, |
|
"rewards/margins": 21.247516632080078, |
|
"rewards/rejected": -20.912973403930664, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.044956554590102e-07, |
|
"logits/chosen": -3.3269455432891846, |
|
"logits/rejected": -3.3831734657287598, |
|
"logps/chosen": -250.7469024658203, |
|
"logps/rejected": -490.6947326660156, |
|
"loss": 0.0732, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0001120984525186941, |
|
"rewards/margins": 21.17990493774414, |
|
"rewards/rejected": -21.180017471313477, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.0260672459387985e-07, |
|
"logits/chosen": -3.445746660232544, |
|
"logits/rejected": -3.3551487922668457, |
|
"logps/chosen": -194.33663940429688, |
|
"logps/rejected": -587.1473388671875, |
|
"loss": 0.0121, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2983397841453552, |
|
"rewards/margins": 21.926280975341797, |
|
"rewards/rejected": -22.224620819091797, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.007177937287495e-07, |
|
"logits/chosen": -3.3813576698303223, |
|
"logits/rejected": -3.2786917686462402, |
|
"logps/chosen": -200.80307006835938, |
|
"logps/rejected": -760.9759521484375, |
|
"loss": 0.0063, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.03139156848192215, |
|
"rewards/margins": 25.731273651123047, |
|
"rewards/rejected": -25.762670516967773, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.988288628636192e-07, |
|
"logits/chosen": -3.431568145751953, |
|
"logits/rejected": -3.312471389770508, |
|
"logps/chosen": -261.0316162109375, |
|
"logps/rejected": -559.0184326171875, |
|
"loss": 0.057, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4650034010410309, |
|
"rewards/margins": 19.260265350341797, |
|
"rewards/rejected": -18.795259475708008, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.969399319984888e-07, |
|
"logits/chosen": -3.423460006713867, |
|
"logits/rejected": -3.2820401191711426, |
|
"logps/chosen": -222.3740692138672, |
|
"logps/rejected": -875.9083862304688, |
|
"loss": 0.0138, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7400540709495544, |
|
"rewards/margins": 24.780017852783203, |
|
"rewards/rejected": -25.520071029663086, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.950510011333585e-07, |
|
"logits/chosen": -3.4326465129852295, |
|
"logits/rejected": -3.351288318634033, |
|
"logps/chosen": -222.06887817382812, |
|
"logps/rejected": -691.3297729492188, |
|
"loss": 0.0427, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.06572739779949188, |
|
"rewards/margins": 22.379207611083984, |
|
"rewards/rejected": -22.313480377197266, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.9316207026822813e-07, |
|
"logits/chosen": -3.4397430419921875, |
|
"logits/rejected": -3.294466733932495, |
|
"logps/chosen": -224.94287109375, |
|
"logps/rejected": -672.6677856445312, |
|
"loss": 0.0298, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.15702833235263824, |
|
"rewards/margins": 21.14691734313965, |
|
"rewards/rejected": -21.303943634033203, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.9127313940309784e-07, |
|
"logits/chosen": -3.423677921295166, |
|
"logits/rejected": -3.4165546894073486, |
|
"logps/chosen": -191.03762817382812, |
|
"logps/rejected": -612.9573364257812, |
|
"loss": 0.0085, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.3937866985797882, |
|
"rewards/margins": 22.95120620727539, |
|
"rewards/rejected": -23.34499740600586, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_logits/chosen": -3.505523920059204, |
|
"eval_logits/rejected": -3.3688061237335205, |
|
"eval_logps/chosen": -259.40972900390625, |
|
"eval_logps/rejected": -670.28564453125, |
|
"eval_loss": 0.01642242632806301, |
|
"eval_rewards/accuracies": 0.9957912564277649, |
|
"eval_rewards/chosen": -0.36041346192359924, |
|
"eval_rewards/margins": 24.244855880737305, |
|
"eval_rewards/rejected": -24.605268478393555, |
|
"eval_runtime": 459.6598, |
|
"eval_samples_per_second": 20.667, |
|
"eval_steps_per_second": 0.646, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.8938420853796754e-07, |
|
"logits/chosen": -3.4179089069366455, |
|
"logits/rejected": -3.3702964782714844, |
|
"logps/chosen": -280.6731262207031, |
|
"logps/rejected": -737.249267578125, |
|
"loss": 0.008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.025803815573453903, |
|
"rewards/margins": 28.388423919677734, |
|
"rewards/rejected": -28.414230346679688, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.8749527767283715e-07, |
|
"logits/chosen": -3.369783878326416, |
|
"logits/rejected": -3.376282215118408, |
|
"logps/chosen": -268.02752685546875, |
|
"logps/rejected": -681.7142944335938, |
|
"loss": 0.0091, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.40402960777282715, |
|
"rewards/margins": 21.377483367919922, |
|
"rewards/rejected": -21.78151512145996, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.8560634680770686e-07, |
|
"logits/chosen": -3.4302501678466797, |
|
"logits/rejected": -3.362534761428833, |
|
"logps/chosen": -268.420166015625, |
|
"logps/rejected": -660.0174560546875, |
|
"loss": 0.0133, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9907222986221313, |
|
"rewards/margins": 23.22337532043457, |
|
"rewards/rejected": -24.214096069335938, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.8371741594257646e-07, |
|
"logits/chosen": -3.4184436798095703, |
|
"logits/rejected": -3.3809783458709717, |
|
"logps/chosen": -341.3996887207031, |
|
"logps/rejected": -584.3710327148438, |
|
"loss": 0.1015, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.279394268989563, |
|
"rewards/margins": 25.102916717529297, |
|
"rewards/rejected": -25.382308959960938, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.8182848507744617e-07, |
|
"logits/chosen": -3.5210273265838623, |
|
"logits/rejected": -3.411952495574951, |
|
"logps/chosen": -221.7975311279297, |
|
"logps/rejected": -520.7232666015625, |
|
"loss": 0.011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.764832854270935, |
|
"rewards/margins": 22.8917236328125, |
|
"rewards/rejected": -24.65655517578125, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.799395542123158e-07, |
|
"logits/chosen": -3.3874526023864746, |
|
"logits/rejected": -3.3241126537323, |
|
"logps/chosen": -336.1128845214844, |
|
"logps/rejected": -680.0740356445312, |
|
"loss": 0.0197, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9126793742179871, |
|
"rewards/margins": 21.444177627563477, |
|
"rewards/rejected": -22.35685920715332, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.780506233471855e-07, |
|
"logits/chosen": -3.438469409942627, |
|
"logits/rejected": -3.3389315605163574, |
|
"logps/chosen": -200.33642578125, |
|
"logps/rejected": -602.5469360351562, |
|
"loss": 0.0139, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.0372685194015503, |
|
"rewards/margins": 25.210084915161133, |
|
"rewards/rejected": -26.247350692749023, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.7616169248205513e-07, |
|
"logits/chosen": -3.3702118396759033, |
|
"logits/rejected": -3.3400275707244873, |
|
"logps/chosen": -205.5651397705078, |
|
"logps/rejected": -772.1658935546875, |
|
"loss": 0.0084, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.8077577352523804, |
|
"rewards/margins": 37.4057731628418, |
|
"rewards/rejected": -39.213531494140625, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.742727616169248e-07, |
|
"logits/chosen": -3.3958728313446045, |
|
"logits/rejected": -3.326359510421753, |
|
"logps/chosen": -254.0430450439453, |
|
"logps/rejected": -525.82568359375, |
|
"loss": 0.0084, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1838462352752686, |
|
"rewards/margins": 29.607269287109375, |
|
"rewards/rejected": -30.791118621826172, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.723838307517945e-07, |
|
"logits/chosen": -3.3379979133605957, |
|
"logits/rejected": -3.2290759086608887, |
|
"logps/chosen": -330.87176513671875, |
|
"logps/rejected": -727.2958984375, |
|
"loss": 0.0057, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1591452956199646, |
|
"rewards/margins": 32.132423400878906, |
|
"rewards/rejected": -32.29157257080078, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_logits/chosen": -3.4454987049102783, |
|
"eval_logits/rejected": -3.298642158508301, |
|
"eval_logps/chosen": -264.3898010253906, |
|
"eval_logps/rejected": -762.0860595703125, |
|
"eval_loss": 0.011499395594000816, |
|
"eval_rewards/accuracies": 0.996632993221283, |
|
"eval_rewards/chosen": -0.8584219813346863, |
|
"eval_rewards/margins": 32.92688751220703, |
|
"eval_rewards/rejected": -33.785308837890625, |
|
"eval_runtime": 460.1896, |
|
"eval_samples_per_second": 20.644, |
|
"eval_steps_per_second": 0.645, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.7049489988666416e-07, |
|
"logits/chosen": -3.3881945610046387, |
|
"logits/rejected": -3.26824951171875, |
|
"logps/chosen": -310.400634765625, |
|
"logps/rejected": -629.2418212890625, |
|
"loss": 0.0191, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.2945066690444946, |
|
"rewards/margins": 28.674571990966797, |
|
"rewards/rejected": -29.96907615661621, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.686059690215338e-07, |
|
"logits/chosen": -3.300244092941284, |
|
"logits/rejected": -3.251279830932617, |
|
"logps/chosen": -273.0030212402344, |
|
"logps/rejected": -812.864990234375, |
|
"loss": 0.0678, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8532045483589172, |
|
"rewards/margins": 32.147369384765625, |
|
"rewards/rejected": -33.00057601928711, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.6671703815640347e-07, |
|
"logits/chosen": -3.333925247192383, |
|
"logits/rejected": -3.2999801635742188, |
|
"logps/chosen": -247.5169219970703, |
|
"logps/rejected": -541.9185791015625, |
|
"loss": 0.0593, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.754341721534729, |
|
"rewards/margins": 25.133520126342773, |
|
"rewards/rejected": -25.887863159179688, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.648281072912731e-07, |
|
"logits/chosen": -3.4177489280700684, |
|
"logits/rejected": -3.2177319526672363, |
|
"logps/chosen": -210.8187713623047, |
|
"logps/rejected": -762.6394653320312, |
|
"loss": 0.0087, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.5407046675682068, |
|
"rewards/margins": 20.313051223754883, |
|
"rewards/rejected": -20.853755950927734, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.629391764261428e-07, |
|
"logits/chosen": -3.3744702339172363, |
|
"logits/rejected": -3.3114571571350098, |
|
"logps/chosen": -278.4391784667969, |
|
"logps/rejected": -541.5650634765625, |
|
"loss": 0.0069, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.8419182896614075, |
|
"rewards/margins": 19.7562313079834, |
|
"rewards/rejected": -20.598148345947266, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.610502455610125e-07, |
|
"logits/chosen": -3.3633506298065186, |
|
"logits/rejected": -3.3126094341278076, |
|
"logps/chosen": -352.4827575683594, |
|
"logps/rejected": -738.1781005859375, |
|
"loss": 0.1372, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.72525554895401, |
|
"rewards/margins": 22.449813842773438, |
|
"rewards/rejected": -23.175067901611328, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.591613146958821e-07, |
|
"logits/chosen": -3.4918789863586426, |
|
"logits/rejected": -3.401181697845459, |
|
"logps/chosen": -248.28060913085938, |
|
"logps/rejected": -687.9749145507812, |
|
"loss": 0.0074, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5774227380752563, |
|
"rewards/margins": 24.075176239013672, |
|
"rewards/rejected": -24.652597427368164, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.572723838307518e-07, |
|
"logits/chosen": -3.4203097820281982, |
|
"logits/rejected": -3.419062852859497, |
|
"logps/chosen": -262.29730224609375, |
|
"logps/rejected": -591.0722045898438, |
|
"loss": 0.0102, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.19718003273010254, |
|
"rewards/margins": 21.856969833374023, |
|
"rewards/rejected": -22.05415153503418, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5538345296562145e-07, |
|
"logits/chosen": -3.4380905628204346, |
|
"logits/rejected": -3.3508377075195312, |
|
"logps/chosen": -261.0439147949219, |
|
"logps/rejected": -576.0928955078125, |
|
"loss": 0.0285, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.15919718146324158, |
|
"rewards/margins": 24.959590911865234, |
|
"rewards/rejected": -25.11878776550293, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.534945221004911e-07, |
|
"logits/chosen": -3.292537212371826, |
|
"logits/rejected": -3.3550610542297363, |
|
"logps/chosen": -380.9278869628906, |
|
"logps/rejected": -537.5498657226562, |
|
"loss": 0.0082, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.219398856163025, |
|
"rewards/margins": 21.093488693237305, |
|
"rewards/rejected": -19.874088287353516, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_logits/chosen": -3.481621742248535, |
|
"eval_logits/rejected": -3.3371667861938477, |
|
"eval_logps/chosen": -259.466796875, |
|
"eval_logps/rejected": -648.6592407226562, |
|
"eval_loss": 0.05247886851429939, |
|
"eval_rewards/accuracies": 0.997474730014801, |
|
"eval_rewards/chosen": -0.3661208748817444, |
|
"eval_rewards/margins": 22.07651138305664, |
|
"eval_rewards/rejected": -22.442630767822266, |
|
"eval_runtime": 459.9365, |
|
"eval_samples_per_second": 20.655, |
|
"eval_steps_per_second": 0.646, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.516055912353608e-07, |
|
"logits/chosen": -3.3571033477783203, |
|
"logits/rejected": -3.2502875328063965, |
|
"logps/chosen": -277.83837890625, |
|
"logps/rejected": -724.0299072265625, |
|
"loss": 0.0104, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8198422193527222, |
|
"rewards/margins": 20.712133407592773, |
|
"rewards/rejected": -21.53197479248047, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.497166603702304e-07, |
|
"logits/chosen": -3.3982932567596436, |
|
"logits/rejected": -3.273448944091797, |
|
"logps/chosen": -262.4102478027344, |
|
"logps/rejected": -759.910888671875, |
|
"loss": 0.0153, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7558759450912476, |
|
"rewards/margins": 27.525121688842773, |
|
"rewards/rejected": -28.2810001373291, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4782772950510013e-07, |
|
"logits/chosen": -3.3453495502471924, |
|
"logits/rejected": -3.254922389984131, |
|
"logps/chosen": -278.51287841796875, |
|
"logps/rejected": -550.3945922851562, |
|
"loss": 0.0642, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.517066478729248, |
|
"rewards/margins": 19.643749237060547, |
|
"rewards/rejected": -20.160816192626953, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.459387986399698e-07, |
|
"logits/chosen": -3.198303699493408, |
|
"logits/rejected": -3.2498583793640137, |
|
"logps/chosen": -262.79925537109375, |
|
"logps/rejected": -609.2022705078125, |
|
"loss": 0.0064, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.8170091509819031, |
|
"rewards/margins": 23.31692123413086, |
|
"rewards/rejected": -24.133930206298828, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4404986777483944e-07, |
|
"logits/chosen": -3.2812728881835938, |
|
"logits/rejected": -3.2436320781707764, |
|
"logps/chosen": -318.45599365234375, |
|
"logps/rejected": -511.8076171875, |
|
"loss": 0.0135, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.28894877433776855, |
|
"rewards/margins": 20.95669937133789, |
|
"rewards/rejected": -20.667749404907227, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.421609369097091e-07, |
|
"logits/chosen": -3.3857626914978027, |
|
"logits/rejected": -3.2701289653778076, |
|
"logps/chosen": -277.44281005859375, |
|
"logps/rejected": -628.5038452148438, |
|
"loss": 0.0101, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5810535550117493, |
|
"rewards/margins": 24.878406524658203, |
|
"rewards/rejected": -25.459457397460938, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.4027200604457875e-07, |
|
"logits/chosen": -3.3911805152893066, |
|
"logits/rejected": -3.285238742828369, |
|
"logps/chosen": -245.50717163085938, |
|
"logps/rejected": -631.26611328125, |
|
"loss": 0.0045, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.19214244186878204, |
|
"rewards/margins": 23.272733688354492, |
|
"rewards/rejected": -23.46487808227539, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.383830751794484e-07, |
|
"logits/chosen": -3.3616461753845215, |
|
"logits/rejected": -3.2842185497283936, |
|
"logps/chosen": -346.10980224609375, |
|
"logps/rejected": -848.2496948242188, |
|
"loss": 0.0029, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1335320770740509, |
|
"rewards/margins": 26.36749839782715, |
|
"rewards/rejected": -26.50103187561035, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.364941443143181e-07, |
|
"logits/chosen": -3.392210006713867, |
|
"logits/rejected": -3.366656541824341, |
|
"logps/chosen": -214.9364471435547, |
|
"logps/rejected": -542.2637939453125, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.6650384664535522, |
|
"rewards/margins": 20.180553436279297, |
|
"rewards/rejected": -20.845592498779297, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.3460521344918775e-07, |
|
"logits/chosen": -3.4405808448791504, |
|
"logits/rejected": -3.241579055786133, |
|
"logps/chosen": -228.2866973876953, |
|
"logps/rejected": -684.4705810546875, |
|
"loss": 0.0128, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8028446435928345, |
|
"rewards/margins": 30.044261932373047, |
|
"rewards/rejected": -30.847110748291016, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_logits/chosen": -3.4487576484680176, |
|
"eval_logits/rejected": -3.310159206390381, |
|
"eval_logps/chosen": -260.0583801269531, |
|
"eval_logps/rejected": -667.2958374023438, |
|
"eval_loss": 0.05137615278363228, |
|
"eval_rewards/accuracies": 0.9957912564277649, |
|
"eval_rewards/chosen": -0.42527905106544495, |
|
"eval_rewards/margins": 23.88100814819336, |
|
"eval_rewards/rejected": -24.30628776550293, |
|
"eval_runtime": 459.8654, |
|
"eval_samples_per_second": 20.658, |
|
"eval_steps_per_second": 0.646, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.327162825840574e-07, |
|
"logits/chosen": -3.330414295196533, |
|
"logits/rejected": -3.2635841369628906, |
|
"logps/chosen": -326.1940612792969, |
|
"logps/rejected": -795.8521728515625, |
|
"loss": 0.0068, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.16869966685771942, |
|
"rewards/margins": 20.82526206970215, |
|
"rewards/rejected": -20.993961334228516, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.3082735171892708e-07, |
|
"logits/chosen": -3.359579086303711, |
|
"logits/rejected": -3.1651229858398438, |
|
"logps/chosen": -249.9299774169922, |
|
"logps/rejected": -1015.4552001953125, |
|
"loss": 0.0058, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.624853789806366, |
|
"rewards/margins": 25.351341247558594, |
|
"rewards/rejected": -25.9761962890625, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.2893842085379674e-07, |
|
"logits/chosen": -3.4433200359344482, |
|
"logits/rejected": -3.279329776763916, |
|
"logps/chosen": -218.9633026123047, |
|
"logps/rejected": -560.0067138671875, |
|
"loss": 0.0046, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8118854761123657, |
|
"rewards/margins": 23.033185958862305, |
|
"rewards/rejected": -23.845069885253906, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.2704948998866642e-07, |
|
"logits/chosen": -3.2414298057556152, |
|
"logits/rejected": -3.2678425312042236, |
|
"logps/chosen": -395.7589111328125, |
|
"logps/rejected": -570.04345703125, |
|
"loss": 0.1014, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.5239030122756958, |
|
"rewards/margins": 21.01217269897461, |
|
"rewards/rejected": -22.536075592041016, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.2516055912353608e-07, |
|
"logits/chosen": -3.3212122917175293, |
|
"logits/rejected": -3.2659897804260254, |
|
"logps/chosen": -303.695556640625, |
|
"logps/rejected": -606.518310546875, |
|
"loss": 0.0063, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.16412094235420227, |
|
"rewards/margins": 30.60677146911621, |
|
"rewards/rejected": -30.442651748657227, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.2327162825840573e-07, |
|
"logits/chosen": -3.2955994606018066, |
|
"logits/rejected": -3.330519199371338, |
|
"logps/chosen": -313.94586181640625, |
|
"logps/rejected": -576.6971435546875, |
|
"loss": 0.0037, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5809284448623657, |
|
"rewards/margins": 21.45393180847168, |
|
"rewards/rejected": -22.034860610961914, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.213826973932754e-07, |
|
"logits/chosen": -3.3576037883758545, |
|
"logits/rejected": -3.1798653602600098, |
|
"logps/chosen": -205.84744262695312, |
|
"logps/rejected": -822.9035034179688, |
|
"loss": 0.0085, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.8852649927139282, |
|
"rewards/margins": 27.4652156829834, |
|
"rewards/rejected": -28.350482940673828, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.1949376652814505e-07, |
|
"logits/chosen": -3.352653980255127, |
|
"logits/rejected": -3.3083279132843018, |
|
"logps/chosen": -281.2442932128906, |
|
"logps/rejected": -563.5089111328125, |
|
"loss": 0.0066, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.149827241897583, |
|
"rewards/margins": 20.38897132873535, |
|
"rewards/rejected": -21.53879737854004, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.176048356630147e-07, |
|
"logits/chosen": -3.310255765914917, |
|
"logits/rejected": -3.2874767780303955, |
|
"logps/chosen": -390.83807373046875, |
|
"logps/rejected": -587.5394897460938, |
|
"loss": 0.006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.459928035736084, |
|
"rewards/margins": 24.047969818115234, |
|
"rewards/rejected": -24.507898330688477, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.157159047978844e-07, |
|
"logits/chosen": -3.4679012298583984, |
|
"logits/rejected": -3.2024967670440674, |
|
"logps/chosen": -222.7393035888672, |
|
"logps/rejected": -737.6758422851562, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8875459432601929, |
|
"rewards/margins": 23.48456573486328, |
|
"rewards/rejected": -24.372106552124023, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_logits/chosen": -3.415928602218628, |
|
"eval_logits/rejected": -3.2893950939178467, |
|
"eval_logps/chosen": -259.3686828613281, |
|
"eval_logps/rejected": -665.7247314453125, |
|
"eval_loss": 0.03561777248978615, |
|
"eval_rewards/accuracies": 0.996632993221283, |
|
"eval_rewards/chosen": -0.3563132882118225, |
|
"eval_rewards/margins": 23.792869567871094, |
|
"eval_rewards/rejected": -24.149181365966797, |
|
"eval_runtime": 460.2637, |
|
"eval_samples_per_second": 20.64, |
|
"eval_steps_per_second": 0.645, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.1382697393275407e-07, |
|
"logits/chosen": -3.35868501663208, |
|
"logits/rejected": -3.294783353805542, |
|
"logps/chosen": -320.07965087890625, |
|
"logps/rejected": -738.1607666015625, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3287244737148285, |
|
"rewards/margins": 26.954553604125977, |
|
"rewards/rejected": -27.283273696899414, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.1193804306762372e-07, |
|
"logits/chosen": -3.297001600265503, |
|
"logits/rejected": -3.2148499488830566, |
|
"logps/chosen": -269.9095153808594, |
|
"logps/rejected": -863.6824340820312, |
|
"loss": 0.0036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7257425785064697, |
|
"rewards/margins": 20.9593448638916, |
|
"rewards/rejected": -21.685089111328125, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.1004911220249338e-07, |
|
"logits/chosen": -3.3534445762634277, |
|
"logits/rejected": -3.3217594623565674, |
|
"logps/chosen": -259.9757080078125, |
|
"logps/rejected": -613.6614990234375, |
|
"loss": 0.0067, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3644919991493225, |
|
"rewards/margins": 22.092815399169922, |
|
"rewards/rejected": -21.728322982788086, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.0816018133736303e-07, |
|
"logits/chosen": -3.335651397705078, |
|
"logits/rejected": -3.2650818824768066, |
|
"logps/chosen": -335.0670166015625, |
|
"logps/rejected": -782.08349609375, |
|
"loss": 0.0073, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0012598276371136308, |
|
"rewards/margins": 27.04427719116211, |
|
"rewards/rejected": -27.043018341064453, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.0627125047223271e-07, |
|
"logits/chosen": -3.357959032058716, |
|
"logits/rejected": -3.278252363204956, |
|
"logps/chosen": -266.217529296875, |
|
"logps/rejected": -641.0094604492188, |
|
"loss": 0.0517, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.38472968339920044, |
|
"rewards/margins": 23.43660545349121, |
|
"rewards/rejected": -23.821334838867188, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.0438231960710237e-07, |
|
"logits/chosen": -3.4566192626953125, |
|
"logits/rejected": -3.297550678253174, |
|
"logps/chosen": -265.8533630371094, |
|
"logps/rejected": -815.4766845703125, |
|
"loss": 0.0097, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3703159093856812, |
|
"rewards/margins": 29.9644832611084, |
|
"rewards/rejected": -31.334796905517578, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.0249338874197203e-07, |
|
"logits/chosen": -3.4326648712158203, |
|
"logits/rejected": -3.4103198051452637, |
|
"logps/chosen": -268.6964416503906, |
|
"logps/rejected": -670.5267333984375, |
|
"loss": 0.0043, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2914245128631592, |
|
"rewards/margins": 30.470592498779297, |
|
"rewards/rejected": -31.762014389038086, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.0060445787684168e-07, |
|
"logits/chosen": -3.5192794799804688, |
|
"logits/rejected": -3.332420825958252, |
|
"logps/chosen": -209.93576049804688, |
|
"logps/rejected": -745.6237182617188, |
|
"loss": 0.0053, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.117185354232788, |
|
"rewards/margins": 33.509422302246094, |
|
"rewards/rejected": -34.62660598754883, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.9871552701171136e-07, |
|
"logits/chosen": -3.4282066822052, |
|
"logits/rejected": -3.368283748626709, |
|
"logps/chosen": -285.8004150390625, |
|
"logps/rejected": -652.3516235351562, |
|
"loss": 0.0065, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.227819561958313, |
|
"rewards/margins": 26.842388153076172, |
|
"rewards/rejected": -27.070209503173828, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.9682659614658105e-07, |
|
"logits/chosen": -3.4334263801574707, |
|
"logits/rejected": -3.336756467819214, |
|
"logps/chosen": -335.62725830078125, |
|
"logps/rejected": -702.3507080078125, |
|
"loss": 0.0105, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.38077887892723083, |
|
"rewards/margins": 29.044042587280273, |
|
"rewards/rejected": -29.424823760986328, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_logits/chosen": -3.534825086593628, |
|
"eval_logits/rejected": -3.3839564323425293, |
|
"eval_logps/chosen": -265.37176513671875, |
|
"eval_logps/rejected": -763.1902465820312, |
|
"eval_loss": 0.03813723102211952, |
|
"eval_rewards/accuracies": 0.9957912564277649, |
|
"eval_rewards/chosen": -0.9566193222999573, |
|
"eval_rewards/margins": 32.939109802246094, |
|
"eval_rewards/rejected": -33.895729064941406, |
|
"eval_runtime": 460.0454, |
|
"eval_samples_per_second": 20.65, |
|
"eval_steps_per_second": 0.646, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.949376652814507e-07, |
|
"logits/chosen": -3.396048069000244, |
|
"logits/rejected": -3.275059461593628, |
|
"logps/chosen": -252.13223266601562, |
|
"logps/rejected": -747.9521484375, |
|
"loss": 0.0043, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.7216171026229858, |
|
"rewards/margins": 32.15826416015625, |
|
"rewards/rejected": -32.87987518310547, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.9304873441632036e-07, |
|
"logits/chosen": -3.5610382556915283, |
|
"logits/rejected": -3.372722625732422, |
|
"logps/chosen": -199.77662658691406, |
|
"logps/rejected": -584.327392578125, |
|
"loss": 0.2531, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1961396932601929, |
|
"rewards/margins": 29.65005874633789, |
|
"rewards/rejected": -30.8461971282959, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.9115980355119001e-07, |
|
"logits/chosen": -3.369832992553711, |
|
"logits/rejected": -3.314237117767334, |
|
"logps/chosen": -215.8191680908203, |
|
"logps/rejected": -815.2225952148438, |
|
"loss": 0.0611, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3732140064239502, |
|
"rewards/margins": 27.058151245117188, |
|
"rewards/rejected": -27.431365966796875, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.8927087268605967e-07, |
|
"logits/chosen": -3.4612975120544434, |
|
"logits/rejected": -3.288508176803589, |
|
"logps/chosen": -218.56201171875, |
|
"logps/rejected": -906.2979736328125, |
|
"loss": 0.0062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.040290117263794, |
|
"rewards/margins": 31.08365249633789, |
|
"rewards/rejected": -32.12394714355469, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.8738194182092935e-07, |
|
"logits/chosen": -3.348356246948242, |
|
"logits/rejected": -3.2337405681610107, |
|
"logps/chosen": -391.52935791015625, |
|
"logps/rejected": -651.825927734375, |
|
"loss": 0.0055, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7969802021980286, |
|
"rewards/margins": 28.728500366210938, |
|
"rewards/rejected": -27.9315185546875, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.85493010955799e-07, |
|
"logits/chosen": -3.409937620162964, |
|
"logits/rejected": -3.3914718627929688, |
|
"logps/chosen": -265.65386962890625, |
|
"logps/rejected": -596.263916015625, |
|
"loss": 0.0173, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3935322165489197, |
|
"rewards/margins": 24.100194931030273, |
|
"rewards/rejected": -24.49372673034668, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.8360408009066866e-07, |
|
"logits/chosen": -3.3308022022247314, |
|
"logits/rejected": -3.2177085876464844, |
|
"logps/chosen": -282.54046630859375, |
|
"logps/rejected": -823.97412109375, |
|
"loss": 0.007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.41234469413757324, |
|
"rewards/margins": 22.745586395263672, |
|
"rewards/rejected": -23.157930374145508, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.8171514922553835e-07, |
|
"logits/chosen": -3.446866512298584, |
|
"logits/rejected": -3.372515916824341, |
|
"logps/chosen": -276.74468994140625, |
|
"logps/rejected": -599.4273071289062, |
|
"loss": 0.0043, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.35287076234817505, |
|
"rewards/margins": 21.927663803100586, |
|
"rewards/rejected": -22.280534744262695, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.79826218360408e-07, |
|
"logits/chosen": -3.317155361175537, |
|
"logits/rejected": -3.3876793384552, |
|
"logps/chosen": -318.00860595703125, |
|
"logps/rejected": -510.11151123046875, |
|
"loss": 0.0041, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.5086533427238464, |
|
"rewards/margins": 24.184043884277344, |
|
"rewards/rejected": -24.692697525024414, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7793728749527768e-07, |
|
"logits/chosen": -3.3701987266540527, |
|
"logits/rejected": -3.286350727081299, |
|
"logps/chosen": -267.9636535644531, |
|
"logps/rejected": -580.6959838867188, |
|
"loss": 0.006, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.3567093014717102, |
|
"rewards/margins": 24.048980712890625, |
|
"rewards/rejected": -24.40568733215332, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_logits/chosen": -3.4805188179016113, |
|
"eval_logits/rejected": -3.337117910385132, |
|
"eval_logps/chosen": -257.2082824707031, |
|
"eval_logps/rejected": -686.7160034179688, |
|
"eval_loss": 0.0072143604047596455, |
|
"eval_rewards/accuracies": 0.997474730014801, |
|
"eval_rewards/chosen": -0.14026859402656555, |
|
"eval_rewards/margins": 26.10803985595703, |
|
"eval_rewards/rejected": -26.248306274414062, |
|
"eval_runtime": 460.6018, |
|
"eval_samples_per_second": 20.625, |
|
"eval_steps_per_second": 0.645, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7604835663014734e-07, |
|
"logits/chosen": -3.408255100250244, |
|
"logits/rejected": -3.403702974319458, |
|
"logps/chosen": -203.19618225097656, |
|
"logps/rejected": -665.8862915039062, |
|
"loss": 0.0229, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6091235280036926, |
|
"rewards/margins": 24.911001205444336, |
|
"rewards/rejected": -25.520126342773438, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.74159425765017e-07, |
|
"logits/chosen": -3.443638324737549, |
|
"logits/rejected": -3.3524250984191895, |
|
"logps/chosen": -209.58401489257812, |
|
"logps/rejected": -749.86181640625, |
|
"loss": 0.0053, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.21705660223960876, |
|
"rewards/margins": 27.513622283935547, |
|
"rewards/rejected": -27.730676651000977, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.7227049489988665e-07, |
|
"logits/chosen": -3.3942184448242188, |
|
"logits/rejected": -3.311061143875122, |
|
"logps/chosen": -317.77410888671875, |
|
"logps/rejected": -552.6033325195312, |
|
"loss": 0.014, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.5486845970153809, |
|
"rewards/margins": 22.28003692626953, |
|
"rewards/rejected": -22.82872200012207, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.703815640347563e-07, |
|
"logits/chosen": -3.5390067100524902, |
|
"logits/rejected": -3.3566482067108154, |
|
"logps/chosen": -201.36410522460938, |
|
"logps/rejected": -666.7164306640625, |
|
"loss": 0.0079, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.4069572389125824, |
|
"rewards/margins": 28.751266479492188, |
|
"rewards/rejected": -29.158227920532227, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.6849263316962596e-07, |
|
"logits/chosen": -3.3728034496307373, |
|
"logits/rejected": -3.3503639698028564, |
|
"logps/chosen": -267.21881103515625, |
|
"logps/rejected": -626.498046875, |
|
"loss": 0.0103, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2131740152835846, |
|
"rewards/margins": 27.280838012695312, |
|
"rewards/rejected": -27.067663192749023, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.6660370230449564e-07, |
|
"logits/chosen": -3.4185855388641357, |
|
"logits/rejected": -3.3411049842834473, |
|
"logps/chosen": -272.4405212402344, |
|
"logps/rejected": -621.2442626953125, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.10709935426712036, |
|
"rewards/margins": 26.78093910217285, |
|
"rewards/rejected": -26.673839569091797, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.6471477143936533e-07, |
|
"logits/chosen": -3.4380111694335938, |
|
"logits/rejected": -3.3767757415771484, |
|
"logps/chosen": -248.4059295654297, |
|
"logps/rejected": -633.5654296875, |
|
"loss": 0.0096, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7223705649375916, |
|
"rewards/margins": 27.201065063476562, |
|
"rewards/rejected": -26.478694915771484, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.6282584057423498e-07, |
|
"logits/chosen": -3.524405002593994, |
|
"logits/rejected": -3.448660373687744, |
|
"logps/chosen": -197.21920776367188, |
|
"logps/rejected": -635.7800903320312, |
|
"loss": 0.0211, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5619519948959351, |
|
"rewards/margins": 32.2112922668457, |
|
"rewards/rejected": -32.77324676513672, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.6093690970910464e-07, |
|
"logits/chosen": -3.3876919746398926, |
|
"logits/rejected": -3.3305823802948, |
|
"logps/chosen": -287.99945068359375, |
|
"logps/rejected": -709.9437255859375, |
|
"loss": 0.0045, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21997538208961487, |
|
"rewards/margins": 31.6516170501709, |
|
"rewards/rejected": -31.431640625, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.590479788439743e-07, |
|
"logits/chosen": -3.3942599296569824, |
|
"logits/rejected": -3.2963924407958984, |
|
"logps/chosen": -314.06060791015625, |
|
"logps/rejected": -908.1524658203125, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1970634162425995, |
|
"rewards/margins": 28.25901222229004, |
|
"rewards/rejected": -28.456073760986328, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_logits/chosen": -3.497438669204712, |
|
"eval_logits/rejected": -3.355656623840332, |
|
"eval_logps/chosen": -257.67596435546875, |
|
"eval_logps/rejected": -714.7033081054688, |
|
"eval_loss": 0.010176397860050201, |
|
"eval_rewards/accuracies": 0.996632993221283, |
|
"eval_rewards/chosen": -0.18703816831111908, |
|
"eval_rewards/margins": 28.85999298095703, |
|
"eval_rewards/rejected": -29.047029495239258, |
|
"eval_runtime": 460.179, |
|
"eval_samples_per_second": 20.644, |
|
"eval_steps_per_second": 0.645, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5715904797884398e-07, |
|
"logits/chosen": -3.422182559967041, |
|
"logits/rejected": -3.3635239601135254, |
|
"logps/chosen": -237.2140350341797, |
|
"logps/rejected": -596.4075317382812, |
|
"loss": 0.0114, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.42542657256126404, |
|
"rewards/margins": 25.331525802612305, |
|
"rewards/rejected": -25.7569522857666, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5527011711371363e-07, |
|
"logits/chosen": -3.46155047416687, |
|
"logits/rejected": -3.287555694580078, |
|
"logps/chosen": -221.025634765625, |
|
"logps/rejected": -760.5899047851562, |
|
"loss": 0.0066, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.587006688117981, |
|
"rewards/margins": 23.792795181274414, |
|
"rewards/rejected": -24.379802703857422, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.533811862485833e-07, |
|
"logits/chosen": -3.4075939655303955, |
|
"logits/rejected": -3.2999377250671387, |
|
"logps/chosen": -274.0217590332031, |
|
"logps/rejected": -778.3746337890625, |
|
"loss": 0.0048, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.0100147724151611, |
|
"rewards/margins": 29.110565185546875, |
|
"rewards/rejected": -30.120580673217773, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.5149225538345294e-07, |
|
"logits/chosen": -3.4074718952178955, |
|
"logits/rejected": -3.328861951828003, |
|
"logps/chosen": -234.0587921142578, |
|
"logps/rejected": -573.584228515625, |
|
"loss": 0.0481, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8820997476577759, |
|
"rewards/margins": 25.29749870300293, |
|
"rewards/rejected": -26.17959976196289, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.496033245183226e-07, |
|
"logits/chosen": -3.433687925338745, |
|
"logits/rejected": -3.2161545753479004, |
|
"logps/chosen": -210.7549285888672, |
|
"logps/rejected": -769.3878173828125, |
|
"loss": 0.0106, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.381535142660141, |
|
"rewards/margins": 30.15536117553711, |
|
"rewards/rejected": -30.536895751953125, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.477143936531923e-07, |
|
"logits/chosen": -3.316527843475342, |
|
"logits/rejected": -3.3601183891296387, |
|
"logps/chosen": -334.7134094238281, |
|
"logps/rejected": -479.4236755371094, |
|
"loss": 0.0101, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.2200862169265747, |
|
"rewards/margins": 26.942337036132812, |
|
"rewards/rejected": -27.162424087524414, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.4582546278806196e-07, |
|
"logits/chosen": -3.372246503829956, |
|
"logits/rejected": -3.286343812942505, |
|
"logps/chosen": -269.2080993652344, |
|
"logps/rejected": -628.6188354492188, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.732342541217804, |
|
"rewards/margins": 29.969751358032227, |
|
"rewards/rejected": -30.702091217041016, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.4393653192293162e-07, |
|
"logits/chosen": -3.4120821952819824, |
|
"logits/rejected": -3.2668113708496094, |
|
"logps/chosen": -272.43658447265625, |
|
"logps/rejected": -670.0343627929688, |
|
"loss": 0.0172, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.10129819065332413, |
|
"rewards/margins": 27.09328842163086, |
|
"rewards/rejected": -27.194589614868164, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.4204760105780127e-07, |
|
"logits/chosen": -3.430737257003784, |
|
"logits/rejected": -3.3516769409179688, |
|
"logps/chosen": -270.27667236328125, |
|
"logps/rejected": -783.06884765625, |
|
"loss": 0.0057, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.26623135805130005, |
|
"rewards/margins": 37.538352966308594, |
|
"rewards/rejected": -37.80458450317383, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.4015867019267093e-07, |
|
"logits/chosen": -3.218029499053955, |
|
"logits/rejected": -3.3251090049743652, |
|
"logps/chosen": -504.22454833984375, |
|
"logps/rejected": -690.3221435546875, |
|
"loss": 0.0038, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.57887864112854, |
|
"rewards/margins": 31.29214096069336, |
|
"rewards/rejected": -31.871023178100586, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_logits/chosen": -3.5046215057373047, |
|
"eval_logits/rejected": -3.3551418781280518, |
|
"eval_logps/chosen": -260.6087341308594, |
|
"eval_logps/rejected": -723.0064086914062, |
|
"eval_loss": 0.00784117542207241, |
|
"eval_rewards/accuracies": 0.996632993221283, |
|
"eval_rewards/chosen": -0.48031851649284363, |
|
"eval_rewards/margins": 29.39702606201172, |
|
"eval_rewards/rejected": -29.877344131469727, |
|
"eval_runtime": 460.3396, |
|
"eval_samples_per_second": 20.637, |
|
"eval_steps_per_second": 0.645, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.382697393275406e-07, |
|
"logits/chosen": -3.2150256633758545, |
|
"logits/rejected": -3.264436721801758, |
|
"logps/chosen": -429.79412841796875, |
|
"logps/rejected": -691.61767578125, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6222277283668518, |
|
"rewards/margins": 24.18454360961914, |
|
"rewards/rejected": -24.806772232055664, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.3638080846241027e-07, |
|
"logits/chosen": -3.491255283355713, |
|
"logits/rejected": -3.2765109539031982, |
|
"logps/chosen": -193.16854858398438, |
|
"logps/rejected": -634.7061767578125, |
|
"loss": 0.0178, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7472248673439026, |
|
"rewards/margins": 27.92132568359375, |
|
"rewards/rejected": -28.66855239868164, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.3449187759727992e-07, |
|
"logits/chosen": -3.4371280670166016, |
|
"logits/rejected": -3.394758701324463, |
|
"logps/chosen": -200.95811462402344, |
|
"logps/rejected": -568.9024047851562, |
|
"loss": 0.0106, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9408467411994934, |
|
"rewards/margins": 25.23788833618164, |
|
"rewards/rejected": -26.178735733032227, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.3260294673214958e-07, |
|
"logits/chosen": -3.448991060256958, |
|
"logits/rejected": -3.209871292114258, |
|
"logps/chosen": -215.184326171875, |
|
"logps/rejected": -723.2108154296875, |
|
"loss": 0.0051, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.9981611371040344, |
|
"rewards/margins": 30.49850845336914, |
|
"rewards/rejected": -31.496667861938477, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.3071401586701926e-07, |
|
"logits/chosen": -3.442951202392578, |
|
"logits/rejected": -3.4533190727233887, |
|
"logps/chosen": -226.30252075195312, |
|
"logps/rejected": -531.5767822265625, |
|
"loss": 0.0063, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7389757633209229, |
|
"rewards/margins": 25.352436065673828, |
|
"rewards/rejected": -26.091405868530273, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2882508500188894e-07, |
|
"logits/chosen": -3.467149257659912, |
|
"logits/rejected": -3.3945045471191406, |
|
"logps/chosen": -211.5034637451172, |
|
"logps/rejected": -699.1697387695312, |
|
"loss": 0.0066, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5048588514328003, |
|
"rewards/margins": 28.00215721130371, |
|
"rewards/rejected": -28.50701904296875, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.269361541367586e-07, |
|
"logits/chosen": -3.3899827003479004, |
|
"logits/rejected": -3.4136524200439453, |
|
"logps/chosen": -312.1116943359375, |
|
"logps/rejected": -670.1107177734375, |
|
"loss": 0.0031, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6437836289405823, |
|
"rewards/margins": 30.72330665588379, |
|
"rewards/rejected": -31.367090225219727, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2504722327162826e-07, |
|
"logits/chosen": -3.4552528858184814, |
|
"logits/rejected": -3.3502261638641357, |
|
"logps/chosen": -222.96029663085938, |
|
"logps/rejected": -634.1143798828125, |
|
"loss": 0.0051, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.35589832067489624, |
|
"rewards/margins": 26.0993595123291, |
|
"rewards/rejected": -26.455257415771484, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.231582924064979e-07, |
|
"logits/chosen": -3.4500694274902344, |
|
"logits/rejected": -3.356391191482544, |
|
"logps/chosen": -226.2766876220703, |
|
"logps/rejected": -721.9498901367188, |
|
"loss": 0.0202, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8104637265205383, |
|
"rewards/margins": 28.76261329650879, |
|
"rewards/rejected": -29.573078155517578, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.2126936154136757e-07, |
|
"logits/chosen": -3.4218978881835938, |
|
"logits/rejected": -3.3485636711120605, |
|
"logps/chosen": -249.322265625, |
|
"logps/rejected": -696.6746826171875, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3162160813808441, |
|
"rewards/margins": 24.947269439697266, |
|
"rewards/rejected": -25.263486862182617, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_logits/chosen": -3.4948203563690186, |
|
"eval_logits/rejected": -3.345940589904785, |
|
"eval_logps/chosen": -260.5770263671875, |
|
"eval_logps/rejected": -708.5813598632812, |
|
"eval_loss": 0.007498822640627623, |
|
"eval_rewards/accuracies": 0.996632993221283, |
|
"eval_rewards/chosen": -0.4771437644958496, |
|
"eval_rewards/margins": 27.957693099975586, |
|
"eval_rewards/rejected": -28.434837341308594, |
|
"eval_runtime": 461.2562, |
|
"eval_samples_per_second": 20.596, |
|
"eval_steps_per_second": 0.644, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1938043067623725e-07, |
|
"logits/chosen": -3.370896816253662, |
|
"logits/rejected": -3.3326168060302734, |
|
"logps/chosen": -347.14337158203125, |
|
"logps/rejected": -689.7429809570312, |
|
"loss": 0.0081, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18211320042610168, |
|
"rewards/margins": 27.03213119506836, |
|
"rewards/rejected": -26.850021362304688, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1749149981110692e-07, |
|
"logits/chosen": -3.3958702087402344, |
|
"logits/rejected": -3.369236707687378, |
|
"logps/chosen": -246.0237579345703, |
|
"logps/rejected": -561.996826171875, |
|
"loss": 0.0076, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.559197723865509, |
|
"rewards/margins": 20.26263427734375, |
|
"rewards/rejected": -20.82183074951172, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1560256894597657e-07, |
|
"logits/chosen": -3.3493895530700684, |
|
"logits/rejected": -3.370333433151245, |
|
"logps/chosen": -424.63336181640625, |
|
"logps/rejected": -620.3344116210938, |
|
"loss": 0.0134, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.06108301132917404, |
|
"rewards/margins": 25.891162872314453, |
|
"rewards/rejected": -25.952245712280273, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.1371363808084623e-07, |
|
"logits/chosen": -3.4243323802948, |
|
"logits/rejected": -3.247948169708252, |
|
"logps/chosen": -254.7300262451172, |
|
"logps/rejected": -1042.1585693359375, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3867115378379822, |
|
"rewards/margins": 35.40499496459961, |
|
"rewards/rejected": -35.01828384399414, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.118247072157159e-07, |
|
"logits/chosen": -3.4594218730926514, |
|
"logits/rejected": -3.3651282787323, |
|
"logps/chosen": -242.59677124023438, |
|
"logps/rejected": -538.2279663085938, |
|
"loss": 0.0067, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.49601420760154724, |
|
"rewards/margins": 23.69955062866211, |
|
"rewards/rejected": -24.19556427001953, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.0993577635058557e-07, |
|
"logits/chosen": -3.511322498321533, |
|
"logits/rejected": -3.356180191040039, |
|
"logps/chosen": -196.69949340820312, |
|
"logps/rejected": -571.466796875, |
|
"loss": 0.0076, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.02175600454211235, |
|
"rewards/margins": 25.043487548828125, |
|
"rewards/rejected": -25.021728515625, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0804684548545522e-07, |
|
"logits/chosen": -3.4236786365509033, |
|
"logits/rejected": -3.338402509689331, |
|
"logps/chosen": -247.4124755859375, |
|
"logps/rejected": -755.9312133789062, |
|
"loss": 0.0036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.05236620828509331, |
|
"rewards/margins": 24.615774154663086, |
|
"rewards/rejected": -24.668140411376953, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0615791462032489e-07, |
|
"logits/chosen": -3.413336992263794, |
|
"logits/rejected": -3.3395888805389404, |
|
"logps/chosen": -274.93182373046875, |
|
"logps/rejected": -602.7622680664062, |
|
"loss": 0.0037, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4528873562812805, |
|
"rewards/margins": 26.461483001708984, |
|
"rewards/rejected": -26.91436767578125, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0426898375519455e-07, |
|
"logits/chosen": -3.4481735229492188, |
|
"logits/rejected": -3.3065543174743652, |
|
"logps/chosen": -273.5448913574219, |
|
"logps/rejected": -812.5321044921875, |
|
"loss": 0.1677, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.46621403098106384, |
|
"rewards/margins": 29.433242797851562, |
|
"rewards/rejected": -29.899456024169922, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.0238005289006423e-07, |
|
"logits/chosen": -3.498518466949463, |
|
"logits/rejected": -3.352489948272705, |
|
"logps/chosen": -229.6658172607422, |
|
"logps/rejected": -674.809814453125, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.506945788860321, |
|
"rewards/margins": 24.02678680419922, |
|
"rewards/rejected": -24.533733367919922, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_logits/chosen": -3.4949593544006348, |
|
"eval_logits/rejected": -3.3488945960998535, |
|
"eval_logps/chosen": -257.8039245605469, |
|
"eval_logps/rejected": -704.2630615234375, |
|
"eval_loss": 0.00467069773003459, |
|
"eval_rewards/accuracies": 0.9983165264129639, |
|
"eval_rewards/chosen": -0.19983212649822235, |
|
"eval_rewards/margins": 27.803180694580078, |
|
"eval_rewards/rejected": -28.00301170349121, |
|
"eval_runtime": 460.1772, |
|
"eval_samples_per_second": 20.644, |
|
"eval_steps_per_second": 0.645, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.0049112202493389e-07, |
|
"logits/chosen": -3.458670139312744, |
|
"logits/rejected": -3.34519624710083, |
|
"logps/chosen": -229.53012084960938, |
|
"logps/rejected": -653.47021484375, |
|
"loss": 0.9205, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.46636518836021423, |
|
"rewards/margins": 31.985393524169922, |
|
"rewards/rejected": -32.45175552368164, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.860219115980354e-08, |
|
"logits/chosen": -3.354882001876831, |
|
"logits/rejected": -3.3362224102020264, |
|
"logps/chosen": -298.93890380859375, |
|
"logps/rejected": -575.4816284179688, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2504803538322449, |
|
"rewards/margins": 25.459463119506836, |
|
"rewards/rejected": -25.208984375, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.671326029467321e-08, |
|
"logits/chosen": -3.409818649291992, |
|
"logits/rejected": -3.294722080230713, |
|
"logps/chosen": -264.5761413574219, |
|
"logps/rejected": -753.7411499023438, |
|
"loss": 0.0166, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6481723189353943, |
|
"rewards/margins": 29.749752044677734, |
|
"rewards/rejected": -30.39792251586914, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.482432942954287e-08, |
|
"logits/chosen": -3.398315906524658, |
|
"logits/rejected": -3.2785720825195312, |
|
"logps/chosen": -275.75506591796875, |
|
"logps/rejected": -881.2058715820312, |
|
"loss": 0.0051, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.4502645432949066, |
|
"rewards/margins": 33.76361846923828, |
|
"rewards/rejected": -34.21388626098633, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.293539856441255e-08, |
|
"logits/chosen": -3.4217476844787598, |
|
"logits/rejected": -3.160505533218384, |
|
"logps/chosen": -211.6370849609375, |
|
"logps/rejected": -842.2235107421875, |
|
"loss": 0.0173, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4303048253059387, |
|
"rewards/margins": 34.46601486206055, |
|
"rewards/rejected": -34.89632034301758, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 9.10464676992822e-08, |
|
"logits/chosen": -3.360055446624756, |
|
"logits/rejected": -3.2219321727752686, |
|
"logps/chosen": -197.63504028320312, |
|
"logps/rejected": -700.8560791015625, |
|
"loss": 0.0038, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5080915689468384, |
|
"rewards/margins": 28.26936912536621, |
|
"rewards/rejected": -28.7774600982666, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.915753683415186e-08, |
|
"logits/chosen": -3.324397325515747, |
|
"logits/rejected": -3.2371230125427246, |
|
"logps/chosen": -265.84197998046875, |
|
"logps/rejected": -706.1388549804688, |
|
"loss": 0.0212, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3217003345489502, |
|
"rewards/margins": 31.357486724853516, |
|
"rewards/rejected": -31.035785675048828, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.726860596902153e-08, |
|
"logits/chosen": -3.324913501739502, |
|
"logits/rejected": -3.31086802482605, |
|
"logps/chosen": -289.2586364746094, |
|
"logps/rejected": -618.4796142578125, |
|
"loss": 0.0052, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.31292805075645447, |
|
"rewards/margins": 25.791996002197266, |
|
"rewards/rejected": -25.47906494140625, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.53796751038912e-08, |
|
"logits/chosen": -3.358564853668213, |
|
"logits/rejected": -3.3472542762756348, |
|
"logps/chosen": -252.2576141357422, |
|
"logps/rejected": -531.1395263671875, |
|
"loss": 0.0094, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5012627840042114, |
|
"rewards/margins": 24.36186408996582, |
|
"rewards/rejected": -24.86312484741211, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.349074423876085e-08, |
|
"logits/chosen": -3.2991740703582764, |
|
"logits/rejected": -3.3889808654785156, |
|
"logps/chosen": -312.4256591796875, |
|
"logps/rejected": -605.0225830078125, |
|
"loss": 0.0051, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.34426942467689514, |
|
"rewards/margins": 31.89084243774414, |
|
"rewards/rejected": -31.546573638916016, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_logits/chosen": -3.4427988529205322, |
|
"eval_logits/rejected": -3.3025379180908203, |
|
"eval_logps/chosen": -258.5765075683594, |
|
"eval_logps/rejected": -716.5906372070312, |
|
"eval_loss": 0.004813064821064472, |
|
"eval_rewards/accuracies": 0.9991582632064819, |
|
"eval_rewards/chosen": -0.2770873010158539, |
|
"eval_rewards/margins": 28.958681106567383, |
|
"eval_rewards/rejected": -29.23576545715332, |
|
"eval_runtime": 460.0317, |
|
"eval_samples_per_second": 20.651, |
|
"eval_steps_per_second": 0.646, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.160181337363052e-08, |
|
"logits/chosen": -3.3120341300964355, |
|
"logits/rejected": -3.2296226024627686, |
|
"logps/chosen": -280.31353759765625, |
|
"logps/rejected": -694.3499755859375, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.37215790152549744, |
|
"rewards/margins": 28.98830795288086, |
|
"rewards/rejected": -29.3604679107666, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.971288250850018e-08, |
|
"logits/chosen": -3.4131808280944824, |
|
"logits/rejected": -3.1928412914276123, |
|
"logps/chosen": -205.73983764648438, |
|
"logps/rejected": -883.18017578125, |
|
"loss": 0.0071, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5982518196105957, |
|
"rewards/margins": 27.798303604125977, |
|
"rewards/rejected": -28.39655113220215, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.782395164336985e-08, |
|
"logits/chosen": -3.383462429046631, |
|
"logits/rejected": -3.292088747024536, |
|
"logps/chosen": -337.71136474609375, |
|
"logps/rejected": -649.876220703125, |
|
"loss": 0.3124, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.490572690963745, |
|
"rewards/margins": 20.789615631103516, |
|
"rewards/rejected": -23.280189514160156, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.593502077823952e-08, |
|
"logits/chosen": -3.3705170154571533, |
|
"logits/rejected": -3.262524127960205, |
|
"logps/chosen": -210.7335205078125, |
|
"logps/rejected": -573.064453125, |
|
"loss": 0.2135, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.23895084857940674, |
|
"rewards/margins": 23.423107147216797, |
|
"rewards/rejected": -23.662057876586914, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.404608991310917e-08, |
|
"logits/chosen": -3.3921730518341064, |
|
"logits/rejected": -3.3298392295837402, |
|
"logps/chosen": -256.29339599609375, |
|
"logps/rejected": -685.7628784179688, |
|
"loss": 0.0059, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.020406579598784447, |
|
"rewards/margins": 28.857458114624023, |
|
"rewards/rejected": -28.837055206298828, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.215715904797884e-08, |
|
"logits/chosen": -3.4744045734405518, |
|
"logits/rejected": -3.286008834838867, |
|
"logps/chosen": -214.1748046875, |
|
"logps/rejected": -831.2682495117188, |
|
"loss": 0.0113, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8809574842453003, |
|
"rewards/margins": 33.92351150512695, |
|
"rewards/rejected": -34.804466247558594, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.02682281828485e-08, |
|
"logits/chosen": -3.3835952281951904, |
|
"logits/rejected": -3.3155086040496826, |
|
"logps/chosen": -275.273193359375, |
|
"logps/rejected": -767.3925170898438, |
|
"loss": 0.0098, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.5842048525810242, |
|
"rewards/margins": 30.7226505279541, |
|
"rewards/rejected": -31.30685806274414, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.837929731771818e-08, |
|
"logits/chosen": -3.317415952682495, |
|
"logits/rejected": -3.3041164875030518, |
|
"logps/chosen": -258.95086669921875, |
|
"logps/rejected": -488.6026916503906, |
|
"loss": 0.0068, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.17078134417533875, |
|
"rewards/margins": 23.854570388793945, |
|
"rewards/rejected": -24.025352478027344, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.649036645258783e-08, |
|
"logits/chosen": -3.344968795776367, |
|
"logits/rejected": -3.2897567749023438, |
|
"logps/chosen": -314.9465026855469, |
|
"logps/rejected": -535.2317504882812, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.07202013581991196, |
|
"rewards/margins": 28.91226577758789, |
|
"rewards/rejected": -28.984283447265625, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.460143558745749e-08, |
|
"logits/chosen": -3.419276475906372, |
|
"logits/rejected": -3.2689127922058105, |
|
"logps/chosen": -205.4881134033203, |
|
"logps/rejected": -798.7406005859375, |
|
"loss": 0.0074, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.08600559085607529, |
|
"rewards/margins": 38.14585494995117, |
|
"rewards/rejected": -38.059852600097656, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_logits/chosen": -3.4804773330688477, |
|
"eval_logits/rejected": -3.332041025161743, |
|
"eval_logps/chosen": -257.89501953125, |
|
"eval_logps/rejected": -720.7188720703125, |
|
"eval_loss": 0.004375319927930832, |
|
"eval_rewards/accuracies": 0.997474730014801, |
|
"eval_rewards/chosen": -0.20894265174865723, |
|
"eval_rewards/margins": 29.43964385986328, |
|
"eval_rewards/rejected": -29.64858627319336, |
|
"eval_runtime": 460.0382, |
|
"eval_samples_per_second": 20.65, |
|
"eval_steps_per_second": 0.646, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.271250472232716e-08, |
|
"logits/chosen": -3.394615888595581, |
|
"logits/rejected": -3.2852377891540527, |
|
"logps/chosen": -266.36767578125, |
|
"logps/rejected": -717.4393310546875, |
|
"loss": 0.0151, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.6554498672485352, |
|
"rewards/margins": 26.674755096435547, |
|
"rewards/rejected": -26.019306182861328, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.082357385719683e-08, |
|
"logits/chosen": -3.381989002227783, |
|
"logits/rejected": -3.3894596099853516, |
|
"logps/chosen": -342.04913330078125, |
|
"logps/rejected": -546.2716674804688, |
|
"loss": 0.0047, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6526740193367004, |
|
"rewards/margins": 21.150623321533203, |
|
"rewards/rejected": -21.803295135498047, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.893464299206649e-08, |
|
"logits/chosen": -3.4155497550964355, |
|
"logits/rejected": -3.2923424243927, |
|
"logps/chosen": -223.69140625, |
|
"logps/rejected": -731.6795043945312, |
|
"loss": 0.0058, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3694123923778534, |
|
"rewards/margins": 28.902706146240234, |
|
"rewards/rejected": -29.27211570739746, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.704571212693615e-08, |
|
"logits/chosen": -3.330949306488037, |
|
"logits/rejected": -3.3261523246765137, |
|
"logps/chosen": -248.51528930664062, |
|
"logps/rejected": -587.5772705078125, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7009295225143433, |
|
"rewards/margins": 24.234630584716797, |
|
"rewards/rejected": -24.935558319091797, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.5156781261805816e-08, |
|
"logits/chosen": -3.4656805992126465, |
|
"logits/rejected": -3.3214333057403564, |
|
"logps/chosen": -191.90036010742188, |
|
"logps/rejected": -628.0974731445312, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.088071584701538, |
|
"rewards/margins": 31.438037872314453, |
|
"rewards/rejected": -32.5261116027832, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.326785039667548e-08, |
|
"logits/chosen": -3.4162774085998535, |
|
"logits/rejected": -3.4043285846710205, |
|
"logps/chosen": -270.454345703125, |
|
"logps/rejected": -554.826171875, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.00885864533483982, |
|
"rewards/margins": 29.386489868164062, |
|
"rewards/rejected": -29.395349502563477, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5.137891953154514e-08, |
|
"logits/chosen": -3.5397841930389404, |
|
"logits/rejected": -3.299877166748047, |
|
"logps/chosen": -220.99899291992188, |
|
"logps/rejected": -832.1795654296875, |
|
"loss": 0.0054, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.49379825592041016, |
|
"rewards/margins": 30.90732192993164, |
|
"rewards/rejected": -31.4011173248291, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.948998866641481e-08, |
|
"logits/chosen": -3.422128200531006, |
|
"logits/rejected": -3.272432327270508, |
|
"logps/chosen": -266.85186767578125, |
|
"logps/rejected": -875.5877075195312, |
|
"loss": 0.0027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2771952152252197, |
|
"rewards/margins": 30.630329132080078, |
|
"rewards/rejected": -30.90752601623535, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.760105780128447e-08, |
|
"logits/chosen": -3.4342148303985596, |
|
"logits/rejected": -3.2355589866638184, |
|
"logps/chosen": -297.9632873535156, |
|
"logps/rejected": -929.0408935546875, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.417285293340683, |
|
"rewards/margins": 34.31209182739258, |
|
"rewards/rejected": -34.729373931884766, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.5712126936154134e-08, |
|
"logits/chosen": -3.488246202468872, |
|
"logits/rejected": -3.3506011962890625, |
|
"logps/chosen": -173.14797973632812, |
|
"logps/rejected": -620.3333740234375, |
|
"loss": 0.0032, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3520640432834625, |
|
"rewards/margins": 29.699413299560547, |
|
"rewards/rejected": -30.05147933959961, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_logits/chosen": -3.482171058654785, |
|
"eval_logits/rejected": -3.3307948112487793, |
|
"eval_logps/chosen": -257.48095703125, |
|
"eval_logps/rejected": -726.024169921875, |
|
"eval_loss": 0.004117514006793499, |
|
"eval_rewards/accuracies": 0.997474730014801, |
|
"eval_rewards/chosen": -0.16753698885440826, |
|
"eval_rewards/margins": 30.011581420898438, |
|
"eval_rewards/rejected": -30.179115295410156, |
|
"eval_runtime": 459.6071, |
|
"eval_samples_per_second": 20.67, |
|
"eval_steps_per_second": 0.646, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.3823196071023796e-08, |
|
"logits/chosen": -3.4520740509033203, |
|
"logits/rejected": -3.2903473377227783, |
|
"logps/chosen": -204.6480712890625, |
|
"logps/rejected": -706.2102661132812, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4886886179447174, |
|
"rewards/margins": 32.48357009887695, |
|
"rewards/rejected": -32.972259521484375, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.1934265205893465e-08, |
|
"logits/chosen": -3.3444876670837402, |
|
"logits/rejected": -3.3331406116485596, |
|
"logps/chosen": -345.8191223144531, |
|
"logps/rejected": -787.9623413085938, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5544703006744385, |
|
"rewards/margins": 30.112964630126953, |
|
"rewards/rejected": -29.558496475219727, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.004533434076313e-08, |
|
"logits/chosen": -3.449005126953125, |
|
"logits/rejected": -3.3541347980499268, |
|
"logps/chosen": -205.72506713867188, |
|
"logps/rejected": -782.6888427734375, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2865965962409973, |
|
"rewards/margins": 26.386667251586914, |
|
"rewards/rejected": -26.673263549804688, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.815640347563279e-08, |
|
"logits/chosen": -3.330857038497925, |
|
"logits/rejected": -3.237947463989258, |
|
"logps/chosen": -281.54742431640625, |
|
"logps/rejected": -995.6536865234375, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.840170681476593, |
|
"rewards/margins": 30.559356689453125, |
|
"rewards/rejected": -31.399526596069336, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.626747261050245e-08, |
|
"logits/chosen": -3.4661128520965576, |
|
"logits/rejected": -3.30942964553833, |
|
"logps/chosen": -212.7915496826172, |
|
"logps/rejected": -827.4059448242188, |
|
"loss": 0.0112, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.515861988067627, |
|
"rewards/margins": 28.297542572021484, |
|
"rewards/rejected": -28.813405990600586, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.4378541745372115e-08, |
|
"logits/chosen": -3.424384355545044, |
|
"logits/rejected": -3.36513090133667, |
|
"logps/chosen": -288.58001708984375, |
|
"logps/rejected": -790.6275634765625, |
|
"loss": 0.0089, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2595888078212738, |
|
"rewards/margins": 32.224891662597656, |
|
"rewards/rejected": -31.965301513671875, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.2489610880241784e-08, |
|
"logits/chosen": -3.450979232788086, |
|
"logits/rejected": -3.3050060272216797, |
|
"logps/chosen": -198.88816833496094, |
|
"logps/rejected": -727.8658447265625, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.09249986708164215, |
|
"rewards/margins": 30.053543090820312, |
|
"rewards/rejected": -29.96103858947754, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.0600680015111446e-08, |
|
"logits/chosen": -3.4271132946014404, |
|
"logits/rejected": -3.3997604846954346, |
|
"logps/chosen": -195.2742919921875, |
|
"logps/rejected": -512.84619140625, |
|
"loss": 0.0051, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.01265418529510498, |
|
"rewards/margins": 30.05193519592285, |
|
"rewards/rejected": -30.064586639404297, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.871174914998111e-08, |
|
"logits/chosen": -3.450589656829834, |
|
"logits/rejected": -3.260341167449951, |
|
"logps/chosen": -221.8329315185547, |
|
"logps/rejected": -699.6900634765625, |
|
"loss": 0.0042, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.08895153552293777, |
|
"rewards/margins": 34.512027740478516, |
|
"rewards/rejected": -34.423072814941406, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.682281828485077e-08, |
|
"logits/chosen": -3.405123472213745, |
|
"logits/rejected": -3.2812092304229736, |
|
"logps/chosen": -212.4518585205078, |
|
"logps/rejected": -927.1981201171875, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3205551505088806, |
|
"rewards/margins": 29.993661880493164, |
|
"rewards/rejected": -30.314218521118164, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_logits/chosen": -3.4751133918762207, |
|
"eval_logits/rejected": -3.3266823291778564, |
|
"eval_logps/chosen": -255.20126342773438, |
|
"eval_logps/rejected": -718.1399536132812, |
|
"eval_loss": 0.003812924027442932, |
|
"eval_rewards/accuracies": 0.9983165264129639, |
|
"eval_rewards/chosen": 0.06043152138590813, |
|
"eval_rewards/margins": 29.45113182067871, |
|
"eval_rewards/rejected": -29.390703201293945, |
|
"eval_runtime": 459.6851, |
|
"eval_samples_per_second": 20.666, |
|
"eval_steps_per_second": 0.646, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.4933887419720436e-08, |
|
"logits/chosen": -3.470411777496338, |
|
"logits/rejected": -3.371030330657959, |
|
"logps/chosen": -203.39833068847656, |
|
"logps/rejected": -554.4922485351562, |
|
"loss": 0.0043, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4396284222602844, |
|
"rewards/margins": 28.658405303955078, |
|
"rewards/rejected": -29.0980281829834, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.30449565545901e-08, |
|
"logits/chosen": -3.403038740158081, |
|
"logits/rejected": -3.3339126110076904, |
|
"logps/chosen": -218.4235076904297, |
|
"logps/rejected": -501.0118713378906, |
|
"loss": 0.0107, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3079354763031006, |
|
"rewards/margins": 22.659008026123047, |
|
"rewards/rejected": -22.96694564819336, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.1156025689459764e-08, |
|
"logits/chosen": -3.360138416290283, |
|
"logits/rejected": -3.204312562942505, |
|
"logps/chosen": -275.4970703125, |
|
"logps/rejected": -807.7415771484375, |
|
"loss": 0.0047, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.563244104385376, |
|
"rewards/margins": 28.03116226196289, |
|
"rewards/rejected": -27.467914581298828, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.926709482432943e-08, |
|
"logits/chosen": -3.4086861610412598, |
|
"logits/rejected": -3.273714542388916, |
|
"logps/chosen": -241.6406707763672, |
|
"logps/rejected": -834.037109375, |
|
"loss": 0.0142, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.5851477384567261, |
|
"rewards/margins": 38.93583679199219, |
|
"rewards/rejected": -38.350685119628906, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.7378163959199092e-08, |
|
"logits/chosen": -3.443998336791992, |
|
"logits/rejected": -3.288005828857422, |
|
"logps/chosen": -259.77630615234375, |
|
"logps/rejected": -637.4151611328125, |
|
"loss": 0.0042, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.25292807817459106, |
|
"rewards/margins": 29.613880157470703, |
|
"rewards/rejected": -29.360952377319336, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.5489233094068758e-08, |
|
"logits/chosen": -3.4653220176696777, |
|
"logits/rejected": -3.3464725017547607, |
|
"logps/chosen": -222.8519287109375, |
|
"logps/rejected": -588.8685913085938, |
|
"loss": 0.0037, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2632875442504883, |
|
"rewards/margins": 26.466629028320312, |
|
"rewards/rejected": -26.729915618896484, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.3600302228938419e-08, |
|
"logits/chosen": -3.376246929168701, |
|
"logits/rejected": -3.2897162437438965, |
|
"logps/chosen": -312.7208251953125, |
|
"logps/rejected": -748.8970336914062, |
|
"loss": 0.0038, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.849084734916687, |
|
"rewards/margins": 28.013254165649414, |
|
"rewards/rejected": -27.164173126220703, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.1711371363808084e-08, |
|
"logits/chosen": -3.442333936691284, |
|
"logits/rejected": -3.225363254547119, |
|
"logps/chosen": -266.2436828613281, |
|
"logps/rejected": -922.4913330078125, |
|
"loss": 0.0051, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.09482870250940323, |
|
"rewards/margins": 27.245941162109375, |
|
"rewards/rejected": -27.340768814086914, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 9.822440498677748e-09, |
|
"logits/chosen": -3.2727837562561035, |
|
"logits/rejected": -3.270648241043091, |
|
"logps/chosen": -396.17156982421875, |
|
"logps/rejected": -819.6370239257812, |
|
"loss": 0.0042, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.2518737316131592, |
|
"rewards/margins": 33.05315017700195, |
|
"rewards/rejected": -32.80127716064453, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 7.933509633547412e-09, |
|
"logits/chosen": -3.4724514484405518, |
|
"logits/rejected": -3.2815041542053223, |
|
"logps/chosen": -211.4152374267578, |
|
"logps/rejected": -643.9974365234375, |
|
"loss": 0.003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.16061940789222717, |
|
"rewards/margins": 27.16357421875, |
|
"rewards/rejected": -27.324193954467773, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_logits/chosen": -3.4722537994384766, |
|
"eval_logits/rejected": -3.3257179260253906, |
|
"eval_logps/chosen": -254.35963439941406, |
|
"eval_logps/rejected": -714.0263671875, |
|
"eval_loss": 0.003982194699347019, |
|
"eval_rewards/accuracies": 0.9983165264129639, |
|
"eval_rewards/chosen": 0.1445969194173813, |
|
"eval_rewards/margins": 29.123931884765625, |
|
"eval_rewards/rejected": -28.979337692260742, |
|
"eval_runtime": 459.5744, |
|
"eval_samples_per_second": 20.671, |
|
"eval_steps_per_second": 0.646, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.044578768417076e-09, |
|
"logits/chosen": -3.3734288215637207, |
|
"logits/rejected": -3.2156894207000732, |
|
"logps/chosen": -206.44552612304688, |
|
"logps/rejected": -797.2452392578125, |
|
"loss": 0.0135, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3702377378940582, |
|
"rewards/margins": 27.78318214416504, |
|
"rewards/rejected": -28.153417587280273, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.15564790328674e-09, |
|
"logits/chosen": -3.3592917919158936, |
|
"logits/rejected": -3.2777762413024902, |
|
"logps/chosen": -216.9835968017578, |
|
"logps/rejected": -824.8385620117188, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.20981796085834503, |
|
"rewards/margins": 28.391815185546875, |
|
"rewards/rejected": -28.601633071899414, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.2667170381564033e-09, |
|
"logits/chosen": -3.416182041168213, |
|
"logits/rejected": -3.359605312347412, |
|
"logps/chosen": -203.88467407226562, |
|
"logps/rejected": -526.6511840820312, |
|
"loss": 0.0036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.26971641182899475, |
|
"rewards/margins": 26.130962371826172, |
|
"rewards/rejected": -26.400676727294922, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.7778617302606723e-10, |
|
"logits/chosen": -3.350004196166992, |
|
"logits/rejected": -3.2843971252441406, |
|
"logps/chosen": -274.75946044921875, |
|
"logps/rejected": -673.1805419921875, |
|
"loss": 0.0035, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.04258955270051956, |
|
"rewards/margins": 28.780603408813477, |
|
"rewards/rejected": -28.82318687438965, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 2942, |
|
"total_flos": 0.0, |
|
"train_loss": 0.0552501158515984, |
|
"train_runtime": 31216.715, |
|
"train_samples_per_second": 6.032, |
|
"train_steps_per_second": 0.094 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2942, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|