|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9995071463775259, |
|
"eval_steps": 400, |
|
"global_step": 507, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.001971414489896501, |
|
"grad_norm": 6.2392770862642, |
|
"learning_rate": 9.803921568627451e-09, |
|
"logits/chosen": -1.594488501548767, |
|
"logits/rejected": -1.1860766410827637, |
|
"logps/chosen": -198.3888397216797, |
|
"logps/rejected": -269.352783203125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.009857072449482503, |
|
"grad_norm": 5.49954498256661, |
|
"learning_rate": 4.901960784313725e-08, |
|
"logits/chosen": -1.645488977432251, |
|
"logits/rejected": -1.0096673965454102, |
|
"logps/chosen": -192.4307861328125, |
|
"logps/rejected": -247.57391357421875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.390625, |
|
"rewards/chosen": 0.00013264300650916994, |
|
"rewards/margins": 0.0001808845845516771, |
|
"rewards/rejected": -4.824160714633763e-05, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.019714144898965006, |
|
"grad_norm": 4.196436716438617, |
|
"learning_rate": 9.80392156862745e-08, |
|
"logits/chosen": -1.6045820713043213, |
|
"logits/rejected": -1.0348637104034424, |
|
"logps/chosen": -184.26632690429688, |
|
"logps/rejected": -245.4076690673828, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": 0.0013285436434671283, |
|
"rewards/margins": -0.0003174581506755203, |
|
"rewards/rejected": 0.001646001823246479, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02957121734844751, |
|
"grad_norm": 5.768149867251834, |
|
"learning_rate": 1.4705882352941175e-07, |
|
"logits/chosen": -1.8137686252593994, |
|
"logits/rejected": -1.135617971420288, |
|
"logps/chosen": -199.5909881591797, |
|
"logps/rejected": -266.2090759277344, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.0006634569144807756, |
|
"rewards/margins": 0.0016718091210350394, |
|
"rewards/rejected": -0.002335265977308154, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.03942828979793001, |
|
"grad_norm": 5.9407046802470065, |
|
"learning_rate": 1.96078431372549e-07, |
|
"logits/chosen": -1.7376708984375, |
|
"logits/rejected": -1.1297136545181274, |
|
"logps/chosen": -189.01934814453125, |
|
"logps/rejected": -255.4130859375, |
|
"loss": 0.6901, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.006191645748913288, |
|
"rewards/margins": 0.006456127855926752, |
|
"rewards/rejected": -0.012647772207856178, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04928536224741252, |
|
"grad_norm": 5.2689388633967456, |
|
"learning_rate": 2.4509803921568627e-07, |
|
"logits/chosen": -1.7063930034637451, |
|
"logits/rejected": -1.1289308071136475, |
|
"logps/chosen": -204.759765625, |
|
"logps/rejected": -266.6024169921875, |
|
"loss": 0.684, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.017932727932929993, |
|
"rewards/margins": 0.019068485125899315, |
|
"rewards/rejected": -0.03700121492147446, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.05914243469689502, |
|
"grad_norm": 6.582821307605904, |
|
"learning_rate": 2.941176470588235e-07, |
|
"logits/chosen": -1.5701669454574585, |
|
"logits/rejected": -1.041677474975586, |
|
"logps/chosen": -194.34347534179688, |
|
"logps/rejected": -276.304443359375, |
|
"loss": 0.673, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.02758154645562172, |
|
"rewards/margins": 0.045433152467012405, |
|
"rewards/rejected": -0.07301469147205353, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06899950714637752, |
|
"grad_norm": 10.279144076298133, |
|
"learning_rate": 3.431372549019608e-07, |
|
"logits/chosen": -1.4824097156524658, |
|
"logits/rejected": -0.9899765253067017, |
|
"logps/chosen": -198.76766967773438, |
|
"logps/rejected": -265.6862487792969, |
|
"loss": 0.6359, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.068024180829525, |
|
"rewards/margins": 0.12255563586950302, |
|
"rewards/rejected": -0.1905798316001892, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.07885657959586002, |
|
"grad_norm": 9.399070867553222, |
|
"learning_rate": 3.92156862745098e-07, |
|
"logits/chosen": -1.7072070837020874, |
|
"logits/rejected": -1.1361684799194336, |
|
"logps/chosen": -204.9685516357422, |
|
"logps/rejected": -303.8945617675781, |
|
"loss": 0.5789, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.17202235758304596, |
|
"rewards/margins": 0.32544782757759094, |
|
"rewards/rejected": -0.4974702000617981, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.08871365204534254, |
|
"grad_norm": 13.971472653574747, |
|
"learning_rate": 4.4117647058823526e-07, |
|
"logits/chosen": -2.052572727203369, |
|
"logits/rejected": -1.6851530075073242, |
|
"logps/chosen": -326.47637939453125, |
|
"logps/rejected": -529.0755004882812, |
|
"loss": 0.5125, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.31343674659729, |
|
"rewards/margins": 1.4102681875228882, |
|
"rewards/rejected": -2.7237050533294678, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.09857072449482504, |
|
"grad_norm": 15.586119367213884, |
|
"learning_rate": 4.901960784313725e-07, |
|
"logits/chosen": -2.3275997638702393, |
|
"logits/rejected": -1.8939182758331299, |
|
"logps/chosen": -430.58563232421875, |
|
"logps/rejected": -688.560302734375, |
|
"loss": 0.491, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.3751883506774902, |
|
"rewards/margins": 2.010326862335205, |
|
"rewards/rejected": -4.385515213012695, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.10842779694430754, |
|
"grad_norm": 25.434888651559017, |
|
"learning_rate": 4.999050767562379e-07, |
|
"logits/chosen": -2.086081027984619, |
|
"logits/rejected": -1.7880547046661377, |
|
"logps/chosen": -363.26824951171875, |
|
"logps/rejected": -565.7152099609375, |
|
"loss": 0.4485, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.765494704246521, |
|
"rewards/margins": 1.468611478805542, |
|
"rewards/rejected": -3.2341067790985107, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.11828486939379004, |
|
"grad_norm": 33.28729029749558, |
|
"learning_rate": 4.99519574616467e-07, |
|
"logits/chosen": -2.2118542194366455, |
|
"logits/rejected": -1.929535150527954, |
|
"logps/chosen": -434.70794677734375, |
|
"logps/rejected": -744.1588745117188, |
|
"loss": 0.4177, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -2.4308719635009766, |
|
"rewards/margins": 2.5395278930664062, |
|
"rewards/rejected": -4.970398902893066, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.12814194184327254, |
|
"grad_norm": 18.906078399540743, |
|
"learning_rate": 4.988380179235842e-07, |
|
"logits/chosen": -2.071911334991455, |
|
"logits/rejected": -1.7777721881866455, |
|
"logps/chosen": -411.3829040527344, |
|
"logps/rejected": -706.1156005859375, |
|
"loss": 0.3931, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -2.1888458728790283, |
|
"rewards/margins": 2.4369874000549316, |
|
"rewards/rejected": -4.625833511352539, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.13799901429275505, |
|
"grad_norm": 29.037425921796533, |
|
"learning_rate": 4.978612153434526e-07, |
|
"logits/chosen": -2.3122410774230957, |
|
"logits/rejected": -2.039794683456421, |
|
"logps/chosen": -457.93646240234375, |
|
"logps/rejected": -924.6302490234375, |
|
"loss": 0.4394, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.7360401153564453, |
|
"rewards/margins": 3.9763436317443848, |
|
"rewards/rejected": -6.712383270263672, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.14785608674223755, |
|
"grad_norm": 12.920105460240974, |
|
"learning_rate": 4.965903258506806e-07, |
|
"logits/chosen": -2.1728882789611816, |
|
"logits/rejected": -1.9507039785385132, |
|
"logps/chosen": -443.1044006347656, |
|
"logps/rejected": -733.8536376953125, |
|
"loss": 0.4033, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.428837537765503, |
|
"rewards/margins": 2.4469170570373535, |
|
"rewards/rejected": -4.8757548332214355, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.15771315919172005, |
|
"grad_norm": 13.726152345059775, |
|
"learning_rate": 4.950268573535011e-07, |
|
"logits/chosen": -2.0774412155151367, |
|
"logits/rejected": -1.877873420715332, |
|
"logps/chosen": -434.2333068847656, |
|
"logps/rejected": -697.5498657226562, |
|
"loss": 0.3896, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.339465618133545, |
|
"rewards/margins": 2.073235034942627, |
|
"rewards/rejected": -4.412700176239014, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.16757023164120255, |
|
"grad_norm": 22.96957352147464, |
|
"learning_rate": 4.93172664904641e-07, |
|
"logits/chosen": -2.578918695449829, |
|
"logits/rejected": -2.313844680786133, |
|
"logps/chosen": -714.9383544921875, |
|
"logps/rejected": -1185.1837158203125, |
|
"loss": 0.3544, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -5.246799468994141, |
|
"rewards/margins": 4.040070533752441, |
|
"rewards/rejected": -9.286870002746582, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.17742730409068508, |
|
"grad_norm": 16.12185087053667, |
|
"learning_rate": 4.910299485003033e-07, |
|
"logits/chosen": -2.3522980213165283, |
|
"logits/rejected": -2.1312594413757324, |
|
"logps/chosen": -548.0660400390625, |
|
"logps/rejected": -941.7537231445312, |
|
"loss": 0.326, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -3.534759044647217, |
|
"rewards/margins": 3.3071117401123047, |
|
"rewards/rejected": -6.841870307922363, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.18728437654016758, |
|
"grad_norm": 38.45908394327309, |
|
"learning_rate": 4.886012504698769e-07, |
|
"logits/chosen": -2.29638671875, |
|
"logits/rejected": -2.0095603466033936, |
|
"logps/chosen": -526.743408203125, |
|
"logps/rejected": -906.7442626953125, |
|
"loss": 0.3562, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -3.3386921882629395, |
|
"rewards/margins": 3.1595466136932373, |
|
"rewards/rejected": -6.498239040374756, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.19714144898965008, |
|
"grad_norm": 23.37026509407894, |
|
"learning_rate": 4.858894524594652e-07, |
|
"logits/chosen": -2.509087085723877, |
|
"logits/rejected": -2.2394092082977295, |
|
"logps/chosen": -597.5819091796875, |
|
"logps/rejected": -1110.536376953125, |
|
"loss": 0.3208, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -4.074545383453369, |
|
"rewards/margins": 4.4778618812561035, |
|
"rewards/rejected": -8.552406311035156, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.20699852143913258, |
|
"grad_norm": 17.538993246799073, |
|
"learning_rate": 4.828977720128198e-07, |
|
"logits/chosen": -2.368518114089966, |
|
"logits/rejected": -2.0958077907562256, |
|
"logps/chosen": -522.4010620117188, |
|
"logps/rejected": -853.4436645507812, |
|
"loss": 0.3199, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -3.3017165660858154, |
|
"rewards/margins": 2.8141353130340576, |
|
"rewards/rejected": -6.115852355957031, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.21685559388861508, |
|
"grad_norm": 14.205208079234838, |
|
"learning_rate": 4.796297587537285e-07, |
|
"logits/chosen": -2.4165451526641846, |
|
"logits/rejected": -2.1057496070861816, |
|
"logps/chosen": -577.1276245117188, |
|
"logps/rejected": -963.6633911132812, |
|
"loss": 0.2935, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -3.8097610473632812, |
|
"rewards/margins": 3.394871950149536, |
|
"rewards/rejected": -7.2046332359313965, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.22671266633809758, |
|
"grad_norm": 16.990565766105274, |
|
"learning_rate": 4.760892901743944e-07, |
|
"logits/chosen": -2.536337375640869, |
|
"logits/rejected": -2.2590508460998535, |
|
"logps/chosen": -760.9464111328125, |
|
"logps/rejected": -1193.0130615234375, |
|
"loss": 0.3468, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -5.601290702819824, |
|
"rewards/margins": 3.6806259155273438, |
|
"rewards/rejected": -9.281916618347168, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.23656973878758009, |
|
"grad_norm": 14.381039222874595, |
|
"learning_rate": 4.7228056703479626e-07, |
|
"logits/chosen": -2.490741014480591, |
|
"logits/rejected": -2.1797027587890625, |
|
"logps/chosen": -651.6326293945312, |
|
"logps/rejected": -1045.991943359375, |
|
"loss": 0.3, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -4.517138481140137, |
|
"rewards/margins": 3.300055742263794, |
|
"rewards/rejected": -7.817193508148193, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2464268112370626, |
|
"grad_norm": 13.845809576206165, |
|
"learning_rate": 4.6820810837849535e-07, |
|
"logits/chosen": -2.4549553394317627, |
|
"logits/rejected": -2.05999755859375, |
|
"logps/chosen": -606.1448974609375, |
|
"logps/rejected": -1030.6544189453125, |
|
"loss": 0.2987, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.098814487457275, |
|
"rewards/margins": 3.521782636642456, |
|
"rewards/rejected": -7.620597839355469, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.2562838836865451, |
|
"grad_norm": 30.897064038144002, |
|
"learning_rate": 4.63876746170797e-07, |
|
"logits/chosen": -2.3905959129333496, |
|
"logits/rejected": -2.17751145362854, |
|
"logps/chosen": -677.5357055664062, |
|
"logps/rejected": -1075.299072265625, |
|
"loss": 0.299, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -4.783341407775879, |
|
"rewards/margins": 3.367708683013916, |
|
"rewards/rejected": -8.151049613952637, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2661409561360276, |
|
"grad_norm": 24.59610522580519, |
|
"learning_rate": 4.592916195656321e-07, |
|
"logits/chosen": -2.686401844024658, |
|
"logits/rejected": -2.2882132530212402, |
|
"logps/chosen": -798.7413330078125, |
|
"logps/rejected": -1337.7080078125, |
|
"loss": 0.2956, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -6.00932502746582, |
|
"rewards/margins": 4.70266580581665, |
|
"rewards/rejected": -10.711990356445312, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.2759980285855101, |
|
"grad_norm": 14.965471726804886, |
|
"learning_rate": 4.544581688079602e-07, |
|
"logits/chosen": -2.4349093437194824, |
|
"logits/rejected": -2.14192533493042, |
|
"logps/chosen": -705.6304321289062, |
|
"logps/rejected": -1094.288330078125, |
|
"loss": 0.2863, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -5.049565315246582, |
|
"rewards/margins": 3.3041865825653076, |
|
"rewards/rejected": -8.353752136230469, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.2858551010349926, |
|
"grad_norm": 18.293122078134097, |
|
"learning_rate": 4.493821287789272e-07, |
|
"logits/chosen": -2.5565428733825684, |
|
"logits/rejected": -2.1939361095428467, |
|
"logps/chosen": -744.5687255859375, |
|
"logps/rejected": -1154.7205810546875, |
|
"loss": 0.2788, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -5.435603618621826, |
|
"rewards/margins": 3.5441932678222656, |
|
"rewards/rejected": -8.979796409606934, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.2957121734844751, |
|
"grad_norm": 20.91998686803295, |
|
"learning_rate": 4.4406952219143934e-07, |
|
"logits/chosen": -2.5498974323272705, |
|
"logits/rejected": -2.22133731842041, |
|
"logps/chosen": -842.6162109375, |
|
"logps/rejected": -1307.8778076171875, |
|
"loss": 0.295, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -6.563225746154785, |
|
"rewards/margins": 4.067451477050781, |
|
"rewards/rejected": -10.630678176879883, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3055692459339576, |
|
"grad_norm": 16.16798121676358, |
|
"learning_rate": 4.38526652444224e-07, |
|
"logits/chosen": -2.5155484676361084, |
|
"logits/rejected": -2.1966238021850586, |
|
"logps/chosen": -806.795166015625, |
|
"logps/rejected": -1259.8773193359375, |
|
"loss": 0.2963, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -6.04946231842041, |
|
"rewards/margins": 3.812061309814453, |
|
"rewards/rejected": -9.861523628234863, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.3154263183834401, |
|
"grad_norm": 18.735794107249802, |
|
"learning_rate": 4.3276009614285824e-07, |
|
"logits/chosen": -2.464740037918091, |
|
"logits/rejected": -2.1177756786346436, |
|
"logps/chosen": -709.6548461914062, |
|
"logps/rejected": -1163.3350830078125, |
|
"loss": 0.2554, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.127840042114258, |
|
"rewards/margins": 3.861670970916748, |
|
"rewards/rejected": -8.989511489868164, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3252833908329226, |
|
"grad_norm": 22.02574974928147, |
|
"learning_rate": 4.2677669529663686e-07, |
|
"logits/chosen": -2.545640707015991, |
|
"logits/rejected": -2.2462990283966064, |
|
"logps/chosen": -783.61669921875, |
|
"logps/rejected": -1255.912353515625, |
|
"loss": 0.2591, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -5.863072395324707, |
|
"rewards/margins": 4.111878395080566, |
|
"rewards/rejected": -9.974950790405273, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.3351404632824051, |
|
"grad_norm": 18.815167005575123, |
|
"learning_rate": 4.2058354920054043e-07, |
|
"logits/chosen": -2.5555951595306396, |
|
"logits/rejected": -2.2355425357818604, |
|
"logps/chosen": -801.5789184570312, |
|
"logps/rejected": -1247.4630126953125, |
|
"loss": 0.2675, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -6.109245777130127, |
|
"rewards/margins": 3.8287367820739746, |
|
"rewards/rejected": -9.937983512878418, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.34499753573188763, |
|
"grad_norm": 15.86237856217812, |
|
"learning_rate": 4.141880060119336e-07, |
|
"logits/chosen": -2.541696786880493, |
|
"logits/rejected": -2.180537700653076, |
|
"logps/chosen": -784.6647338867188, |
|
"logps/rejected": -1234.05126953125, |
|
"loss": 0.2502, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -5.834389686584473, |
|
"rewards/margins": 3.9414896965026855, |
|
"rewards/rejected": -9.77587890625, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.35485460818137016, |
|
"grad_norm": 16.77349624146522, |
|
"learning_rate": 4.0759765403198877e-07, |
|
"logits/chosen": -2.5138328075408936, |
|
"logits/rejected": -2.1284890174865723, |
|
"logps/chosen": -700.7459106445312, |
|
"logps/rejected": -1123.9356689453125, |
|
"loss": 0.2808, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -5.06411075592041, |
|
"rewards/margins": 3.7050411701202393, |
|
"rewards/rejected": -8.769152641296387, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.36471168063085263, |
|
"grad_norm": 17.550598446162923, |
|
"learning_rate": 4.008203127021797e-07, |
|
"logits/chosen": -2.5796236991882324, |
|
"logits/rejected": -2.215527057647705, |
|
"logps/chosen": -717.72119140625, |
|
"logps/rejected": -1230.483642578125, |
|
"loss": 0.2249, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -5.206206798553467, |
|
"rewards/margins": 4.433660507202148, |
|
"rewards/rejected": -9.639867782592773, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.37456875308033516, |
|
"grad_norm": 15.558233723999136, |
|
"learning_rate": 3.9386402332652754e-07, |
|
"logits/chosen": -2.6024489402770996, |
|
"logits/rejected": -2.3488709926605225, |
|
"logps/chosen": -900.3855590820312, |
|
"logps/rejected": -1392.37255859375, |
|
"loss": 0.2267, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -7.0901055335998535, |
|
"rewards/margins": 4.340862274169922, |
|
"rewards/rejected": -11.430967330932617, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.38442582552981763, |
|
"grad_norm": 22.187048055147955, |
|
"learning_rate": 3.867370395306068e-07, |
|
"logits/chosen": -2.6506357192993164, |
|
"logits/rejected": -2.2959604263305664, |
|
"logps/chosen": -900.3836059570312, |
|
"logps/rejected": -1402.96630859375, |
|
"loss": 0.2693, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -7.0995588302612305, |
|
"rewards/margins": 4.546249866485596, |
|
"rewards/rejected": -11.645808219909668, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.39428289797930016, |
|
"grad_norm": 15.588853964432303, |
|
"learning_rate": 3.794478174686328e-07, |
|
"logits/chosen": -2.5797057151794434, |
|
"logits/rejected": -2.1939449310302734, |
|
"logps/chosen": -769.4427490234375, |
|
"logps/rejected": -1267.3035888671875, |
|
"loss": 0.2491, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -5.717661380767822, |
|
"rewards/margins": 4.36967658996582, |
|
"rewards/rejected": -10.087339401245117, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.40413997042878264, |
|
"grad_norm": 15.604577624570162, |
|
"learning_rate": 3.720050057902495e-07, |
|
"logits/chosen": -2.4678874015808105, |
|
"logits/rejected": -2.166454553604126, |
|
"logps/chosen": -664.3575439453125, |
|
"logps/rejected": -1184.0172119140625, |
|
"loss": 0.2733, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -4.809238910675049, |
|
"rewards/margins": 4.560500144958496, |
|
"rewards/rejected": -9.369739532470703, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.41399704287826516, |
|
"grad_norm": 16.104577186140947, |
|
"learning_rate": 3.644174353789204e-07, |
|
"logits/chosen": -2.470492124557495, |
|
"logits/rejected": -2.2408156394958496, |
|
"logps/chosen": -702.6835327148438, |
|
"logps/rejected": -1184.7685546875, |
|
"loss": 0.24, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -5.020668983459473, |
|
"rewards/margins": 4.07871150970459, |
|
"rewards/rejected": -9.099380493164062, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.42385411532774764, |
|
"grad_norm": 42.39094832845099, |
|
"learning_rate": 3.566941088741009e-07, |
|
"logits/chosen": -2.465122699737549, |
|
"logits/rejected": -2.202960968017578, |
|
"logps/chosen": -784.384765625, |
|
"logps/rejected": -1312.956298828125, |
|
"loss": 0.2914, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -5.859043598175049, |
|
"rewards/margins": 4.713334083557129, |
|
"rewards/rejected": -10.572378158569336, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.43371118777723017, |
|
"grad_norm": 20.944458558373373, |
|
"learning_rate": 3.488441899896217e-07, |
|
"logits/chosen": -2.487208843231201, |
|
"logits/rejected": -2.197640895843506, |
|
"logps/chosen": -729.4404296875, |
|
"logps/rejected": -1207.813232421875, |
|
"loss": 0.2843, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.363978862762451, |
|
"rewards/margins": 4.154356956481934, |
|
"rewards/rejected": -9.518336296081543, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.44356826022671264, |
|
"grad_norm": 14.863684470935617, |
|
"learning_rate": 3.408769926409574e-07, |
|
"logits/chosen": -2.4418163299560547, |
|
"logits/rejected": -2.1561474800109863, |
|
"logps/chosen": -578.9898071289062, |
|
"logps/rejected": -913.1700439453125, |
|
"loss": 0.2547, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -3.821843385696411, |
|
"rewards/margins": 2.9205775260925293, |
|
"rewards/rejected": -6.742421627044678, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.45342533267619517, |
|
"grad_norm": 17.477623835490277, |
|
"learning_rate": 3.3280196989428263e-07, |
|
"logits/chosen": -2.4349989891052246, |
|
"logits/rejected": -2.196359634399414, |
|
"logps/chosen": -682.2156982421875, |
|
"logps/rejected": -1149.534423828125, |
|
"loss": 0.2754, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -4.879435062408447, |
|
"rewards/margins": 3.9676411151885986, |
|
"rewards/rejected": -8.847076416015625, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.4632824051256777, |
|
"grad_norm": 16.30612813668589, |
|
"learning_rate": 3.2462870275042367e-07, |
|
"logits/chosen": -2.5115764141082764, |
|
"logits/rejected": -2.3011107444763184, |
|
"logps/chosen": -744.306396484375, |
|
"logps/rejected": -1183.502685546875, |
|
"loss": 0.2276, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -5.445645332336426, |
|
"rewards/margins": 3.8912956714630127, |
|
"rewards/rejected": -9.33694076538086, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.47313947757516017, |
|
"grad_norm": 18.35946878564802, |
|
"learning_rate": 3.1636688877701806e-07, |
|
"logits/chosen": -2.5281643867492676, |
|
"logits/rejected": -2.2399466037750244, |
|
"logps/chosen": -777.9661865234375, |
|
"logps/rejected": -1258.2623291015625, |
|
"loss": 0.2537, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -5.761153221130371, |
|
"rewards/margins": 4.262064456939697, |
|
"rewards/rejected": -10.023218154907227, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.4829965500246427, |
|
"grad_norm": 22.711594265033526, |
|
"learning_rate": 3.080263306023669e-07, |
|
"logits/chosen": -2.43805193901062, |
|
"logits/rejected": -2.136569023132324, |
|
"logps/chosen": -738.892578125, |
|
"logps/rejected": -1253.740966796875, |
|
"loss": 0.2465, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -5.394611358642578, |
|
"rewards/margins": 4.49846076965332, |
|
"rewards/rejected": -9.893071174621582, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.4928536224741252, |
|
"grad_norm": 22.523084393015623, |
|
"learning_rate": 2.996169242846328e-07, |
|
"logits/chosen": -2.456860065460205, |
|
"logits/rejected": -2.1488893032073975, |
|
"logps/chosen": -664.892822265625, |
|
"logps/rejected": -1094.06640625, |
|
"loss": 0.2643, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.737056732177734, |
|
"rewards/margins": 3.7822394371032715, |
|
"rewards/rejected": -8.519296646118164, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5027106949236076, |
|
"grad_norm": 21.85050975494629, |
|
"learning_rate": 2.911486475701835e-07, |
|
"logits/chosen": -2.3711659908294678, |
|
"logits/rejected": -2.104147434234619, |
|
"logps/chosen": -632.7847900390625, |
|
"logps/rejected": -1041.4219970703125, |
|
"loss": 0.2848, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.385097503662109, |
|
"rewards/margins": 3.560230255126953, |
|
"rewards/rejected": -7.9453277587890625, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5125677673730902, |
|
"grad_norm": 17.27564046380349, |
|
"learning_rate": 2.826315480550129e-07, |
|
"logits/chosen": -2.326019763946533, |
|
"logits/rejected": -2.0808887481689453, |
|
"logps/chosen": -590.845458984375, |
|
"logps/rejected": -1011.6871337890625, |
|
"loss": 0.2489, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -3.986447811126709, |
|
"rewards/margins": 3.542587995529175, |
|
"rewards/rejected": -7.529036045074463, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5224248398225727, |
|
"grad_norm": 19.212069446863243, |
|
"learning_rate": 2.740757312632854e-07, |
|
"logits/chosen": -2.414062261581421, |
|
"logits/rejected": -2.197702646255493, |
|
"logps/chosen": -744.2257080078125, |
|
"logps/rejected": -1205.133544921875, |
|
"loss": 0.2221, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -5.464824676513672, |
|
"rewards/margins": 4.082161903381348, |
|
"rewards/rejected": -9.54698657989502, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5322819122720552, |
|
"grad_norm": 16.242036970306053, |
|
"learning_rate": 2.654913486571487e-07, |
|
"logits/chosen": -2.5215845108032227, |
|
"logits/rejected": -2.260974168777466, |
|
"logps/chosen": -794.4285888671875, |
|
"logps/rejected": -1301.2264404296875, |
|
"loss": 0.3103, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -5.997513294219971, |
|
"rewards/margins": 4.493828773498535, |
|
"rewards/rejected": -10.491341590881348, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5421389847215377, |
|
"grad_norm": 14.856129841637888, |
|
"learning_rate": 2.5688858559204053e-07, |
|
"logits/chosen": -2.406275987625122, |
|
"logits/rejected": -2.168721914291382, |
|
"logps/chosen": -711.0574951171875, |
|
"logps/rejected": -1197.390380859375, |
|
"loss": 0.2365, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -5.203823566436768, |
|
"rewards/margins": 4.352889060974121, |
|
"rewards/rejected": -9.55671215057373, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5519960571710202, |
|
"grad_norm": 15.78620841198885, |
|
"learning_rate": 2.4827764923178246e-07, |
|
"logits/chosen": -2.5056891441345215, |
|
"logits/rejected": -2.270139694213867, |
|
"logps/chosen": -740.6078491210938, |
|
"logps/rejected": -1241.2222900390625, |
|
"loss": 0.2371, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.419187068939209, |
|
"rewards/margins": 4.417618751525879, |
|
"rewards/rejected": -9.83680534362793, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.5618531296205027, |
|
"grad_norm": 17.093071523621635, |
|
"learning_rate": 2.3966875643779667e-07, |
|
"logits/chosen": -2.443941593170166, |
|
"logits/rejected": -2.2383456230163574, |
|
"logps/chosen": -725.4220581054688, |
|
"logps/rejected": -1253.807373046875, |
|
"loss": 0.2179, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -5.367009162902832, |
|
"rewards/margins": 4.663185119628906, |
|
"rewards/rejected": -10.030195236206055, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.5717102020699852, |
|
"grad_norm": 25.373557062497504, |
|
"learning_rate": 2.3107212164681774e-07, |
|
"logits/chosen": -2.5970406532287598, |
|
"logits/rejected": -2.2234134674072266, |
|
"logps/chosen": -703.3094482421875, |
|
"logps/rejected": -1330.679931640625, |
|
"loss": 0.2351, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -5.1501851081848145, |
|
"rewards/margins": 5.526017665863037, |
|
"rewards/rejected": -10.676202774047852, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.5815672745194678, |
|
"grad_norm": 39.54586447558642, |
|
"learning_rate": 2.2249794475148019e-07, |
|
"logits/chosen": -2.508376359939575, |
|
"logits/rejected": -2.293508768081665, |
|
"logps/chosen": -826.1845703125, |
|
"logps/rejected": -1299.66845703125, |
|
"loss": 0.2529, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -6.255187034606934, |
|
"rewards/margins": 4.241654872894287, |
|
"rewards/rejected": -10.496840476989746, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.5914243469689502, |
|
"grad_norm": 21.452851323361823, |
|
"learning_rate": 2.1395639899816332e-07, |
|
"logits/chosen": -2.580679416656494, |
|
"logits/rejected": -2.2998709678649902, |
|
"logps/chosen": -733.3718872070312, |
|
"logps/rejected": -1240.96533203125, |
|
"loss": 0.229, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -5.495171070098877, |
|
"rewards/margins": 4.4129509925842285, |
|
"rewards/rejected": -9.908121109008789, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6012814194184327, |
|
"grad_norm": 16.276691413135083, |
|
"learning_rate": 2.0545761891645177e-07, |
|
"logits/chosen": -2.456111431121826, |
|
"logits/rejected": -2.243847608566284, |
|
"logps/chosen": -683.7113037109375, |
|
"logps/rejected": -1161.4580078125, |
|
"loss": 0.2226, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.874439239501953, |
|
"rewards/margins": 4.157763957977295, |
|
"rewards/rejected": -9.032203674316406, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6111384918679152, |
|
"grad_norm": 24.661935948628066, |
|
"learning_rate": 1.9701168829453305e-07, |
|
"logits/chosen": -2.6442089080810547, |
|
"logits/rejected": -2.186643600463867, |
|
"logps/chosen": -696.6754150390625, |
|
"logps/rejected": -1296.2703857421875, |
|
"loss": 0.2543, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -5.091025352478027, |
|
"rewards/margins": 5.146512031555176, |
|
"rewards/rejected": -10.237536430358887, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6209955643173978, |
|
"grad_norm": 14.790443951524152, |
|
"learning_rate": 1.886286282148002e-07, |
|
"logits/chosen": -2.444152355194092, |
|
"logits/rejected": -2.1477932929992676, |
|
"logps/chosen": -734.7506713867188, |
|
"logps/rejected": -1255.331787109375, |
|
"loss": 0.2501, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -5.458280563354492, |
|
"rewards/margins": 4.611725807189941, |
|
"rewards/rejected": -10.070005416870117, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6308526367668802, |
|
"grad_norm": 16.271316784779167, |
|
"learning_rate": 1.8031838516385422e-07, |
|
"logits/chosen": -2.369560718536377, |
|
"logits/rejected": -2.1628785133361816, |
|
"logps/chosen": -670.2017822265625, |
|
"logps/rejected": -1158.4266357421875, |
|
"loss": 0.2539, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -4.8326568603515625, |
|
"rewards/margins": 4.299530982971191, |
|
"rewards/rejected": -9.132187843322754, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6407097092163627, |
|
"grad_norm": 17.744715641719303, |
|
"learning_rate": 1.7209081923101472e-07, |
|
"logits/chosen": -2.6445670127868652, |
|
"logits/rejected": -2.266472578048706, |
|
"logps/chosen": -690.3375854492188, |
|
"logps/rejected": -1188.698974609375, |
|
"loss": 0.205, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -4.986203193664551, |
|
"rewards/margins": 4.211085319519043, |
|
"rewards/rejected": -9.19728946685791, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.6505667816658453, |
|
"grad_norm": 21.850943779213573, |
|
"learning_rate": 1.639556924093404e-07, |
|
"logits/chosen": -2.358119249343872, |
|
"logits/rejected": -2.1153066158294678, |
|
"logps/chosen": -764.6770629882812, |
|
"logps/rejected": -1240.2838134765625, |
|
"loss": 0.2799, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -5.743839263916016, |
|
"rewards/margins": 4.227473735809326, |
|
"rewards/rejected": -9.971312522888184, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.6604238541153278, |
|
"grad_norm": 34.1719257406542, |
|
"learning_rate": 1.5592265701304114e-07, |
|
"logits/chosen": -2.375866413116455, |
|
"logits/rejected": -2.240598678588867, |
|
"logps/chosen": -763.527099609375, |
|
"logps/rejected": -1258.715576171875, |
|
"loss": 0.2564, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -5.7242560386657715, |
|
"rewards/margins": 4.458041667938232, |
|
"rewards/rejected": -10.18229866027832, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.6702809265648102, |
|
"grad_norm": 18.71419612814259, |
|
"learning_rate": 1.4800124422502334e-07, |
|
"logits/chosen": -2.519636631011963, |
|
"logits/rejected": -2.2316250801086426, |
|
"logps/chosen": -762.00439453125, |
|
"logps/rejected": -1267.931396484375, |
|
"loss": 0.2514, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.641887664794922, |
|
"rewards/margins": 4.440293312072754, |
|
"rewards/rejected": -10.08218002319336, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.6801379990142927, |
|
"grad_norm": 18.664999037354942, |
|
"learning_rate": 1.4020085278815743e-07, |
|
"logits/chosen": -2.458855628967285, |
|
"logits/rejected": -2.2174124717712402, |
|
"logps/chosen": -758.8146362304688, |
|
"logps/rejected": -1192.955322265625, |
|
"loss": 0.2308, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -5.576407432556152, |
|
"rewards/margins": 3.762500762939453, |
|
"rewards/rejected": -9.338907241821289, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.6899950714637753, |
|
"grad_norm": 19.00593669045522, |
|
"learning_rate": 1.3253073785368545e-07, |
|
"logits/chosen": -2.4038822650909424, |
|
"logits/rejected": -2.114386796951294, |
|
"logps/chosen": -628.0557250976562, |
|
"logps/rejected": -1128.370849609375, |
|
"loss": 0.2821, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -4.437934875488281, |
|
"rewards/margins": 4.355624198913574, |
|
"rewards/rejected": -8.793559074401855, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.6998521439132578, |
|
"grad_norm": 13.437480532125694, |
|
"learning_rate": 1.2500000000000005e-07, |
|
"logits/chosen": -2.367783308029175, |
|
"logits/rejected": -2.1017680168151855, |
|
"logps/chosen": -647.2433471679688, |
|
"logps/rejected": -1109.231689453125, |
|
"loss": 0.2264, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.586766242980957, |
|
"rewards/margins": 4.0332794189453125, |
|
"rewards/rejected": -8.62004566192627, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7097092163627403, |
|
"grad_norm": 23.214131611033924, |
|
"learning_rate": 1.1761757443482285e-07, |
|
"logits/chosen": -2.4149296283721924, |
|
"logits/rejected": -2.0817036628723145, |
|
"logps/chosen": -711.5889892578125, |
|
"logps/rejected": -1216.048095703125, |
|
"loss": 0.2471, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.251239776611328, |
|
"rewards/margins": 4.419920921325684, |
|
"rewards/rejected": -9.671161651611328, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7195662888122227, |
|
"grad_norm": 21.14502188501099, |
|
"learning_rate": 1.1039222039359644e-07, |
|
"logits/chosen": -2.5779356956481934, |
|
"logits/rejected": -2.228896141052246, |
|
"logps/chosen": -739.5020751953125, |
|
"logps/rejected": -1144.7041015625, |
|
"loss": 0.2331, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -5.435536861419678, |
|
"rewards/margins": 3.5643341541290283, |
|
"rewards/rejected": -8.999870300292969, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.7294233612617053, |
|
"grad_norm": 24.352395974541345, |
|
"learning_rate": 1.0333251074666608e-07, |
|
"logits/chosen": -2.4502475261688232, |
|
"logits/rejected": -2.300096035003662, |
|
"logps/chosen": -781.7764282226562, |
|
"logps/rejected": -1243.557373046875, |
|
"loss": 0.2034, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.88522481918335, |
|
"rewards/margins": 4.11319637298584, |
|
"rewards/rejected": -9.998421669006348, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7392804337111878, |
|
"grad_norm": 12.733858279084933, |
|
"learning_rate": 9.644682182758304e-08, |
|
"logits/chosen": -2.5493714809417725, |
|
"logits/rejected": -2.2471814155578613, |
|
"logps/chosen": -801.8941650390625, |
|
"logps/rejected": -1274.67529296875, |
|
"loss": 0.2314, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -6.090977668762207, |
|
"rewards/margins": 4.2742109298706055, |
|
"rewards/rejected": -10.365188598632812, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.7491375061606703, |
|
"grad_norm": 30.453462939771114, |
|
"learning_rate": 8.974332349459992e-08, |
|
"logits/chosen": -2.3520667552948, |
|
"logits/rejected": -2.144470691680908, |
|
"logps/chosen": -808.9397583007812, |
|
"logps/rejected": -1302.123291015625, |
|
"loss": 0.2251, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -6.170880317687988, |
|
"rewards/margins": 4.420655727386475, |
|
"rewards/rejected": -10.591535568237305, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.7589945786101527, |
|
"grad_norm": 25.800864953205974, |
|
"learning_rate": 8.322996943714672e-08, |
|
"logits/chosen": -2.4617538452148438, |
|
"logits/rejected": -2.170855760574341, |
|
"logps/chosen": -752.3043212890625, |
|
"logps/rejected": -1303.364013671875, |
|
"loss": 0.2474, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -5.609736919403076, |
|
"rewards/margins": 4.878790378570557, |
|
"rewards/rejected": -10.488527297973633, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.7688516510596353, |
|
"grad_norm": 20.367448051714003, |
|
"learning_rate": 7.691448773879256e-08, |
|
"logits/chosen": -2.631474018096924, |
|
"logits/rejected": -2.1774039268493652, |
|
"logps/chosen": -788.4654541015625, |
|
"logps/rejected": -1410.991455078125, |
|
"loss": 0.219, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -5.944725036621094, |
|
"rewards/margins": 5.383862495422363, |
|
"rewards/rejected": -11.328587532043457, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.7787087235091178, |
|
"grad_norm": 22.316993245593054, |
|
"learning_rate": 7.080437170788722e-08, |
|
"logits/chosen": -2.5305416584014893, |
|
"logits/rejected": -2.246816873550415, |
|
"logps/chosen": -782.3768310546875, |
|
"logps/rejected": -1279.210693359375, |
|
"loss": 0.267, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -5.886050701141357, |
|
"rewards/margins": 4.281658172607422, |
|
"rewards/rejected": -10.167708396911621, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.7885657959586003, |
|
"grad_norm": 25.32693997023262, |
|
"learning_rate": 6.490687098676332e-08, |
|
"logits/chosen": -2.4314379692077637, |
|
"logits/rejected": -2.1938157081604004, |
|
"logps/chosen": -747.9923095703125, |
|
"logps/rejected": -1171.9027099609375, |
|
"loss": 0.2606, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -5.560776710510254, |
|
"rewards/margins": 3.7204151153564453, |
|
"rewards/rejected": -9.2811918258667, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7885657959586003, |
|
"eval_logits/chosen": -2.784451484680176, |
|
"eval_logits/rejected": -2.6733083724975586, |
|
"eval_logps/chosen": -513.8394165039062, |
|
"eval_logps/rejected": -600.927978515625, |
|
"eval_loss": 0.5123496651649475, |
|
"eval_rewards/accuracies": 0.7782257795333862, |
|
"eval_rewards/chosen": -2.5094728469848633, |
|
"eval_rewards/margins": 0.760833203792572, |
|
"eval_rewards/rejected": -3.27030611038208, |
|
"eval_runtime": 327.294, |
|
"eval_samples_per_second": 6.037, |
|
"eval_steps_per_second": 0.379, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7984228684080829, |
|
"grad_norm": 16.407923464923826, |
|
"learning_rate": 5.9228982950048414e-08, |
|
"logits/chosen": -2.4307689666748047, |
|
"logits/rejected": -2.029819965362549, |
|
"logps/chosen": -701.4022827148438, |
|
"logps/rejected": -1147.330322265625, |
|
"loss": 0.227, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -5.128365993499756, |
|
"rewards/margins": 3.8555781841278076, |
|
"rewards/rejected": -8.9839448928833, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8082799408575653, |
|
"grad_norm": 23.480190565228476, |
|
"learning_rate": 5.3777444402291345e-08, |
|
"logits/chosen": -2.4188990592956543, |
|
"logits/rejected": -2.1691110134124756, |
|
"logps/chosen": -730.129638671875, |
|
"logps/rejected": -1147.34521484375, |
|
"loss": 0.2563, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -5.437844753265381, |
|
"rewards/margins": 3.608722686767578, |
|
"rewards/rejected": -9.0465669631958, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8181370133070478, |
|
"grad_norm": 14.849124963520776, |
|
"learning_rate": 4.855872358475546e-08, |
|
"logits/chosen": -2.4617886543273926, |
|
"logits/rejected": -2.174734592437744, |
|
"logps/chosen": -733.2481079101562, |
|
"logps/rejected": -1173.011962890625, |
|
"loss": 0.2203, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -5.399328708648682, |
|
"rewards/margins": 3.8578476905822754, |
|
"rewards/rejected": -9.257177352905273, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.8279940857565303, |
|
"grad_norm": 14.728191064922497, |
|
"learning_rate": 4.357901250086107e-08, |
|
"logits/chosen": -2.605170488357544, |
|
"logits/rejected": -2.1935315132141113, |
|
"logps/chosen": -696.6317749023438, |
|
"logps/rejected": -1134.62890625, |
|
"loss": 0.2169, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -5.047616481781006, |
|
"rewards/margins": 3.7440898418426514, |
|
"rewards/rejected": -8.791706085205078, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.8378511582060129, |
|
"grad_norm": 21.656855077862126, |
|
"learning_rate": 3.884421956938377e-08, |
|
"logits/chosen": -2.443837881088257, |
|
"logits/rejected": -2.016244649887085, |
|
"logps/chosen": -734.0977783203125, |
|
"logps/rejected": -1261.663818359375, |
|
"loss": 0.2601, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.3997087478637695, |
|
"rewards/margins": 4.594438552856445, |
|
"rewards/rejected": -9.994146347045898, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.8477082306554953, |
|
"grad_norm": 22.361665031765803, |
|
"learning_rate": 3.435996261412591e-08, |
|
"logits/chosen": -2.4327638149261475, |
|
"logits/rejected": -2.148250102996826, |
|
"logps/chosen": -736.1185913085938, |
|
"logps/rejected": -1234.297607421875, |
|
"loss": 0.2572, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -5.458459854125977, |
|
"rewards/margins": 4.413580894470215, |
|
"rewards/rejected": -9.872041702270508, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.8575653031049778, |
|
"grad_norm": 34.61319327349877, |
|
"learning_rate": 3.013156219837776e-08, |
|
"logits/chosen": -2.418109655380249, |
|
"logits/rejected": -2.1507232189178467, |
|
"logps/chosen": -754.8319091796875, |
|
"logps/rejected": -1258.600830078125, |
|
"loss": 0.2446, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -5.640649795532227, |
|
"rewards/margins": 4.45644998550415, |
|
"rewards/rejected": -10.097099304199219, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.8674223755544603, |
|
"grad_norm": 17.275605835829435, |
|
"learning_rate": 2.6164035312078447e-08, |
|
"logits/chosen": -2.610421657562256, |
|
"logits/rejected": -2.200122356414795, |
|
"logps/chosen": -775.4208984375, |
|
"logps/rejected": -1280.5159912109375, |
|
"loss": 0.2162, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -5.812338829040527, |
|
"rewards/margins": 4.107577800750732, |
|
"rewards/rejected": -9.919916152954102, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.8772794480039429, |
|
"grad_norm": 19.69767066811732, |
|
"learning_rate": 2.2462089419165776e-08, |
|
"logits/chosen": -2.454554319381714, |
|
"logits/rejected": -2.129283905029297, |
|
"logps/chosen": -772.8093872070312, |
|
"logps/rejected": -1197.3636474609375, |
|
"loss": 0.2623, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -5.787473201751709, |
|
"rewards/margins": 3.7656428813934326, |
|
"rewards/rejected": -9.553116798400879, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.8871365204534253, |
|
"grad_norm": 16.68762717345223, |
|
"learning_rate": 1.9030116872178314e-08, |
|
"logits/chosen": -2.3323419094085693, |
|
"logits/rejected": -2.1572489738464355, |
|
"logps/chosen": -730.7281494140625, |
|
"logps/rejected": -1192.001220703125, |
|
"loss": 0.2348, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -5.397282600402832, |
|
"rewards/margins": 4.056326866149902, |
|
"rewards/rejected": -9.453609466552734, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.8969935929029078, |
|
"grad_norm": 20.379506675051896, |
|
"learning_rate": 1.5872189700736337e-08, |
|
"logits/chosen": -2.3889849185943604, |
|
"logits/rejected": -2.237183094024658, |
|
"logps/chosen": -754.2752075195312, |
|
"logps/rejected": -1230.703125, |
|
"loss": 0.1963, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.620961666107178, |
|
"rewards/margins": 4.211625099182129, |
|
"rewards/rejected": -9.832587242126465, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.9068506653523903, |
|
"grad_norm": 17.181830991927878, |
|
"learning_rate": 1.2992054780085692e-08, |
|
"logits/chosen": -2.495082139968872, |
|
"logits/rejected": -2.1834959983825684, |
|
"logps/chosen": -710.3396606445312, |
|
"logps/rejected": -1224.693603515625, |
|
"loss": 0.2459, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -5.253153324127197, |
|
"rewards/margins": 4.523493766784668, |
|
"rewards/rejected": -9.776647567749023, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9167077378018729, |
|
"grad_norm": 17.458158491525015, |
|
"learning_rate": 1.0393129385436823e-08, |
|
"logits/chosen": -2.5279664993286133, |
|
"logits/rejected": -2.2738842964172363, |
|
"logps/chosen": -760.11962890625, |
|
"logps/rejected": -1247.168212890625, |
|
"loss": 0.2254, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -5.765726566314697, |
|
"rewards/margins": 4.349237442016602, |
|
"rewards/rejected": -10.11496353149414, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.9265648102513554, |
|
"grad_norm": 22.242802721359375, |
|
"learning_rate": 8.078497137373242e-09, |
|
"logits/chosen": -2.6163723468780518, |
|
"logits/rejected": -2.2263712882995605, |
|
"logps/chosen": -774.3194580078125, |
|
"logps/rejected": -1314.293212890625, |
|
"loss": 0.2375, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -5.78206205368042, |
|
"rewards/margins": 4.660614490509033, |
|
"rewards/rejected": -10.442676544189453, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.9364218827008378, |
|
"grad_norm": 17.50981352291082, |
|
"learning_rate": 6.0509043431410945e-09, |
|
"logits/chosen": -2.4221930503845215, |
|
"logits/rejected": -2.220930814743042, |
|
"logps/chosen": -804.204345703125, |
|
"logps/rejected": -1225.1212158203125, |
|
"loss": 0.2554, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -6.063734531402588, |
|
"rewards/margins": 3.7845940589904785, |
|
"rewards/rejected": -9.848328590393066, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.9462789551503203, |
|
"grad_norm": 19.77591605111257, |
|
"learning_rate": 4.312756738160145e-09, |
|
"logits/chosen": -2.5149528980255127, |
|
"logits/rejected": -2.154731512069702, |
|
"logps/chosen": -768.4055786132812, |
|
"logps/rejected": -1274.7755126953125, |
|
"loss": 0.2458, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -5.735711574554443, |
|
"rewards/margins": 4.444643974304199, |
|
"rewards/rejected": -10.180355072021484, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.9561360275998029, |
|
"grad_norm": 16.377470184235065, |
|
"learning_rate": 2.8661166316229223e-09, |
|
"logits/chosen": -2.3629002571105957, |
|
"logits/rejected": -2.151808738708496, |
|
"logps/chosen": -723.3502197265625, |
|
"logps/rejected": -1148.914794921875, |
|
"loss": 0.2446, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -5.385800361633301, |
|
"rewards/margins": 3.7826755046844482, |
|
"rewards/rejected": -9.168476104736328, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.9659931000492854, |
|
"grad_norm": 15.391953903269371, |
|
"learning_rate": 1.7127004595681727e-09, |
|
"logits/chosen": -2.5323967933654785, |
|
"logits/rejected": -2.1350226402282715, |
|
"logps/chosen": -750.980712890625, |
|
"logps/rejected": -1350.3404541015625, |
|
"loss": 0.2446, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -5.573115348815918, |
|
"rewards/margins": 5.145482063293457, |
|
"rewards/rejected": -10.718597412109375, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.9758501724987678, |
|
"grad_norm": 16.723832751333937, |
|
"learning_rate": 8.538767483325383e-10, |
|
"logits/chosen": -2.532517433166504, |
|
"logits/rejected": -2.180654525756836, |
|
"logps/chosen": -762.9144287109375, |
|
"logps/rejected": -1284.332763671875, |
|
"loss": 0.2173, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -5.670698165893555, |
|
"rewards/margins": 4.554699897766113, |
|
"rewards/rejected": -10.225398063659668, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.9857072449482503, |
|
"grad_norm": 26.735630621648028, |
|
"learning_rate": 2.9066449079634404e-10, |
|
"logits/chosen": -2.5005085468292236, |
|
"logits/rejected": -2.2136847972869873, |
|
"logps/chosen": -738.4940185546875, |
|
"logps/rejected": -1214.4185791015625, |
|
"loss": 0.2165, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.457156658172607, |
|
"rewards/margins": 4.181014060974121, |
|
"rewards/rejected": -9.63817024230957, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.9955643173977329, |
|
"grad_norm": 17.938477728126337, |
|
"learning_rate": 2.3731937350224273e-11, |
|
"logits/chosen": -2.449402093887329, |
|
"logits/rejected": -2.1034648418426514, |
|
"logps/chosen": -783.5701293945312, |
|
"logps/rejected": -1268.8685302734375, |
|
"loss": 0.2476, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -5.870804786682129, |
|
"rewards/margins": 4.206555366516113, |
|
"rewards/rejected": -10.077360153198242, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.9995071463775259, |
|
"step": 507, |
|
"total_flos": 0.0, |
|
"train_loss": 0.30356378627011527, |
|
"train_runtime": 18867.8748, |
|
"train_samples_per_second": 3.441, |
|
"train_steps_per_second": 0.027 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 507, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|