|
{ |
|
"best_metric": 0.8975116014480591, |
|
"best_model_checkpoint": "saves/Mistral-7B-Instruct-v0.2/lora/orpo/checkpoint-1500", |
|
"epoch": 2.997999555456768, |
|
"eval_steps": 500, |
|
"global_step": 1686, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.017781729273171815, |
|
"grad_norm": 3.822810173034668, |
|
"learning_rate": 4.9995745934141085e-06, |
|
"logits/chosen": -2.5146775245666504, |
|
"logits/rejected": -2.5178749561309814, |
|
"logps/chosen": -1.456717848777771, |
|
"logps/rejected": -2.2312474250793457, |
|
"loss": 1.542, |
|
"odds_ratio_loss": 0.853233814239502, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.1456717997789383, |
|
"rewards/margins": 0.07745292782783508, |
|
"rewards/rejected": -0.22312471270561218, |
|
"sft_loss": 1.456717848777771, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03556345854634363, |
|
"grad_norm": 37.87593078613281, |
|
"learning_rate": 4.9982812903243405e-06, |
|
"logits/chosen": -2.5083491802215576, |
|
"logits/rejected": -2.5308327674865723, |
|
"logps/chosen": -1.7510416507720947, |
|
"logps/rejected": -1.991346001625061, |
|
"loss": 1.8501, |
|
"odds_ratio_loss": 0.9901398420333862, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.17510415613651276, |
|
"rewards/margins": 0.024030480533838272, |
|
"rewards/rejected": -0.19913461804389954, |
|
"sft_loss": 1.7510416507720947, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05334518781951545, |
|
"grad_norm": 4.208436012268066, |
|
"learning_rate": 4.996120496405222e-06, |
|
"logits/chosen": -2.520888090133667, |
|
"logits/rejected": -2.4970860481262207, |
|
"logps/chosen": -1.4711406230926514, |
|
"logps/rejected": -2.380532741546631, |
|
"loss": 1.5428, |
|
"odds_ratio_loss": 0.7161206603050232, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.14711406826972961, |
|
"rewards/margins": 0.09093920141458511, |
|
"rewards/rejected": -0.23805327713489532, |
|
"sft_loss": 1.4711406230926514, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07112691709268726, |
|
"grad_norm": 5.997638702392578, |
|
"learning_rate": 4.99309296196014e-06, |
|
"logits/chosen": -2.5280556678771973, |
|
"logits/rejected": -2.552898645401001, |
|
"logps/chosen": -1.3411036729812622, |
|
"logps/rejected": -1.9365333318710327, |
|
"loss": 1.4122, |
|
"odds_ratio_loss": 0.711218535900116, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.13411036133766174, |
|
"rewards/margins": 0.059542976319789886, |
|
"rewards/rejected": -0.19365334510803223, |
|
"sft_loss": 1.3411036729812622, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.08890864636585907, |
|
"grad_norm": 6.682967185974121, |
|
"learning_rate": 4.989199738255166e-06, |
|
"logits/chosen": -2.5629172325134277, |
|
"logits/rejected": -2.5831732749938965, |
|
"logps/chosen": -1.3399975299835205, |
|
"logps/rejected": -1.9187812805175781, |
|
"loss": 1.4155, |
|
"odds_ratio_loss": 0.7554818391799927, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.133999764919281, |
|
"rewards/margins": 0.05787837505340576, |
|
"rewards/rejected": -0.19187815487384796, |
|
"sft_loss": 1.3399975299835205, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1066903756390309, |
|
"grad_norm": 3.520189046859741, |
|
"learning_rate": 4.984442177154031e-06, |
|
"logits/chosen": -2.5552244186401367, |
|
"logits/rejected": -2.5440051555633545, |
|
"logps/chosen": -1.3248951435089111, |
|
"logps/rejected": -1.8020858764648438, |
|
"loss": 1.4047, |
|
"odds_ratio_loss": 0.7983237504959106, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.13248953223228455, |
|
"rewards/margins": 0.047719065099954605, |
|
"rewards/rejected": -0.18020859360694885, |
|
"sft_loss": 1.3248951435089111, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.12447210491220272, |
|
"grad_norm": 5.902307987213135, |
|
"learning_rate": 4.978821930648704e-06, |
|
"logits/chosen": -2.6206307411193848, |
|
"logits/rejected": -2.6300582885742188, |
|
"logps/chosen": -1.4133307933807373, |
|
"logps/rejected": -1.5712898969650269, |
|
"loss": 1.5087, |
|
"odds_ratio_loss": 0.9536682963371277, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.14133310317993164, |
|
"rewards/margins": 0.01579589582979679, |
|
"rewards/rejected": -0.15712900459766388, |
|
"sft_loss": 1.4133307933807373, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.14225383418537452, |
|
"grad_norm": 6.316813945770264, |
|
"learning_rate": 4.97234095028576e-06, |
|
"logits/chosen": -2.6114909648895264, |
|
"logits/rejected": -2.6050069332122803, |
|
"logps/chosen": -1.1831285953521729, |
|
"logps/rejected": -1.6871837377548218, |
|
"loss": 1.2502, |
|
"odds_ratio_loss": 0.670314371585846, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.11831285059452057, |
|
"rewards/margins": 0.050405532121658325, |
|
"rewards/rejected": -0.1687183678150177, |
|
"sft_loss": 1.1831285953521729, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.16003556345854633, |
|
"grad_norm": 2.3047008514404297, |
|
"learning_rate": 4.965001486488743e-06, |
|
"logits/chosen": -2.5930087566375732, |
|
"logits/rejected": -2.589054584503174, |
|
"logps/chosen": -1.0415079593658447, |
|
"logps/rejected": -1.5394554138183594, |
|
"loss": 1.1006, |
|
"odds_ratio_loss": 0.5907732248306274, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.10415079444646835, |
|
"rewards/margins": 0.049794748425483704, |
|
"rewards/rejected": -0.15394553542137146, |
|
"sft_loss": 1.0415079593658447, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.17781729273171815, |
|
"grad_norm": 3.8059961795806885, |
|
"learning_rate": 4.956806087776732e-06, |
|
"logits/chosen": -2.6263413429260254, |
|
"logits/rejected": -2.6199755668640137, |
|
"logps/chosen": -1.0670406818389893, |
|
"logps/rejected": -2.051734447479248, |
|
"loss": 1.1262, |
|
"odds_ratio_loss": 0.5915437340736389, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.10670407116413116, |
|
"rewards/margins": 0.09846936166286469, |
|
"rewards/rejected": -0.20517341792583466, |
|
"sft_loss": 1.0670406818389893, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.19559902200489, |
|
"grad_norm": 13.373092651367188, |
|
"learning_rate": 4.947757599879411e-06, |
|
"logits/chosen": -2.6753718852996826, |
|
"logits/rejected": -2.694586992263794, |
|
"logps/chosen": -1.2387793064117432, |
|
"logps/rejected": -1.7975523471832275, |
|
"loss": 1.3076, |
|
"odds_ratio_loss": 0.6879797577857971, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.12387792766094208, |
|
"rewards/margins": 0.05587731674313545, |
|
"rewards/rejected": -0.17975525557994843, |
|
"sft_loss": 1.2387793064117432, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2133807512780618, |
|
"grad_norm": 3.2554187774658203, |
|
"learning_rate": 4.937859164748931e-06, |
|
"logits/chosen": -2.7004992961883545, |
|
"logits/rejected": -2.7187867164611816, |
|
"logps/chosen": -0.9789896011352539, |
|
"logps/rejected": -1.210654854774475, |
|
"loss": 1.0426, |
|
"odds_ratio_loss": 0.6358731985092163, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.09789897501468658, |
|
"rewards/margins": 0.023166518658399582, |
|
"rewards/rejected": -0.12106549739837646, |
|
"sft_loss": 0.9789896011352539, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.23116248055123362, |
|
"grad_norm": 3.08980393409729, |
|
"learning_rate": 4.92711421946891e-06, |
|
"logits/chosen": -2.654874563217163, |
|
"logits/rejected": -2.6488099098205566, |
|
"logps/chosen": -1.0345842838287354, |
|
"logps/rejected": -1.5608960390090942, |
|
"loss": 1.0981, |
|
"odds_ratio_loss": 0.6348952054977417, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.10345842689275742, |
|
"rewards/margins": 0.052631180733442307, |
|
"rewards/rejected": -0.15608961880207062, |
|
"sft_loss": 1.0345842838287354, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.24894420982440543, |
|
"grad_norm": 2.4850211143493652, |
|
"learning_rate": 4.915526495060961e-06, |
|
"logits/chosen": -2.7222416400909424, |
|
"logits/rejected": -2.716777801513672, |
|
"logps/chosen": -0.9182409048080444, |
|
"logps/rejected": -1.3934853076934814, |
|
"loss": 0.9758, |
|
"odds_ratio_loss": 0.5755457878112793, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.09182409942150116, |
|
"rewards/margins": 0.047524429857730865, |
|
"rewards/rejected": -0.13934853672981262, |
|
"sft_loss": 0.9182409048080444, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.26672593909757725, |
|
"grad_norm": 2.675926446914673, |
|
"learning_rate": 4.903100015189153e-06, |
|
"logits/chosen": -2.693967342376709, |
|
"logits/rejected": -2.7088241577148438, |
|
"logps/chosen": -0.9448526501655579, |
|
"logps/rejected": -1.2143518924713135, |
|
"loss": 1.0096, |
|
"odds_ratio_loss": 0.6471182107925415, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.09448526054620743, |
|
"rewards/margins": 0.026949917897582054, |
|
"rewards/rejected": -0.12143518775701523, |
|
"sft_loss": 0.9448526501655579, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.28450766837074903, |
|
"grad_norm": 2.688988208770752, |
|
"learning_rate": 4.889839094762848e-06, |
|
"logits/chosen": -2.6985411643981934, |
|
"logits/rejected": -2.6904616355895996, |
|
"logps/chosen": -0.9669437408447266, |
|
"logps/rejected": -1.2702093124389648, |
|
"loss": 1.0307, |
|
"odds_ratio_loss": 0.6373674869537354, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.09669436514377594, |
|
"rewards/margins": 0.030326563864946365, |
|
"rewards/rejected": -0.1270209401845932, |
|
"sft_loss": 0.9669437408447266, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3022893976439209, |
|
"grad_norm": 2.4917891025543213, |
|
"learning_rate": 4.875748338438416e-06, |
|
"logits/chosen": -2.7611794471740723, |
|
"logits/rejected": -2.761101722717285, |
|
"logps/chosen": -1.059195876121521, |
|
"logps/rejected": -1.361277461051941, |
|
"loss": 1.1294, |
|
"odds_ratio_loss": 0.7024275064468384, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.10591959953308105, |
|
"rewards/margins": 0.03020814061164856, |
|
"rewards/rejected": -0.13612774014472961, |
|
"sft_loss": 1.059195876121521, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.32007112691709266, |
|
"grad_norm": 1.744297981262207, |
|
"learning_rate": 4.8608326390203386e-06, |
|
"logits/chosen": -2.774660348892212, |
|
"logits/rejected": -2.7589707374572754, |
|
"logps/chosen": -0.9749459028244019, |
|
"logps/rejected": -1.2964599132537842, |
|
"loss": 1.0378, |
|
"odds_ratio_loss": 0.6283431053161621, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.09749459475278854, |
|
"rewards/margins": 0.03215140849351883, |
|
"rewards/rejected": -0.12964601814746857, |
|
"sft_loss": 0.9749459028244019, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3378528561902645, |
|
"grad_norm": 3.190150022506714, |
|
"learning_rate": 4.845097175762251e-06, |
|
"logits/chosen": -2.794633626937866, |
|
"logits/rejected": -2.8166775703430176, |
|
"logps/chosen": -1.0003185272216797, |
|
"logps/rejected": -1.2995705604553223, |
|
"loss": 1.0657, |
|
"odds_ratio_loss": 0.6539334058761597, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.10003185272216797, |
|
"rewards/margins": 0.029925208538770676, |
|
"rewards/rejected": -0.12995705008506775, |
|
"sft_loss": 1.0003185272216797, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.3556345854634363, |
|
"grad_norm": 11.579193115234375, |
|
"learning_rate": 4.8285474125685286e-06, |
|
"logits/chosen": -2.8008620738983154, |
|
"logits/rejected": -2.8179948329925537, |
|
"logps/chosen": -0.9980157017707825, |
|
"logps/rejected": -1.1520756483078003, |
|
"loss": 1.0677, |
|
"odds_ratio_loss": 0.6971891522407532, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.09980158507823944, |
|
"rewards/margins": 0.015405992977321148, |
|
"rewards/rejected": -0.11520756781101227, |
|
"sft_loss": 0.9980157017707825, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.37341631473660813, |
|
"grad_norm": 1.7257640361785889, |
|
"learning_rate": 4.811189096097025e-06, |
|
"logits/chosen": -2.795761823654175, |
|
"logits/rejected": -2.815319776535034, |
|
"logps/chosen": -0.9524224996566772, |
|
"logps/rejected": -1.2786835432052612, |
|
"loss": 1.0165, |
|
"odds_ratio_loss": 0.6408416032791138, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.09524224698543549, |
|
"rewards/margins": 0.032626111060380936, |
|
"rewards/rejected": -0.1278683841228485, |
|
"sft_loss": 0.9524224996566772, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.39119804400978, |
|
"grad_norm": 3.106210231781006, |
|
"learning_rate": 4.793028253763633e-06, |
|
"logits/chosen": -2.8339433670043945, |
|
"logits/rejected": -2.840125799179077, |
|
"logps/chosen": -0.9467460513114929, |
|
"logps/rejected": -1.1728994846343994, |
|
"loss": 1.0181, |
|
"odds_ratio_loss": 0.7131101489067078, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.09467460960149765, |
|
"rewards/margins": 0.02261533960700035, |
|
"rewards/rejected": -0.1172899454832077, |
|
"sft_loss": 0.9467460513114929, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.40897977328295176, |
|
"grad_norm": 4.078444957733154, |
|
"learning_rate": 4.774071191649352e-06, |
|
"logits/chosen": -2.811408519744873, |
|
"logits/rejected": -2.8089513778686523, |
|
"logps/chosen": -0.9588619470596313, |
|
"logps/rejected": -1.2887790203094482, |
|
"loss": 1.019, |
|
"odds_ratio_loss": 0.6018227934837341, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.09588618576526642, |
|
"rewards/margins": 0.032991714775562286, |
|
"rewards/rejected": -0.1288779079914093, |
|
"sft_loss": 0.9588619470596313, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.4267615025561236, |
|
"grad_norm": 3.046936273574829, |
|
"learning_rate": 4.7543244923105975e-06, |
|
"logits/chosen": -2.8249118328094482, |
|
"logits/rejected": -2.851069927215576, |
|
"logps/chosen": -0.9376036524772644, |
|
"logps/rejected": -1.0390188694000244, |
|
"loss": 1.0087, |
|
"odds_ratio_loss": 0.7111011147499084, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.09376037865877151, |
|
"rewards/margins": 0.010141508653759956, |
|
"rewards/rejected": -0.10390187799930573, |
|
"sft_loss": 0.9376036524772644, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.4445432318292954, |
|
"grad_norm": 1.7991715669631958, |
|
"learning_rate": 4.733795012493506e-06, |
|
"logits/chosen": -2.8495593070983887, |
|
"logits/rejected": -2.889249086380005, |
|
"logps/chosen": -1.0128796100616455, |
|
"logps/rejected": -1.202759861946106, |
|
"loss": 1.0815, |
|
"odds_ratio_loss": 0.6860855221748352, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.10128796100616455, |
|
"rewards/margins": 0.018988018855452538, |
|
"rewards/rejected": -0.12027599662542343, |
|
"sft_loss": 1.0128796100616455, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.46232496110246724, |
|
"grad_norm": 1.7875176668167114, |
|
"learning_rate": 4.712489880753035e-06, |
|
"logits/chosen": -2.814908742904663, |
|
"logits/rejected": -2.801401138305664, |
|
"logps/chosen": -0.8547848463058472, |
|
"logps/rejected": -1.1043689250946045, |
|
"loss": 0.9165, |
|
"odds_ratio_loss": 0.6170121431350708, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.08547849208116531, |
|
"rewards/margins": 0.02495841309428215, |
|
"rewards/rejected": -0.11043689399957657, |
|
"sft_loss": 0.8547848463058472, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.480106690375639, |
|
"grad_norm": 1.5513663291931152, |
|
"learning_rate": 4.690416494977673e-06, |
|
"logits/chosen": -2.804611921310425, |
|
"logits/rejected": -2.835052251815796, |
|
"logps/chosen": -0.8691908717155457, |
|
"logps/rejected": -1.2830617427825928, |
|
"loss": 0.9283, |
|
"odds_ratio_loss": 0.5908734202384949, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.08691909164190292, |
|
"rewards/margins": 0.04138708487153053, |
|
"rewards/rejected": -0.12830618023872375, |
|
"sft_loss": 0.8691908717155457, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.49788841964881086, |
|
"grad_norm": 1.2906347513198853, |
|
"learning_rate": 4.667582519820639e-06, |
|
"logits/chosen": -2.8521058559417725, |
|
"logits/rejected": -2.8685240745544434, |
|
"logps/chosen": -0.9744144678115845, |
|
"logps/rejected": -1.0906678438186646, |
|
"loss": 1.0455, |
|
"odds_ratio_loss": 0.7107566595077515, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.09744144976139069, |
|
"rewards/margins": 0.011625329963862896, |
|
"rewards/rejected": -0.10906676948070526, |
|
"sft_loss": 0.9744144678115845, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.5156701489219827, |
|
"grad_norm": 3.3145501613616943, |
|
"learning_rate": 4.643995884038443e-06, |
|
"logits/chosen": -2.861464023590088, |
|
"logits/rejected": -2.889545440673828, |
|
"logps/chosen": -0.930392861366272, |
|
"logps/rejected": -1.140458345413208, |
|
"loss": 0.9957, |
|
"odds_ratio_loss": 0.6526821255683899, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.09303927421569824, |
|
"rewards/margins": 0.021006565541028976, |
|
"rewards/rejected": -0.11404584348201752, |
|
"sft_loss": 0.930392861366272, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.5334518781951545, |
|
"grad_norm": 1.5059245824813843, |
|
"learning_rate": 4.6196647777377475e-06, |
|
"logits/chosen": -2.825852870941162, |
|
"logits/rejected": -2.8473992347717285, |
|
"logps/chosen": -0.9365053176879883, |
|
"logps/rejected": -1.1387995481491089, |
|
"loss": 1.0047, |
|
"odds_ratio_loss": 0.6818416714668274, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.09365053474903107, |
|
"rewards/margins": 0.02022942714393139, |
|
"rewards/rejected": -0.1138799637556076, |
|
"sft_loss": 0.9365053176879883, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5512336074683263, |
|
"grad_norm": 1.2869160175323486, |
|
"learning_rate": 4.59459764953147e-06, |
|
"logits/chosen": -2.8529794216156006, |
|
"logits/rejected": -2.84968900680542, |
|
"logps/chosen": -0.9238570928573608, |
|
"logps/rejected": -1.1744383573532104, |
|
"loss": 0.9884, |
|
"odds_ratio_loss": 0.6458603739738464, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.09238572418689728, |
|
"rewards/margins": 0.025058116763830185, |
|
"rewards/rejected": -0.11744382232427597, |
|
"sft_loss": 0.9238570928573608, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.5690153367414981, |
|
"grad_norm": 2.306725025177002, |
|
"learning_rate": 4.568803203605133e-06, |
|
"logits/chosen": -2.8887312412261963, |
|
"logits/rejected": -2.8795981407165527, |
|
"logps/chosen": -0.8577371835708618, |
|
"logps/rejected": -1.1274560689926147, |
|
"loss": 0.9223, |
|
"odds_ratio_loss": 0.6451677680015564, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.08577371388673782, |
|
"rewards/margins": 0.02697189524769783, |
|
"rewards/rejected": -0.11274560540914536, |
|
"sft_loss": 0.8577371835708618, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.58679706601467, |
|
"grad_norm": 1.8801417350769043, |
|
"learning_rate": 4.542290396694462e-06, |
|
"logits/chosen": -2.875821828842163, |
|
"logits/rejected": -2.8778319358825684, |
|
"logps/chosen": -0.9018943905830383, |
|
"logps/rejected": -1.1100924015045166, |
|
"loss": 0.9709, |
|
"odds_ratio_loss": 0.6898372769355774, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.09018944203853607, |
|
"rewards/margins": 0.020819801837205887, |
|
"rewards/rejected": -0.11100924015045166, |
|
"sft_loss": 0.9018943905830383, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.6045787952878418, |
|
"grad_norm": 11.704841613769531, |
|
"learning_rate": 4.515068434975298e-06, |
|
"logits/chosen": -2.8479015827178955, |
|
"logits/rejected": -2.862330675125122, |
|
"logps/chosen": -0.975223183631897, |
|
"logps/rejected": -1.3013463020324707, |
|
"loss": 1.0369, |
|
"odds_ratio_loss": 0.6164206266403198, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.0975223109126091, |
|
"rewards/margins": 0.03261232003569603, |
|
"rewards/rejected": -0.13013465702533722, |
|
"sft_loss": 0.975223183631897, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.6223605245610135, |
|
"grad_norm": 1.487052321434021, |
|
"learning_rate": 4.487146770866887e-06, |
|
"logits/chosen": -2.872251510620117, |
|
"logits/rejected": -2.896749973297119, |
|
"logps/chosen": -0.9074058532714844, |
|
"logps/rejected": -1.0781655311584473, |
|
"loss": 0.9736, |
|
"odds_ratio_loss": 0.6616368293762207, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.09074059128761292, |
|
"rewards/margins": 0.01707596145570278, |
|
"rewards/rejected": -0.10781653970479965, |
|
"sft_loss": 0.9074058532714844, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.6401422538341853, |
|
"grad_norm": 2.0436906814575195, |
|
"learning_rate": 4.458535099749666e-06, |
|
"logits/chosen": -2.8917770385742188, |
|
"logits/rejected": -2.9039359092712402, |
|
"logps/chosen": -0.9911856651306152, |
|
"logps/rejected": -1.1147364377975464, |
|
"loss": 1.0655, |
|
"odds_ratio_loss": 0.7427235245704651, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.09911856800317764, |
|
"rewards/margins": 0.012355070561170578, |
|
"rewards/rejected": -0.11147364228963852, |
|
"sft_loss": 0.9911856651306152, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.6579239831073572, |
|
"grad_norm": 3.131418228149414, |
|
"learning_rate": 4.429243356598694e-06, |
|
"logits/chosen": -2.861708641052246, |
|
"logits/rejected": -2.865683078765869, |
|
"logps/chosen": -0.9219058156013489, |
|
"logps/rejected": -1.2925573587417603, |
|
"loss": 0.9872, |
|
"odds_ratio_loss": 0.6528664231300354, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.09219057857990265, |
|
"rewards/margins": 0.03706515580415726, |
|
"rewards/rejected": -0.1292557418346405, |
|
"sft_loss": 0.9219058156013489, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.675705712380529, |
|
"grad_norm": 2.098020076751709, |
|
"learning_rate": 4.399281712533875e-06, |
|
"logits/chosen": -2.8784518241882324, |
|
"logits/rejected": -2.8920931816101074, |
|
"logps/chosen": -0.857374370098114, |
|
"logps/rejected": -1.0407134294509888, |
|
"loss": 0.9257, |
|
"odds_ratio_loss": 0.6832669377326965, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.0857374519109726, |
|
"rewards/margins": 0.018333889544010162, |
|
"rewards/rejected": -0.10407133400440216, |
|
"sft_loss": 0.857374370098114, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.6934874416537008, |
|
"grad_norm": 2.2306644916534424, |
|
"learning_rate": 4.368660571288192e-06, |
|
"logits/chosen": -2.8683114051818848, |
|
"logits/rejected": -2.8982186317443848, |
|
"logps/chosen": -0.902807891368866, |
|
"logps/rejected": -1.0363978147506714, |
|
"loss": 0.9727, |
|
"odds_ratio_loss": 0.6994079947471619, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.09028079360723495, |
|
"rewards/margins": 0.013359000906348228, |
|
"rewards/rejected": -0.10363979637622833, |
|
"sft_loss": 0.902807891368866, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.7112691709268726, |
|
"grad_norm": 1.5744704008102417, |
|
"learning_rate": 4.337390565595163e-06, |
|
"logits/chosen": -2.857945680618286, |
|
"logits/rejected": -2.8643996715545654, |
|
"logps/chosen": -0.9818795919418335, |
|
"logps/rejected": -1.0570474863052368, |
|
"loss": 1.0562, |
|
"odds_ratio_loss": 0.743161678314209, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.09818795323371887, |
|
"rewards/margins": 0.007516802754253149, |
|
"rewards/rejected": -0.10570474714040756, |
|
"sft_loss": 0.9818795919418335, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7290509002000445, |
|
"grad_norm": 2.6117560863494873, |
|
"learning_rate": 4.305482553496786e-06, |
|
"logits/chosen": -2.798802137374878, |
|
"logits/rejected": -2.823525905609131, |
|
"logps/chosen": -0.8416363596916199, |
|
"logps/rejected": -1.0808361768722534, |
|
"loss": 0.9042, |
|
"odds_ratio_loss": 0.6256502270698547, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.08416363596916199, |
|
"rewards/margins": 0.023919973522424698, |
|
"rewards/rejected": -0.10808360576629639, |
|
"sft_loss": 0.8416363596916199, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.7468326294732163, |
|
"grad_norm": 2.1309070587158203, |
|
"learning_rate": 4.272947614573244e-06, |
|
"logits/chosen": -2.8514039516448975, |
|
"logits/rejected": -2.8706977367401123, |
|
"logps/chosen": -0.935457706451416, |
|
"logps/rejected": -1.069616436958313, |
|
"loss": 1.0031, |
|
"odds_ratio_loss": 0.6768142580986023, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.09354577958583832, |
|
"rewards/margins": 0.013415870256721973, |
|
"rewards/rejected": -0.10696164518594742, |
|
"sft_loss": 0.935457706451416, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.7646143587463881, |
|
"grad_norm": 1.1241884231567383, |
|
"learning_rate": 4.23979704609569e-06, |
|
"logits/chosen": -2.858297824859619, |
|
"logits/rejected": -2.8802642822265625, |
|
"logps/chosen": -0.8784758448600769, |
|
"logps/rejected": -1.0237103700637817, |
|
"loss": 0.9426, |
|
"odds_ratio_loss": 0.641250491142273, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.08784758299589157, |
|
"rewards/margins": 0.01452344935387373, |
|
"rewards/rejected": -0.10237103700637817, |
|
"sft_loss": 0.8784758448600769, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.78239608801956, |
|
"grad_norm": 1.7693474292755127, |
|
"learning_rate": 4.206042359103435e-06, |
|
"logits/chosen": -2.8127472400665283, |
|
"logits/rejected": -2.8374340534210205, |
|
"logps/chosen": -0.933555006980896, |
|
"logps/rejected": -1.194852352142334, |
|
"loss": 0.9991, |
|
"odds_ratio_loss": 0.6553630828857422, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.09335552155971527, |
|
"rewards/margins": 0.02612972818315029, |
|
"rewards/rejected": -0.11948523670434952, |
|
"sft_loss": 0.933555006980896, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.8001778172927317, |
|
"grad_norm": 1.5750656127929688, |
|
"learning_rate": 4.17169527440691e-06, |
|
"logits/chosen": -2.851017475128174, |
|
"logits/rejected": -2.85324764251709, |
|
"logps/chosen": -0.8872865438461304, |
|
"logps/rejected": -1.0435672998428345, |
|
"loss": 0.9571, |
|
"odds_ratio_loss": 0.6983404755592346, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.0887286514043808, |
|
"rewards/margins": 0.015628065913915634, |
|
"rewards/rejected": -0.10435672104358673, |
|
"sft_loss": 0.8872865438461304, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.8179595465659035, |
|
"grad_norm": 2.6487419605255127, |
|
"learning_rate": 4.136767718517797e-06, |
|
"logits/chosen": -2.8638827800750732, |
|
"logits/rejected": -2.885394811630249, |
|
"logps/chosen": -0.8147961497306824, |
|
"logps/rejected": -1.0451968908309937, |
|
"loss": 0.8763, |
|
"odds_ratio_loss": 0.6151142120361328, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.08147960901260376, |
|
"rewards/margins": 0.023040082305669785, |
|
"rewards/rejected": -0.10451970249414444, |
|
"sft_loss": 0.8147961497306824, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.8357412758390753, |
|
"grad_norm": 8.826119422912598, |
|
"learning_rate": 4.1012718195077196e-06, |
|
"logits/chosen": -2.907290458679199, |
|
"logits/rejected": -2.9450554847717285, |
|
"logps/chosen": -0.8985522985458374, |
|
"logps/rejected": -1.0472660064697266, |
|
"loss": 0.9656, |
|
"odds_ratio_loss": 0.6701352000236511, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.08985523879528046, |
|
"rewards/margins": 0.014871363528072834, |
|
"rewards/rejected": -0.10472659766674042, |
|
"sft_loss": 0.8985522985458374, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.8535230051122472, |
|
"grad_norm": 1.3586628437042236, |
|
"learning_rate": 4.065219902796953e-06, |
|
"logits/chosen": -2.864061117172241, |
|
"logits/rejected": -2.8628530502319336, |
|
"logps/chosen": -0.8511291742324829, |
|
"logps/rejected": -1.0970637798309326, |
|
"loss": 0.9158, |
|
"odds_ratio_loss": 0.646445631980896, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.08511291444301605, |
|
"rewards/margins": 0.024593474343419075, |
|
"rewards/rejected": -0.10970638692378998, |
|
"sft_loss": 0.8511291742324829, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.871304734385419, |
|
"grad_norm": 1.2425549030303955, |
|
"learning_rate": 4.028624486874608e-06, |
|
"logits/chosen": -2.8780291080474854, |
|
"logits/rejected": -2.901134729385376, |
|
"logps/chosen": -0.851506233215332, |
|
"logps/rejected": -1.129828691482544, |
|
"loss": 0.9181, |
|
"odds_ratio_loss": 0.6657174825668335, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.08515061438083649, |
|
"rewards/margins": 0.02783224917948246, |
|
"rewards/rejected": -0.1129828691482544, |
|
"sft_loss": 0.851506233215332, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.8890864636585908, |
|
"grad_norm": 1.8243030309677124, |
|
"learning_rate": 3.99149827895177e-06, |
|
"logits/chosen": -2.9000747203826904, |
|
"logits/rejected": -2.9058332443237305, |
|
"logps/chosen": -0.9306007623672485, |
|
"logps/rejected": -1.053107500076294, |
|
"loss": 1.0001, |
|
"odds_ratio_loss": 0.6953399777412415, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.09306007623672485, |
|
"rewards/margins": 0.012250673025846481, |
|
"rewards/rejected": -0.10531075298786163, |
|
"sft_loss": 0.9306007623672485, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8890864636585908, |
|
"eval_logits/chosen": -2.8713040351867676, |
|
"eval_logits/rejected": -2.8936164379119873, |
|
"eval_logps/chosen": -0.8689887523651123, |
|
"eval_logps/rejected": -1.1122756004333496, |
|
"eval_loss": 0.9318326711654663, |
|
"eval_odds_ratio_loss": 0.6284381151199341, |
|
"eval_rewards/accuracies": 0.5920000076293945, |
|
"eval_rewards/chosen": -0.08689887821674347, |
|
"eval_rewards/margins": 0.024328680709004402, |
|
"eval_rewards/rejected": -0.11122756451368332, |
|
"eval_runtime": 189.293, |
|
"eval_samples_per_second": 5.283, |
|
"eval_sft_loss": 0.8689887523651123, |
|
"eval_steps_per_second": 2.641, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.9068681929317626, |
|
"grad_norm": 1.853105902671814, |
|
"learning_rate": 3.953854170549114e-06, |
|
"logits/chosen": -2.8680295944213867, |
|
"logits/rejected": -2.8688206672668457, |
|
"logps/chosen": -0.9001534581184387, |
|
"logps/rejected": -1.0128238201141357, |
|
"loss": 0.9674, |
|
"odds_ratio_loss": 0.6725192070007324, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.09001535177230835, |
|
"rewards/margins": 0.011267038993537426, |
|
"rewards/rejected": -0.10128238052129745, |
|
"sft_loss": 0.9001534581184387, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.9246499222049345, |
|
"grad_norm": 1.9707605838775635, |
|
"learning_rate": 3.91570523302051e-06, |
|
"logits/chosen": -2.8937981128692627, |
|
"logits/rejected": -2.9022488594055176, |
|
"logps/chosen": -0.8274223208427429, |
|
"logps/rejected": -1.105006217956543, |
|
"loss": 0.8928, |
|
"odds_ratio_loss": 0.6533216834068298, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.08274223655462265, |
|
"rewards/margins": 0.027758393436670303, |
|
"rewards/rejected": -0.11050061881542206, |
|
"sft_loss": 0.8274223208427429, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.9424316514781063, |
|
"grad_norm": 1.4241600036621094, |
|
"learning_rate": 3.8770647130141996e-06, |
|
"logits/chosen": -2.892141342163086, |
|
"logits/rejected": -2.8914198875427246, |
|
"logps/chosen": -0.875231921672821, |
|
"logps/rejected": -1.0876398086547852, |
|
"loss": 0.9415, |
|
"odds_ratio_loss": 0.662711501121521, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.08752318471670151, |
|
"rewards/margins": 0.02124079503118992, |
|
"rewards/rejected": -0.10876397788524628, |
|
"sft_loss": 0.875231921672821, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.960213380751278, |
|
"grad_norm": 2.250497341156006, |
|
"learning_rate": 3.837946027873086e-06, |
|
"logits/chosen": -2.8456499576568604, |
|
"logits/rejected": -2.8566787242889404, |
|
"logps/chosen": -0.9233220815658569, |
|
"logps/rejected": -1.1583213806152344, |
|
"loss": 0.9895, |
|
"odds_ratio_loss": 0.6621237993240356, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.09233220666646957, |
|
"rewards/margins": 0.02349993959069252, |
|
"rewards/rejected": -0.11583214998245239, |
|
"sft_loss": 0.9233220815658569, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.9779951100244498, |
|
"grad_norm": 5.65519905090332, |
|
"learning_rate": 3.7983627609757713e-06, |
|
"logits/chosen": -2.8913745880126953, |
|
"logits/rejected": -2.901867389678955, |
|
"logps/chosen": -0.9292505383491516, |
|
"logps/rejected": -1.059091329574585, |
|
"loss": 0.996, |
|
"odds_ratio_loss": 0.6679803133010864, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.0929250493645668, |
|
"rewards/margins": 0.012984082102775574, |
|
"rewards/rejected": -0.10590913146734238, |
|
"sft_loss": 0.9292505383491516, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.9957768392976217, |
|
"grad_norm": 1.4165409803390503, |
|
"learning_rate": 3.758328657019924e-06, |
|
"logits/chosen": -2.8648407459259033, |
|
"logits/rejected": -2.8767480850219727, |
|
"logps/chosen": -0.8623329997062683, |
|
"logps/rejected": -1.1223938465118408, |
|
"loss": 0.9301, |
|
"odds_ratio_loss": 0.6778011322021484, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.08623330295085907, |
|
"rewards/margins": 0.026006082072854042, |
|
"rewards/rejected": -0.11223938316106796, |
|
"sft_loss": 0.8623329997062683, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.0135585685707935, |
|
"grad_norm": 6.480414867401123, |
|
"learning_rate": 3.717857617249642e-06, |
|
"logits/chosen": -2.8424503803253174, |
|
"logits/rejected": -2.8727316856384277, |
|
"logps/chosen": -0.9302489161491394, |
|
"logps/rejected": -1.159745693206787, |
|
"loss": 0.9995, |
|
"odds_ratio_loss": 0.6924289464950562, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.09302489459514618, |
|
"rewards/margins": 0.02294967696070671, |
|
"rewards/rejected": -0.11597456783056259, |
|
"sft_loss": 0.9302489161491394, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.0313402978439654, |
|
"grad_norm": 1.2129720449447632, |
|
"learning_rate": 3.6769636946284543e-06, |
|
"logits/chosen": -2.8574068546295166, |
|
"logits/rejected": -2.8658154010772705, |
|
"logps/chosen": -0.8263272047042847, |
|
"logps/rejected": -1.041471242904663, |
|
"loss": 0.8894, |
|
"odds_ratio_loss": 0.6303838491439819, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.08263272792100906, |
|
"rewards/margins": 0.02151440642774105, |
|
"rewards/rejected": -0.10414713621139526, |
|
"sft_loss": 0.8263272047042847, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.049122027117137, |
|
"grad_norm": 1.3920116424560547, |
|
"learning_rate": 3.6356610889596355e-06, |
|
"logits/chosen": -2.8504767417907715, |
|
"logits/rejected": -2.8750224113464355, |
|
"logps/chosen": -0.891506016254425, |
|
"logps/rejected": -1.043367624282837, |
|
"loss": 0.9578, |
|
"odds_ratio_loss": 0.6629586815834045, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.08915059268474579, |
|
"rewards/margins": 0.01518617570400238, |
|
"rewards/rejected": -0.10433676093816757, |
|
"sft_loss": 0.891506016254425, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.066903756390309, |
|
"grad_norm": 1.6321930885314941, |
|
"learning_rate": 3.593964141955541e-06, |
|
"logits/chosen": -2.851910352706909, |
|
"logits/rejected": -2.856790065765381, |
|
"logps/chosen": -0.849289059638977, |
|
"logps/rejected": -0.9652984738349915, |
|
"loss": 0.9171, |
|
"odds_ratio_loss": 0.6785116195678711, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.08492890745401382, |
|
"rewards/margins": 0.011600947007536888, |
|
"rewards/rejected": -0.09652985632419586, |
|
"sft_loss": 0.849289059638977, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.0846854856634809, |
|
"grad_norm": 1.705749273300171, |
|
"learning_rate": 3.5518873322576573e-06, |
|
"logits/chosen": -2.8073315620422363, |
|
"logits/rejected": -2.8466951847076416, |
|
"logps/chosen": -0.8435554504394531, |
|
"logps/rejected": -1.0204975605010986, |
|
"loss": 0.9069, |
|
"odds_ratio_loss": 0.6330701112747192, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.08435554802417755, |
|
"rewards/margins": 0.017694219946861267, |
|
"rewards/rejected": -0.10204975306987762, |
|
"sft_loss": 0.8435554504394531, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.1024672149366526, |
|
"grad_norm": 1.2050608396530151, |
|
"learning_rate": 3.5094452704091143e-06, |
|
"logits/chosen": -2.8757572174072266, |
|
"logits/rejected": -2.8627264499664307, |
|
"logps/chosen": -0.8311964273452759, |
|
"logps/rejected": -1.010647177696228, |
|
"loss": 0.8957, |
|
"odds_ratio_loss": 0.6448286175727844, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.08311963826417923, |
|
"rewards/margins": 0.01794508285820484, |
|
"rewards/rejected": -0.10106471925973892, |
|
"sft_loss": 0.8311964273452759, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.1202489442098245, |
|
"grad_norm": 2.8746261596679688, |
|
"learning_rate": 3.46665269378139e-06, |
|
"logits/chosen": -2.834296941757202, |
|
"logits/rejected": -2.8471195697784424, |
|
"logps/chosen": -0.8721610903739929, |
|
"logps/rejected": -1.0536631345748901, |
|
"loss": 0.9388, |
|
"odds_ratio_loss": 0.6662768125534058, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.08721610903739929, |
|
"rewards/margins": 0.018150202929973602, |
|
"rewards/rejected": -0.1053663119673729, |
|
"sft_loss": 0.8721610903739929, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.1380306734829961, |
|
"grad_norm": 2.6889796257019043, |
|
"learning_rate": 3.4235244614569794e-06, |
|
"logits/chosen": -2.8430144786834717, |
|
"logits/rejected": -2.841323137283325, |
|
"logps/chosen": -0.9479130506515503, |
|
"logps/rejected": -1.0536433458328247, |
|
"loss": 1.0213, |
|
"odds_ratio_loss": 0.7333841919898987, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.09479130804538727, |
|
"rewards/margins": 0.010573023930191994, |
|
"rewards/rejected": -0.10536432266235352, |
|
"sft_loss": 0.9479130506515503, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.155812402756168, |
|
"grad_norm": 1.1239742040634155, |
|
"learning_rate": 3.3800755490698008e-06, |
|
"logits/chosen": -2.867356777191162, |
|
"logits/rejected": -2.8678269386291504, |
|
"logps/chosen": -0.8813873529434204, |
|
"logps/rejected": -1.146475076675415, |
|
"loss": 0.9416, |
|
"odds_ratio_loss": 0.6019629240036011, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.08813872933387756, |
|
"rewards/margins": 0.02650878205895424, |
|
"rewards/rejected": -0.1146475300192833, |
|
"sft_loss": 0.8813873529434204, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.17359413202934, |
|
"grad_norm": 1.5449692010879517, |
|
"learning_rate": 3.3363210436051287e-06, |
|
"logits/chosen": -2.8728928565979004, |
|
"logits/rejected": -2.8769795894622803, |
|
"logps/chosen": -0.9058928489685059, |
|
"logps/rejected": -1.1068971157073975, |
|
"loss": 0.975, |
|
"odds_ratio_loss": 0.6907466650009155, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.09058928489685059, |
|
"rewards/margins": 0.0201004259288311, |
|
"rewards/rejected": -0.11068971455097198, |
|
"sft_loss": 0.9058928489685059, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.1913758613025116, |
|
"grad_norm": 1.958606243133545, |
|
"learning_rate": 3.292276138160867e-06, |
|
"logits/chosen": -2.8424530029296875, |
|
"logits/rejected": -2.854606866836548, |
|
"logps/chosen": -0.8394180536270142, |
|
"logps/rejected": -1.0364609956741333, |
|
"loss": 0.9055, |
|
"odds_ratio_loss": 0.6611912250518799, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.08394180238246918, |
|
"rewards/margins": 0.019704295322299004, |
|
"rewards/rejected": -0.10364609956741333, |
|
"sft_loss": 0.8394180536270142, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.2091575905756835, |
|
"grad_norm": 3.3439667224884033, |
|
"learning_rate": 3.2479561266719694e-06, |
|
"logits/chosen": -2.8539648056030273, |
|
"logits/rejected": -2.8596293926239014, |
|
"logps/chosen": -0.89495450258255, |
|
"logps/rejected": -1.0723049640655518, |
|
"loss": 0.9604, |
|
"odds_ratio_loss": 0.6547910571098328, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.0894954577088356, |
|
"rewards/margins": 0.017735039815306664, |
|
"rewards/rejected": -0.10723049938678741, |
|
"sft_loss": 0.89495450258255, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.2269393198488552, |
|
"grad_norm": 3.391028881072998, |
|
"learning_rate": 3.2033763985998533e-06, |
|
"logits/chosen": -2.8784637451171875, |
|
"logits/rejected": -2.878801107406616, |
|
"logps/chosen": -0.8193685412406921, |
|
"logps/rejected": -1.303346037864685, |
|
"loss": 0.8782, |
|
"odds_ratio_loss": 0.5878696441650391, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.08193685114383698, |
|
"rewards/margins": 0.048397745937108994, |
|
"rewards/rejected": -0.13033458590507507, |
|
"sft_loss": 0.8193685412406921, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.244721049122027, |
|
"grad_norm": 1.8017568588256836, |
|
"learning_rate": 3.1585524335886335e-06, |
|
"logits/chosen": -2.8997411727905273, |
|
"logits/rejected": -2.891542911529541, |
|
"logps/chosen": -0.8149515986442566, |
|
"logps/rejected": -1.062873125076294, |
|
"loss": 0.8781, |
|
"odds_ratio_loss": 0.6314008831977844, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.08149515837430954, |
|
"rewards/margins": 0.024792145937681198, |
|
"rewards/rejected": -0.10628731548786163, |
|
"sft_loss": 0.8149515986442566, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.262502778395199, |
|
"grad_norm": 2.5622735023498535, |
|
"learning_rate": 3.1134997960900536e-06, |
|
"logits/chosen": -2.832627296447754, |
|
"logits/rejected": -2.8471617698669434, |
|
"logps/chosen": -0.8054940104484558, |
|
"logps/rejected": -1.1920253038406372, |
|
"loss": 0.864, |
|
"odds_ratio_loss": 0.58550626039505, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.08054940402507782, |
|
"rewards/margins": 0.03865312412381172, |
|
"rewards/rejected": -0.11920253187417984, |
|
"sft_loss": 0.8054940104484558, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.2802845076683709, |
|
"grad_norm": 1.4196934700012207, |
|
"learning_rate": 3.0682341299589583e-06, |
|
"logits/chosen": -2.857084274291992, |
|
"logits/rejected": -2.8681209087371826, |
|
"logps/chosen": -0.8212624788284302, |
|
"logps/rejected": -0.9836801290512085, |
|
"loss": 0.887, |
|
"odds_ratio_loss": 0.6570355296134949, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.08212624490261078, |
|
"rewards/margins": 0.016241777688264847, |
|
"rewards/rejected": -0.09836802631616592, |
|
"sft_loss": 0.8212624788284302, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.2980662369415426, |
|
"grad_norm": 1.5716747045516968, |
|
"learning_rate": 3.022771153021201e-06, |
|
"logits/chosen": -2.860788583755493, |
|
"logits/rejected": -2.8880984783172607, |
|
"logps/chosen": -0.8381561040878296, |
|
"logps/rejected": -1.052847981452942, |
|
"loss": 0.9036, |
|
"odds_ratio_loss": 0.6544219255447388, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.08381561934947968, |
|
"rewards/margins": 0.021469179540872574, |
|
"rewards/rejected": -0.10528478771448135, |
|
"sft_loss": 0.8381561040878296, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.3158479662147144, |
|
"grad_norm": 1.4270474910736084, |
|
"learning_rate": 2.9771266516158625e-06, |
|
"logits/chosen": -2.8542871475219727, |
|
"logits/rejected": -2.877516508102417, |
|
"logps/chosen": -0.8428620100021362, |
|
"logps/rejected": -1.029301404953003, |
|
"loss": 0.9101, |
|
"odds_ratio_loss": 0.6720696687698364, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.08428619801998138, |
|
"rewards/margins": 0.0186439398676157, |
|
"rewards/rejected": -0.10293014347553253, |
|
"sft_loss": 0.8428620100021362, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.3336296954878861, |
|
"grad_norm": 1.401987910270691, |
|
"learning_rate": 2.9313164751136802e-06, |
|
"logits/chosen": -2.840547561645508, |
|
"logits/rejected": -2.872194766998291, |
|
"logps/chosen": -0.8261799812316895, |
|
"logps/rejected": -1.0598170757293701, |
|
"loss": 0.8865, |
|
"odds_ratio_loss": 0.6030489802360535, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.08261799812316895, |
|
"rewards/margins": 0.023363707587122917, |
|
"rewards/rejected": -0.1059817224740982, |
|
"sft_loss": 0.8261799812316895, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.351411424761058, |
|
"grad_norm": 2.474881410598755, |
|
"learning_rate": 2.8853565304135956e-06, |
|
"logits/chosen": -2.8949756622314453, |
|
"logits/rejected": -2.898646593093872, |
|
"logps/chosen": -0.918350338935852, |
|
"logps/rejected": -1.0580472946166992, |
|
"loss": 0.9899, |
|
"odds_ratio_loss": 0.7158701419830322, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.09183504432439804, |
|
"rewards/margins": 0.013969694264233112, |
|
"rewards/rejected": -0.10580474138259888, |
|
"sft_loss": 0.918350338935852, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.36919315403423, |
|
"grad_norm": 2.682598829269409, |
|
"learning_rate": 2.839262776419313e-06, |
|
"logits/chosen": -2.8501102924346924, |
|
"logits/rejected": -2.85284686088562, |
|
"logps/chosen": -0.8370243906974792, |
|
"logps/rejected": -1.3541486263275146, |
|
"loss": 0.8958, |
|
"odds_ratio_loss": 0.5878289937973022, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.0837024375796318, |
|
"rewards/margins": 0.05171241611242294, |
|
"rewards/rejected": -0.13541486859321594, |
|
"sft_loss": 0.8370243906974792, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.3869748833074016, |
|
"grad_norm": 2.036529064178467, |
|
"learning_rate": 2.793051218497817e-06, |
|
"logits/chosen": -2.871901035308838, |
|
"logits/rejected": -2.880750894546509, |
|
"logps/chosen": -0.8279402852058411, |
|
"logps/rejected": -0.9436777830123901, |
|
"loss": 0.8955, |
|
"odds_ratio_loss": 0.6755216717720032, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.08279402554035187, |
|
"rewards/margins": 0.011573752388358116, |
|
"rewards/rejected": -0.09436777979135513, |
|
"sft_loss": 0.8279402852058411, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.4047566125805735, |
|
"grad_norm": 1.4822628498077393, |
|
"learning_rate": 2.7467379029217437e-06, |
|
"logits/chosen": -2.848276138305664, |
|
"logits/rejected": -2.8498966693878174, |
|
"logps/chosen": -0.8329153060913086, |
|
"logps/rejected": -1.1663061380386353, |
|
"loss": 0.8957, |
|
"odds_ratio_loss": 0.6282148957252502, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.08329152315855026, |
|
"rewards/margins": 0.033339083194732666, |
|
"rewards/rejected": -0.11663061380386353, |
|
"sft_loss": 0.8329153060913086, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.4225383418537452, |
|
"grad_norm": 1.2180672883987427, |
|
"learning_rate": 2.7003389112975546e-06, |
|
"logits/chosen": -2.8609681129455566, |
|
"logits/rejected": -2.8844683170318604, |
|
"logps/chosen": -0.8761266469955444, |
|
"logps/rejected": -1.083616018295288, |
|
"loss": 0.9409, |
|
"odds_ratio_loss": 0.6474456787109375, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.08761266618967056, |
|
"rewards/margins": 0.020748943090438843, |
|
"rewards/rejected": -0.10836161673069, |
|
"sft_loss": 0.8761266469955444, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.440320071126917, |
|
"grad_norm": 3.231017589569092, |
|
"learning_rate": 2.653870354981437e-06, |
|
"logits/chosen": -2.858649969100952, |
|
"logits/rejected": -2.870346784591675, |
|
"logps/chosen": -0.7923532128334045, |
|
"logps/rejected": -1.0410521030426025, |
|
"loss": 0.8559, |
|
"odds_ratio_loss": 0.6351376175880432, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.07923531532287598, |
|
"rewards/margins": 0.024869883432984352, |
|
"rewards/rejected": -0.10410521179437637, |
|
"sft_loss": 0.7923532128334045, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.458101800400089, |
|
"grad_norm": 4.5185065269470215, |
|
"learning_rate": 2.6073483694848777e-06, |
|
"logits/chosen": -2.827204465866089, |
|
"logits/rejected": -2.8571865558624268, |
|
"logps/chosen": -0.8361181020736694, |
|
"logps/rejected": -1.0542662143707275, |
|
"loss": 0.9018, |
|
"odds_ratio_loss": 0.6572374701499939, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.08361181616783142, |
|
"rewards/margins": 0.02181481197476387, |
|
"rewards/rejected": -0.10542663186788559, |
|
"sft_loss": 0.8361181020736694, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.4758835296732609, |
|
"grad_norm": 1.3891066312789917, |
|
"learning_rate": 2.560789108871847e-06, |
|
"logits/chosen": -2.8242173194885254, |
|
"logits/rejected": -2.8416523933410645, |
|
"logps/chosen": -0.8545220494270325, |
|
"logps/rejected": -1.1704602241516113, |
|
"loss": 0.9191, |
|
"odds_ratio_loss": 0.6462253928184509, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.08545219898223877, |
|
"rewards/margins": 0.03159382566809654, |
|
"rewards/rejected": -0.11704603582620621, |
|
"sft_loss": 0.8545220494270325, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.4936652589464325, |
|
"grad_norm": 3.542637348175049, |
|
"learning_rate": 2.514208740149544e-06, |
|
"logits/chosen": -2.8630993366241455, |
|
"logits/rejected": -2.892916440963745, |
|
"logps/chosen": -0.8821213841438293, |
|
"logps/rejected": -1.1250375509262085, |
|
"loss": 0.9496, |
|
"odds_ratio_loss": 0.6751636862754822, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.08821214735507965, |
|
"rewards/margins": 0.024291623383760452, |
|
"rewards/rejected": -0.11250375211238861, |
|
"sft_loss": 0.8821213841438293, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.5114469882196042, |
|
"grad_norm": 1.9266990423202515, |
|
"learning_rate": 2.46762343765464e-06, |
|
"logits/chosen": -2.8725075721740723, |
|
"logits/rejected": -2.877906084060669, |
|
"logps/chosen": -0.8815126419067383, |
|
"logps/rejected": -1.12534499168396, |
|
"loss": 0.9432, |
|
"odds_ratio_loss": 0.6172918081283569, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.08815126121044159, |
|
"rewards/margins": 0.02438322827219963, |
|
"rewards/rejected": -0.11253450065851212, |
|
"sft_loss": 0.8815126419067383, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.5292287174927761, |
|
"grad_norm": 1.9089457988739014, |
|
"learning_rate": 2.4210493774369903e-06, |
|
"logits/chosen": -2.843306064605713, |
|
"logits/rejected": -2.844432830810547, |
|
"logps/chosen": -0.8710628747940063, |
|
"logps/rejected": -1.0414961576461792, |
|
"loss": 0.9387, |
|
"odds_ratio_loss": 0.6761898398399353, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.08710628747940063, |
|
"rewards/margins": 0.01704334281384945, |
|
"rewards/rejected": -0.10414961725473404, |
|
"sft_loss": 0.8710628747940063, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.547010446765948, |
|
"grad_norm": 1.2754076719284058, |
|
"learning_rate": 2.374502731642732e-06, |
|
"logits/chosen": -2.855776309967041, |
|
"logits/rejected": -2.8679866790771484, |
|
"logps/chosen": -0.926120400428772, |
|
"logps/rejected": -1.1001455783843994, |
|
"loss": 0.9908, |
|
"odds_ratio_loss": 0.6463494896888733, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.09261203557252884, |
|
"rewards/margins": 0.01740253157913685, |
|
"rewards/rejected": -0.11001457273960114, |
|
"sft_loss": 0.926120400428772, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.56479217603912, |
|
"grad_norm": 1.6846286058425903, |
|
"learning_rate": 2.3279996628987556e-06, |
|
"logits/chosen": -2.844256639480591, |
|
"logits/rejected": -2.8760454654693604, |
|
"logps/chosen": -0.8887776136398315, |
|
"logps/rejected": -1.0579562187194824, |
|
"loss": 0.9585, |
|
"odds_ratio_loss": 0.6975380778312683, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.08887775987386703, |
|
"rewards/margins": 0.01691785827279091, |
|
"rewards/rejected": -0.10579562187194824, |
|
"sft_loss": 0.8887776136398315, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.5825739053122916, |
|
"grad_norm": 1.5345253944396973, |
|
"learning_rate": 2.281556318700474e-06, |
|
"logits/chosen": -2.851228952407837, |
|
"logits/rejected": -2.8857483863830566, |
|
"logps/chosen": -0.82933509349823, |
|
"logps/rejected": -0.9570034742355347, |
|
"loss": 0.8991, |
|
"odds_ratio_loss": 0.697514533996582, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.08293350785970688, |
|
"rewards/margins": 0.012766830623149872, |
|
"rewards/rejected": -0.09570035338401794, |
|
"sft_loss": 0.82933509349823, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.6003556345854635, |
|
"grad_norm": 3.0883193016052246, |
|
"learning_rate": 2.2351888258048408e-06, |
|
"logits/chosen": -2.7954063415527344, |
|
"logits/rejected": -2.8361153602600098, |
|
"logps/chosen": -0.8643034100532532, |
|
"logps/rejected": -1.0512335300445557, |
|
"loss": 0.928, |
|
"odds_ratio_loss": 0.6372779011726379, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.08643033355474472, |
|
"rewards/margins": 0.018693022429943085, |
|
"rewards/rejected": -0.1051233559846878, |
|
"sft_loss": 0.8643034100532532, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.6181373638586352, |
|
"grad_norm": 2.0690836906433105, |
|
"learning_rate": 2.188913284630584e-06, |
|
"logits/chosen": -2.8729634284973145, |
|
"logits/rejected": -2.8929648399353027, |
|
"logps/chosen": -0.9121950268745422, |
|
"logps/rejected": -1.029121994972229, |
|
"loss": 0.9828, |
|
"odds_ratio_loss": 0.7060950398445129, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.09121949970722198, |
|
"rewards/margins": 0.011692697182297707, |
|
"rewards/rejected": -0.10291220247745514, |
|
"sft_loss": 0.9121950268745422, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.635919093131807, |
|
"grad_norm": 3.8324978351593018, |
|
"learning_rate": 2.1427457636675652e-06, |
|
"logits/chosen": -2.862370014190674, |
|
"logits/rejected": -2.88765025138855, |
|
"logps/chosen": -0.8752270936965942, |
|
"logps/rejected": -1.0570242404937744, |
|
"loss": 0.944, |
|
"odds_ratio_loss": 0.6881860494613647, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.0875227078795433, |
|
"rewards/margins": 0.018179720267653465, |
|
"rewards/rejected": -0.10570243746042252, |
|
"sft_loss": 0.8752270936965942, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.653700822404979, |
|
"grad_norm": 1.9543755054473877, |
|
"learning_rate": 2.096702293897247e-06, |
|
"logits/chosen": -2.8620665073394775, |
|
"logits/rejected": -2.876176357269287, |
|
"logps/chosen": -0.8246392011642456, |
|
"logps/rejected": -1.1381771564483643, |
|
"loss": 0.8887, |
|
"odds_ratio_loss": 0.6403511166572571, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.08246392011642456, |
|
"rewards/margins": 0.03135378286242485, |
|
"rewards/rejected": -0.1138177141547203, |
|
"sft_loss": 0.8246392011642456, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.6714825516781509, |
|
"grad_norm": 2.1185944080352783, |
|
"learning_rate": 2.0507988632261672e-06, |
|
"logits/chosen": -2.834115505218506, |
|
"logits/rejected": -2.8819518089294434, |
|
"logps/chosen": -0.8675654530525208, |
|
"logps/rejected": -1.0815333127975464, |
|
"loss": 0.9294, |
|
"odds_ratio_loss": 0.6182416677474976, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.08675654232501984, |
|
"rewards/margins": 0.02139679528772831, |
|
"rewards/rejected": -0.1081533432006836, |
|
"sft_loss": 0.8675654530525208, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.6892642809513225, |
|
"grad_norm": 3.2840750217437744, |
|
"learning_rate": 2.005051410934382e-06, |
|
"logits/chosen": -2.8422374725341797, |
|
"logits/rejected": -2.8813586235046387, |
|
"logps/chosen": -0.9204028248786926, |
|
"logps/rejected": -1.0668128728866577, |
|
"loss": 0.9896, |
|
"odds_ratio_loss": 0.69184410572052, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.0920402854681015, |
|
"rewards/margins": 0.01464100182056427, |
|
"rewards/rejected": -0.10668128728866577, |
|
"sft_loss": 0.9204028248786926, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.7070460102244942, |
|
"grad_norm": 1.923535704612732, |
|
"learning_rate": 1.9594758221407843e-06, |
|
"logits/chosen": -2.8552603721618652, |
|
"logits/rejected": -2.8695271015167236, |
|
"logps/chosen": -0.8000820279121399, |
|
"logps/rejected": -1.0352107286453247, |
|
"loss": 0.8598, |
|
"odds_ratio_loss": 0.5971243977546692, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.08000819385051727, |
|
"rewards/margins": 0.02351287379860878, |
|
"rewards/rejected": -0.10352107137441635, |
|
"sft_loss": 0.8000820279121399, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.724827739497666, |
|
"grad_norm": 3.057365894317627, |
|
"learning_rate": 1.9140879222872408e-06, |
|
"logits/chosen": -2.873203992843628, |
|
"logits/rejected": -2.894540309906006, |
|
"logps/chosen": -0.813400149345398, |
|
"logps/rejected": -0.9625688791275024, |
|
"loss": 0.8825, |
|
"odds_ratio_loss": 0.6909626722335815, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.08134002983570099, |
|
"rewards/margins": 0.014916857704520226, |
|
"rewards/rejected": -0.09625686705112457, |
|
"sft_loss": 0.813400149345398, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.742609468770838, |
|
"grad_norm": 1.2360864877700806, |
|
"learning_rate": 1.8689034716434346e-06, |
|
"logits/chosen": -2.8773324489593506, |
|
"logits/rejected": -2.8978989124298096, |
|
"logps/chosen": -0.8950971364974976, |
|
"logps/rejected": -1.0365970134735107, |
|
"loss": 0.9651, |
|
"odds_ratio_loss": 0.7001620531082153, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.08950972557067871, |
|
"rewards/margins": 0.014149991795420647, |
|
"rewards/rejected": -0.1036597266793251, |
|
"sft_loss": 0.8950971364974976, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.76039119804401, |
|
"grad_norm": 1.4812334775924683, |
|
"learning_rate": 1.8239381598343576e-06, |
|
"logits/chosen": -2.8683552742004395, |
|
"logits/rejected": -2.890810012817383, |
|
"logps/chosen": -0.8186439275741577, |
|
"logps/rejected": -1.0320905447006226, |
|
"loss": 0.8854, |
|
"odds_ratio_loss": 0.6674761772155762, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.08186439424753189, |
|
"rewards/margins": 0.021344667300581932, |
|
"rewards/rejected": -0.10320906341075897, |
|
"sft_loss": 0.8186439275741577, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.7781729273171816, |
|
"grad_norm": 5.4584126472473145, |
|
"learning_rate": 1.779207600392312e-06, |
|
"logits/chosen": -2.8556861877441406, |
|
"logits/rejected": -2.859384298324585, |
|
"logps/chosen": -0.840672492980957, |
|
"logps/rejected": -1.0127156972885132, |
|
"loss": 0.906, |
|
"odds_ratio_loss": 0.6529561877250671, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.08406725525856018, |
|
"rewards/margins": 0.01720432937145233, |
|
"rewards/rejected": -0.10127158463001251, |
|
"sft_loss": 0.840672492980957, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.7781729273171816, |
|
"eval_logits/chosen": -2.8532934188842773, |
|
"eval_logits/rejected": -2.8782994747161865, |
|
"eval_logps/chosen": -0.8414809107780457, |
|
"eval_logps/rejected": -1.0811069011688232, |
|
"eval_loss": 0.903913676738739, |
|
"eval_odds_ratio_loss": 0.6243272423744202, |
|
"eval_rewards/accuracies": 0.578000009059906, |
|
"eval_rewards/chosen": -0.0841480940580368, |
|
"eval_rewards/margins": 0.023962605744600296, |
|
"eval_rewards/rejected": -0.1081107035279274, |
|
"eval_runtime": 189.1709, |
|
"eval_samples_per_second": 5.286, |
|
"eval_sft_loss": 0.8414809107780457, |
|
"eval_steps_per_second": 2.643, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.7959546565903532, |
|
"grad_norm": 1.9621540307998657, |
|
"learning_rate": 1.7347273253353552e-06, |
|
"logits/chosen": -2.8437576293945312, |
|
"logits/rejected": -2.8630166053771973, |
|
"logps/chosen": -0.8406645655632019, |
|
"logps/rejected": -1.0365639925003052, |
|
"loss": 0.9076, |
|
"odds_ratio_loss": 0.669218897819519, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.08406645804643631, |
|
"rewards/margins": 0.019589943811297417, |
|
"rewards/rejected": -0.10365639626979828, |
|
"sft_loss": 0.8406645655632019, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.8137363858635251, |
|
"grad_norm": 4.011701583862305, |
|
"learning_rate": 1.690512779774029e-06, |
|
"logits/chosen": -2.848282814025879, |
|
"logits/rejected": -2.8511271476745605, |
|
"logps/chosen": -0.8997753858566284, |
|
"logps/rejected": -1.1389718055725098, |
|
"loss": 0.9632, |
|
"odds_ratio_loss": 0.6343892812728882, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.08997754007577896, |
|
"rewards/margins": 0.023919641971588135, |
|
"rewards/rejected": -0.11389718949794769, |
|
"sft_loss": 0.8997753858566284, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.831518115136697, |
|
"grad_norm": 2.2862370014190674, |
|
"learning_rate": 1.6465793165482838e-06, |
|
"logits/chosen": -2.8141562938690186, |
|
"logits/rejected": -2.8219082355499268, |
|
"logps/chosen": -0.8186345100402832, |
|
"logps/rejected": -1.0088984966278076, |
|
"loss": 0.8807, |
|
"odds_ratio_loss": 0.6206359267234802, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.08186344802379608, |
|
"rewards/margins": 0.019026407971978188, |
|
"rewards/rejected": -0.10088986158370972, |
|
"sft_loss": 0.8186345100402832, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.849299844409869, |
|
"grad_norm": 2.5129692554473877, |
|
"learning_rate": 1.6029421908964305e-06, |
|
"logits/chosen": -2.821484088897705, |
|
"logits/rejected": -2.8394651412963867, |
|
"logps/chosen": -0.8219555020332336, |
|
"logps/rejected": -1.3423779010772705, |
|
"loss": 0.8817, |
|
"odds_ratio_loss": 0.5971612334251404, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.08219555765390396, |
|
"rewards/margins": 0.052042216062545776, |
|
"rewards/rejected": -0.13423778116703033, |
|
"sft_loss": 0.8219555020332336, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.8670815736830408, |
|
"grad_norm": 2.674842596054077, |
|
"learning_rate": 1.559616555157985e-06, |
|
"logits/chosen": -2.8907628059387207, |
|
"logits/rejected": -2.8810207843780518, |
|
"logps/chosen": -0.8370941281318665, |
|
"logps/rejected": -1.0264050960540771, |
|
"loss": 0.9034, |
|
"odds_ratio_loss": 0.6629946231842041, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.08370941877365112, |
|
"rewards/margins": 0.018931085243821144, |
|
"rewards/rejected": -0.10264050960540771, |
|
"sft_loss": 0.8370941281318665, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.8848633029562125, |
|
"grad_norm": 2.72856068611145, |
|
"learning_rate": 1.516617453512252e-06, |
|
"logits/chosen": -2.8834056854248047, |
|
"logits/rejected": -2.8996243476867676, |
|
"logps/chosen": -0.8767944574356079, |
|
"logps/rejected": -1.0179784297943115, |
|
"loss": 0.9475, |
|
"odds_ratio_loss": 0.7070103883743286, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.08767944574356079, |
|
"rewards/margins": 0.014118405990302563, |
|
"rewards/rejected": -0.10179785639047623, |
|
"sft_loss": 0.8767944574356079, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.9026450322293842, |
|
"grad_norm": 3.5679309368133545, |
|
"learning_rate": 1.473959816754449e-06, |
|
"logits/chosen": -2.857292890548706, |
|
"logits/rejected": -2.8642122745513916, |
|
"logps/chosen": -0.8274821043014526, |
|
"logps/rejected": -0.9466173052787781, |
|
"loss": 0.8975, |
|
"odds_ratio_loss": 0.6997173428535461, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.08274821937084198, |
|
"rewards/margins": 0.011913511902093887, |
|
"rewards/rejected": -0.09466172009706497, |
|
"sft_loss": 0.8274821043014526, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.920426761502556, |
|
"grad_norm": 1.369982361793518, |
|
"learning_rate": 1.4316584571112213e-06, |
|
"logits/chosen": -2.8926608562469482, |
|
"logits/rejected": -2.9016175270080566, |
|
"logps/chosen": -0.878426194190979, |
|
"logps/rejected": -1.022551417350769, |
|
"loss": 0.9463, |
|
"odds_ratio_loss": 0.6782765984535217, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.08784262835979462, |
|
"rewards/margins": 0.014412516728043556, |
|
"rewards/rejected": -0.10225514322519302, |
|
"sft_loss": 0.878426194190979, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.938208490775728, |
|
"grad_norm": 2.1955406665802, |
|
"learning_rate": 1.389728063097306e-06, |
|
"logits/chosen": -2.859020471572876, |
|
"logits/rejected": -2.871717929840088, |
|
"logps/chosen": -0.8529576063156128, |
|
"logps/rejected": -1.0697009563446045, |
|
"loss": 0.9165, |
|
"odds_ratio_loss": 0.6351792216300964, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.08529576659202576, |
|
"rewards/margins": 0.021674351766705513, |
|
"rewards/rejected": -0.10697011649608612, |
|
"sft_loss": 0.8529576063156128, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.9559902200488999, |
|
"grad_norm": 1.758565068244934, |
|
"learning_rate": 1.348183194415179e-06, |
|
"logits/chosen": -2.853273868560791, |
|
"logits/rejected": -2.826066493988037, |
|
"logps/chosen": -0.8589800000190735, |
|
"logps/rejected": -1.1160128116607666, |
|
"loss": 0.9192, |
|
"odds_ratio_loss": 0.6018035411834717, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.0858980119228363, |
|
"rewards/margins": 0.025703275576233864, |
|
"rewards/rejected": -0.11160127818584442, |
|
"sft_loss": 0.8589800000190735, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.9737719493220716, |
|
"grad_norm": 2.2166192531585693, |
|
"learning_rate": 1.3070382768994015e-06, |
|
"logits/chosen": -2.8573784828186035, |
|
"logits/rejected": -2.87496018409729, |
|
"logps/chosen": -0.8593405485153198, |
|
"logps/rejected": -1.0153789520263672, |
|
"loss": 0.9242, |
|
"odds_ratio_loss": 0.6489418148994446, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.08593406528234482, |
|
"rewards/margins": 0.015603835694491863, |
|
"rewards/rejected": -0.10153790563344955, |
|
"sft_loss": 0.8593405485153198, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.9915536785952432, |
|
"grad_norm": 2.127784013748169, |
|
"learning_rate": 1.2663075975074746e-06, |
|
"logits/chosen": -2.8475844860076904, |
|
"logits/rejected": -2.8674192428588867, |
|
"logps/chosen": -0.830818772315979, |
|
"logps/rejected": -1.1525778770446777, |
|
"loss": 0.8944, |
|
"odds_ratio_loss": 0.635840654373169, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.08308187872171402, |
|
"rewards/margins": 0.032175906002521515, |
|
"rewards/rejected": -0.11525777727365494, |
|
"sft_loss": 0.830818772315979, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 2.009335407868415, |
|
"grad_norm": 2.4992928504943848, |
|
"learning_rate": 1.2260052993589034e-06, |
|
"logits/chosen": -2.8519034385681152, |
|
"logits/rejected": -2.8721909523010254, |
|
"logps/chosen": -0.9101880192756653, |
|
"logps/rejected": -1.0249983072280884, |
|
"loss": 0.9847, |
|
"odds_ratio_loss": 0.7451602220535278, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.09101879596710205, |
|
"rewards/margins": 0.011481017805635929, |
|
"rewards/rejected": -0.102499820291996, |
|
"sft_loss": 0.9101880192756653, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 2.027117137141587, |
|
"grad_norm": 1.3270055055618286, |
|
"learning_rate": 1.1861453768242099e-06, |
|
"logits/chosen": -2.8831446170806885, |
|
"logits/rejected": -2.8843555450439453, |
|
"logps/chosen": -0.7847625613212585, |
|
"logps/rejected": -1.0357749462127686, |
|
"loss": 0.8449, |
|
"odds_ratio_loss": 0.6014257669448853, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.07847625762224197, |
|
"rewards/margins": 0.025101233273744583, |
|
"rewards/rejected": -0.10357747972011566, |
|
"sft_loss": 0.7847625613212585, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 2.044898866414759, |
|
"grad_norm": 11.792732238769531, |
|
"learning_rate": 1.1467416706655982e-06, |
|
"logits/chosen": -2.8700387477874756, |
|
"logits/rejected": -2.8984532356262207, |
|
"logps/chosen": -0.9160982370376587, |
|
"logps/rejected": -1.110647439956665, |
|
"loss": 0.9851, |
|
"odds_ratio_loss": 0.6902655363082886, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.09160983562469482, |
|
"rewards/margins": 0.019454922527074814, |
|
"rewards/rejected": -0.11106475442647934, |
|
"sft_loss": 0.9160982370376587, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.062680595687931, |
|
"grad_norm": 2.206909418106079, |
|
"learning_rate": 1.1078078632309559e-06, |
|
"logits/chosen": -2.8493118286132812, |
|
"logits/rejected": -2.8856310844421387, |
|
"logps/chosen": -0.8342447280883789, |
|
"logps/rejected": -1.0583864450454712, |
|
"loss": 0.8987, |
|
"odds_ratio_loss": 0.644468367099762, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.08342447131872177, |
|
"rewards/margins": 0.022414181381464005, |
|
"rewards/rejected": -0.10583865642547607, |
|
"sft_loss": 0.8342447280883789, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 2.0804623249611023, |
|
"grad_norm": 4.428743362426758, |
|
"learning_rate": 1.0693574737028627e-06, |
|
"logits/chosen": -2.860917329788208, |
|
"logits/rejected": -2.8824262619018555, |
|
"logps/chosen": -0.859830379486084, |
|
"logps/rejected": -1.011566162109375, |
|
"loss": 0.9292, |
|
"odds_ratio_loss": 0.6934677362442017, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.08598305284976959, |
|
"rewards/margins": 0.015173576772212982, |
|
"rewards/rejected": -0.10115662962198257, |
|
"sft_loss": 0.859830379486084, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 2.098244054234274, |
|
"grad_norm": 2.3405988216400146, |
|
"learning_rate": 1.0314038534042586e-06, |
|
"logits/chosen": -2.88313364982605, |
|
"logits/rejected": -2.89017915725708, |
|
"logps/chosen": -0.8049169778823853, |
|
"logps/rejected": -1.0722546577453613, |
|
"loss": 0.8704, |
|
"odds_ratio_loss": 0.6548857688903809, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.08049169927835464, |
|
"rewards/margins": 0.026733767241239548, |
|
"rewards/rejected": -0.1072254627943039, |
|
"sft_loss": 0.8049169778823853, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 2.116025783507446, |
|
"grad_norm": 3.3044447898864746, |
|
"learning_rate": 9.939601811623946e-07, |
|
"logits/chosen": -2.8709566593170166, |
|
"logits/rejected": -2.8734612464904785, |
|
"logps/chosen": -0.84669029712677, |
|
"logps/rejected": -1.037172555923462, |
|
"loss": 0.9141, |
|
"odds_ratio_loss": 0.6745220422744751, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.08466903120279312, |
|
"rewards/margins": 0.019048208370804787, |
|
"rewards/rejected": -0.10371723026037216, |
|
"sft_loss": 0.84669029712677, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 2.133807512780618, |
|
"grad_norm": 1.8569538593292236, |
|
"learning_rate": 9.570394587326825e-07, |
|
"logits/chosen": -2.8537509441375732, |
|
"logits/rejected": -2.8589584827423096, |
|
"logps/chosen": -0.8301420211791992, |
|
"logps/rejected": -1.0919809341430664, |
|
"loss": 0.8916, |
|
"odds_ratio_loss": 0.6140956878662109, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.08301420509815216, |
|
"rewards/margins": 0.02618388459086418, |
|
"rewards/rejected": -0.10919810831546783, |
|
"sft_loss": 0.8301420211791992, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.15158924205379, |
|
"grad_norm": 1.6108454465866089, |
|
"learning_rate": 9.206545062840302e-07, |
|
"logits/chosen": -2.9044604301452637, |
|
"logits/rejected": -2.876079559326172, |
|
"logps/chosen": -0.7979404926300049, |
|
"logps/rejected": -1.1050665378570557, |
|
"loss": 0.8573, |
|
"odds_ratio_loss": 0.5938332676887512, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.07979404926300049, |
|
"rewards/margins": 0.03071259893476963, |
|
"rewards/rejected": -0.11050665378570557, |
|
"sft_loss": 0.7979404926300049, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 2.1693709713269618, |
|
"grad_norm": 1.6135528087615967, |
|
"learning_rate": 8.848179579472285e-07, |
|
"logits/chosen": -2.8942551612854004, |
|
"logits/rejected": -2.9058589935302734, |
|
"logps/chosen": -0.8136836886405945, |
|
"logps/rejected": -0.9423009157180786, |
|
"loss": 0.8803, |
|
"odds_ratio_loss": 0.666199803352356, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.08136837184429169, |
|
"rewards/margins": 0.012861723080277443, |
|
"rewards/rejected": -0.09423010051250458, |
|
"sft_loss": 0.8136836886405945, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 2.1871527006001332, |
|
"grad_norm": 4.099541664123535, |
|
"learning_rate": 8.495422574279403e-07, |
|
"logits/chosen": -2.8292388916015625, |
|
"logits/rejected": -2.825455904006958, |
|
"logps/chosen": -0.7480133175849915, |
|
"logps/rejected": -1.0116589069366455, |
|
"loss": 0.8073, |
|
"odds_ratio_loss": 0.5924416780471802, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.07480133324861526, |
|
"rewards/margins": 0.026364561170339584, |
|
"rewards/rejected": -0.10116589069366455, |
|
"sft_loss": 0.7480133175849915, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 2.204934429873305, |
|
"grad_norm": 2.086594581604004, |
|
"learning_rate": 8.148396536858063e-07, |
|
"logits/chosen": -2.863175630569458, |
|
"logits/rejected": -2.8580069541931152, |
|
"logps/chosen": -0.8519015312194824, |
|
"logps/rejected": -1.1402422189712524, |
|
"loss": 0.9162, |
|
"odds_ratio_loss": 0.6432971954345703, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.08519015461206436, |
|
"rewards/margins": 0.028834056109189987, |
|
"rewards/rejected": -0.11402420699596405, |
|
"sft_loss": 0.8519015312194824, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.222716159146477, |
|
"grad_norm": 2.096405506134033, |
|
"learning_rate": 7.807221966811815e-07, |
|
"logits/chosen": -2.8480238914489746, |
|
"logits/rejected": -2.8696630001068115, |
|
"logps/chosen": -0.8801048994064331, |
|
"logps/rejected": -1.0339150428771973, |
|
"loss": 0.9478, |
|
"odds_ratio_loss": 0.6773165464401245, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.08801049739122391, |
|
"rewards/margins": 0.015381010249257088, |
|
"rewards/rejected": -0.10339150577783585, |
|
"sft_loss": 0.8801048994064331, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.240497888419649, |
|
"grad_norm": 1.47752845287323, |
|
"learning_rate": 7.47201733190962e-07, |
|
"logits/chosen": -2.831873655319214, |
|
"logits/rejected": -2.850604772567749, |
|
"logps/chosen": -0.798612117767334, |
|
"logps/rejected": -1.0020263195037842, |
|
"loss": 0.8614, |
|
"odds_ratio_loss": 0.6277336478233337, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.07986120879650116, |
|
"rewards/margins": 0.020341429859399796, |
|
"rewards/rejected": -0.10020263493061066, |
|
"sft_loss": 0.798612117767334, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.258279617692821, |
|
"grad_norm": 1.7826944589614868, |
|
"learning_rate": 7.142899026949721e-07, |
|
"logits/chosen": -2.8982224464416504, |
|
"logits/rejected": -2.9022057056427, |
|
"logps/chosen": -0.817043662071228, |
|
"logps/rejected": -0.9819319844245911, |
|
"loss": 0.8802, |
|
"odds_ratio_loss": 0.6313419938087463, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.08170437067747116, |
|
"rewards/margins": 0.016488831490278244, |
|
"rewards/rejected": -0.0981932133436203, |
|
"sft_loss": 0.817043662071228, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 2.2760613469659923, |
|
"grad_norm": 3.5314130783081055, |
|
"learning_rate": 6.819981333343273e-07, |
|
"logits/chosen": -2.81345796585083, |
|
"logits/rejected": -2.830575704574585, |
|
"logps/chosen": -0.8146475553512573, |
|
"logps/rejected": -1.0825046300888062, |
|
"loss": 0.8761, |
|
"odds_ratio_loss": 0.6145650744438171, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.08146476000547409, |
|
"rewards/margins": 0.026785722002387047, |
|
"rewards/rejected": -0.10825047641992569, |
|
"sft_loss": 0.8146475553512573, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.293843076239164, |
|
"grad_norm": 2.047950267791748, |
|
"learning_rate": 6.503376379431839e-07, |
|
"logits/chosen": -2.8456294536590576, |
|
"logits/rejected": -2.8468470573425293, |
|
"logps/chosen": -0.8984563946723938, |
|
"logps/rejected": -0.965488612651825, |
|
"loss": 0.9673, |
|
"odds_ratio_loss": 0.68804532289505, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.08984563499689102, |
|
"rewards/margins": 0.006703221704810858, |
|
"rewards/rejected": -0.09654886275529861, |
|
"sft_loss": 0.8984563946723938, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 2.311624805512336, |
|
"grad_norm": 1.6956673860549927, |
|
"learning_rate": 6.193194101552502e-07, |
|
"logits/chosen": -2.8512775897979736, |
|
"logits/rejected": -2.8425419330596924, |
|
"logps/chosen": -0.793393611907959, |
|
"logps/rejected": -1.0585198402404785, |
|
"loss": 0.8516, |
|
"odds_ratio_loss": 0.5823057293891907, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.07933936268091202, |
|
"rewards/margins": 0.026512619107961655, |
|
"rewards/rejected": -0.10585198551416397, |
|
"sft_loss": 0.793393611907959, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.329406534785508, |
|
"grad_norm": 2.3841018676757812, |
|
"learning_rate": 5.889542205864083e-07, |
|
"logits/chosen": -2.848145008087158, |
|
"logits/rejected": -2.8572099208831787, |
|
"logps/chosen": -0.853177547454834, |
|
"logps/rejected": -1.0807201862335205, |
|
"loss": 0.9165, |
|
"odds_ratio_loss": 0.6334924697875977, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.08531775325536728, |
|
"rewards/margins": 0.022754264995455742, |
|
"rewards/rejected": -0.10807202011346817, |
|
"sft_loss": 0.853177547454834, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 2.34718826405868, |
|
"grad_norm": 1.8613510131835938, |
|
"learning_rate": 5.592526130947862e-07, |
|
"logits/chosen": -2.81400203704834, |
|
"logits/rejected": -2.84773325920105, |
|
"logps/chosen": -0.8714796304702759, |
|
"logps/rejected": -1.0702935457229614, |
|
"loss": 0.9403, |
|
"odds_ratio_loss": 0.6886882781982422, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.08714796602725983, |
|
"rewards/margins": 0.019881393760442734, |
|
"rewards/rejected": -0.10702935606241226, |
|
"sft_loss": 0.8714796304702759, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 2.3649699933318518, |
|
"grad_norm": 1.9149771928787231, |
|
"learning_rate": 5.302249011195507e-07, |
|
"logits/chosen": -2.82387375831604, |
|
"logits/rejected": -2.8486294746398926, |
|
"logps/chosen": -0.8160147666931152, |
|
"logps/rejected": -0.9580420255661011, |
|
"loss": 0.8828, |
|
"odds_ratio_loss": 0.6679019927978516, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.08160146325826645, |
|
"rewards/margins": 0.014202730730175972, |
|
"rewards/rejected": -0.09580419957637787, |
|
"sft_loss": 0.8160147666931152, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 2.382751722605023, |
|
"grad_norm": 2.322023630142212, |
|
"learning_rate": 5.018811640997307e-07, |
|
"logits/chosen": -2.833300828933716, |
|
"logits/rejected": -2.8586716651916504, |
|
"logps/chosen": -0.8773719072341919, |
|
"logps/rejected": -1.2085764408111572, |
|
"loss": 0.9397, |
|
"odds_ratio_loss": 0.6232184171676636, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.08773718029260635, |
|
"rewards/margins": 0.033120471984148026, |
|
"rewards/rejected": -0.12085764110088348, |
|
"sft_loss": 0.8773719072341919, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 2.400533451878195, |
|
"grad_norm": 1.6597644090652466, |
|
"learning_rate": 4.7423124397427105e-07, |
|
"logits/chosen": -2.8043930530548096, |
|
"logits/rejected": -2.8492674827575684, |
|
"logps/chosen": -0.8693526983261108, |
|
"logps/rejected": -1.019430160522461, |
|
"loss": 0.9355, |
|
"odds_ratio_loss": 0.6619070768356323, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.08693526685237885, |
|
"rewards/margins": 0.015007746405899525, |
|
"rewards/rejected": -0.1019430160522461, |
|
"sft_loss": 0.8693526983261108, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.418315181151367, |
|
"grad_norm": 2.013772487640381, |
|
"learning_rate": 4.472847417645787e-07, |
|
"logits/chosen": -2.856095552444458, |
|
"logits/rejected": -2.853999614715576, |
|
"logps/chosen": -0.8434507250785828, |
|
"logps/rejected": -1.1554135084152222, |
|
"loss": 0.9027, |
|
"odds_ratio_loss": 0.5929771661758423, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.08434506505727768, |
|
"rewards/margins": 0.031196290627121925, |
|
"rewards/rejected": -0.11554136127233505, |
|
"sft_loss": 0.8434507250785828, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 2.436096910424539, |
|
"grad_norm": 1.1766934394836426, |
|
"learning_rate": 4.210510142406993e-07, |
|
"logits/chosen": -2.826666831970215, |
|
"logits/rejected": -2.8096094131469727, |
|
"logps/chosen": -0.8101630210876465, |
|
"logps/rejected": -1.061128854751587, |
|
"loss": 0.8711, |
|
"odds_ratio_loss": 0.6095449924468994, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.08101630210876465, |
|
"rewards/margins": 0.0250965915620327, |
|
"rewards/rejected": -0.10611288249492645, |
|
"sft_loss": 0.8101630210876465, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 2.4538786396977104, |
|
"grad_norm": 1.6385358572006226, |
|
"learning_rate": 3.9553917067232966e-07, |
|
"logits/chosen": -2.8439018726348877, |
|
"logits/rejected": -2.869356155395508, |
|
"logps/chosen": -0.8570329546928406, |
|
"logps/rejected": -1.0249860286712646, |
|
"loss": 0.9248, |
|
"odds_ratio_loss": 0.6779541373252869, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.0857032984495163, |
|
"rewards/margins": 0.016795307397842407, |
|
"rewards/rejected": -0.1024986058473587, |
|
"sft_loss": 0.8570329546928406, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.4716603689708823, |
|
"grad_norm": 1.9620130062103271, |
|
"learning_rate": 3.707580696657509e-07, |
|
"logits/chosen": -2.8321003913879395, |
|
"logits/rejected": -2.8363351821899414, |
|
"logps/chosen": -0.8065320253372192, |
|
"logps/rejected": -0.9883378744125366, |
|
"loss": 0.8702, |
|
"odds_ratio_loss": 0.6370663046836853, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.08065320551395416, |
|
"rewards/margins": 0.018180575221776962, |
|
"rewards/rejected": -0.09883377701044083, |
|
"sft_loss": 0.8065320253372192, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 2.489442098244054, |
|
"grad_norm": 1.9136977195739746, |
|
"learning_rate": 3.4671631608781815e-07, |
|
"logits/chosen": -2.8458282947540283, |
|
"logits/rejected": -2.858309745788574, |
|
"logps/chosen": -0.8568046689033508, |
|
"logps/rejected": -1.1048234701156616, |
|
"loss": 0.9252, |
|
"odds_ratio_loss": 0.6840890645980835, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.08568046987056732, |
|
"rewards/margins": 0.02480187825858593, |
|
"rewards/rejected": -0.110482357442379, |
|
"sft_loss": 0.8568046689033508, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.507223827517226, |
|
"grad_norm": 2.3483951091766357, |
|
"learning_rate": 3.234222580780405e-07, |
|
"logits/chosen": -2.8452916145324707, |
|
"logits/rejected": -2.8620214462280273, |
|
"logps/chosen": -0.8229707479476929, |
|
"logps/rejected": -1.0171488523483276, |
|
"loss": 0.8863, |
|
"odds_ratio_loss": 0.6332994699478149, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.08229707181453705, |
|
"rewards/margins": 0.01941780559718609, |
|
"rewards/rejected": -0.10171488672494888, |
|
"sft_loss": 0.8229707479476929, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 2.525005556790398, |
|
"grad_norm": 1.985844612121582, |
|
"learning_rate": 3.0088398414982375e-07, |
|
"logits/chosen": -2.810109853744507, |
|
"logits/rejected": -2.8480405807495117, |
|
"logps/chosen": -0.8503270149230957, |
|
"logps/rejected": -1.14946711063385, |
|
"loss": 0.9167, |
|
"odds_ratio_loss": 0.6633514165878296, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.08503270149230957, |
|
"rewards/margins": 0.02991401217877865, |
|
"rewards/rejected": -0.11494670808315277, |
|
"sft_loss": 0.8503270149230957, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 2.54278728606357, |
|
"grad_norm": 1.8843451738357544, |
|
"learning_rate": 2.7910932038184487e-07, |
|
"logits/chosen": -2.828174352645874, |
|
"logits/rejected": -2.8114073276519775, |
|
"logps/chosen": -0.8486431241035461, |
|
"logps/rejected": -1.0430662631988525, |
|
"loss": 0.9131, |
|
"odds_ratio_loss": 0.6441913843154907, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.08486433327198029, |
|
"rewards/margins": 0.019442306831479073, |
|
"rewards/rejected": -0.10430662333965302, |
|
"sft_loss": 0.8486431241035461, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 2.5605690153367417, |
|
"grad_norm": 1.9914612770080566, |
|
"learning_rate": 2.5810582770057325e-07, |
|
"logits/chosen": -2.873000383377075, |
|
"logits/rejected": -2.902294635772705, |
|
"logps/chosen": -0.8076226115226746, |
|
"logps/rejected": -1.0467280149459839, |
|
"loss": 0.8714, |
|
"odds_ratio_loss": 0.6380000114440918, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.08076226711273193, |
|
"rewards/margins": 0.023910541087388992, |
|
"rewards/rejected": -0.10467280447483063, |
|
"sft_loss": 0.8076226115226746, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 2.578350744609913, |
|
"grad_norm": 2.0563340187072754, |
|
"learning_rate": 2.3788079925484402e-07, |
|
"logits/chosen": -2.849846601486206, |
|
"logits/rejected": -2.8575031757354736, |
|
"logps/chosen": -0.8711313009262085, |
|
"logps/rejected": -0.9900692105293274, |
|
"loss": 0.9391, |
|
"odds_ratio_loss": 0.6800366044044495, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.08711313456296921, |
|
"rewards/margins": 0.01189377624541521, |
|
"rewards/rejected": -0.09900690615177155, |
|
"sft_loss": 0.8711313009262085, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 2.596132473883085, |
|
"grad_norm": 2.145470142364502, |
|
"learning_rate": 2.1844125788342661e-07, |
|
"logits/chosen": -2.840655565261841, |
|
"logits/rejected": -2.859443187713623, |
|
"logps/chosen": -0.7942542433738708, |
|
"logps/rejected": -1.2094497680664062, |
|
"loss": 0.8548, |
|
"odds_ratio_loss": 0.6057204008102417, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.07942543178796768, |
|
"rewards/margins": 0.04151954501867294, |
|
"rewards/rejected": -0.12094497680664062, |
|
"sft_loss": 0.7942542433738708, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 2.613914203156257, |
|
"grad_norm": 1.8150006532669067, |
|
"learning_rate": 1.9979395367644428e-07, |
|
"logits/chosen": -2.865077495574951, |
|
"logits/rejected": -2.8804574012756348, |
|
"logps/chosen": -0.8026032447814941, |
|
"logps/rejected": -1.044508457183838, |
|
"loss": 0.8624, |
|
"odds_ratio_loss": 0.5979448556900024, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.08026032149791718, |
|
"rewards/margins": 0.024190524592995644, |
|
"rewards/rejected": -0.10445085912942886, |
|
"sft_loss": 0.8026032447814941, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 2.631695932429429, |
|
"grad_norm": 3.1679821014404297, |
|
"learning_rate": 1.81945361631512e-07, |
|
"logits/chosen": -2.8732008934020996, |
|
"logits/rejected": -2.8841676712036133, |
|
"logps/chosen": -0.8442559242248535, |
|
"logps/rejected": -1.0102087259292603, |
|
"loss": 0.9134, |
|
"odds_ratio_loss": 0.6910725235939026, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.08442559093236923, |
|
"rewards/margins": 0.016595274209976196, |
|
"rewards/rejected": -0.10102085769176483, |
|
"sft_loss": 0.8442559242248535, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 2.6494776617026004, |
|
"grad_norm": 2.5551412105560303, |
|
"learning_rate": 1.6490167940538343e-07, |
|
"logits/chosen": -2.8778164386749268, |
|
"logits/rejected": -2.8786826133728027, |
|
"logps/chosen": -0.8008125424385071, |
|
"logps/rejected": -1.0858347415924072, |
|
"loss": 0.8593, |
|
"odds_ratio_loss": 0.5846473574638367, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.08008125424385071, |
|
"rewards/margins": 0.028502214699983597, |
|
"rewards/rejected": -0.1085834726691246, |
|
"sft_loss": 0.8008125424385071, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 2.6672593909757722, |
|
"grad_norm": 1.7907795906066895, |
|
"learning_rate": 1.4866882516191339e-07, |
|
"logits/chosen": -2.795592784881592, |
|
"logits/rejected": -2.8337063789367676, |
|
"logps/chosen": -0.8366915583610535, |
|
"logps/rejected": -1.0835788249969482, |
|
"loss": 0.9019, |
|
"odds_ratio_loss": 0.652554988861084, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.08366917073726654, |
|
"rewards/margins": 0.0246887169778347, |
|
"rewards/rejected": -0.10835788398981094, |
|
"sft_loss": 0.8366915583610535, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.6672593909757722, |
|
"eval_logits/chosen": -2.8460819721221924, |
|
"eval_logits/rejected": -2.8720545768737793, |
|
"eval_logps/chosen": -0.8352006673812866, |
|
"eval_logps/rejected": -1.0736578702926636, |
|
"eval_loss": 0.8975116014480591, |
|
"eval_odds_ratio_loss": 0.6231085658073425, |
|
"eval_rewards/accuracies": 0.5899999737739563, |
|
"eval_rewards/chosen": -0.0835200697183609, |
|
"eval_rewards/margins": 0.023845719173550606, |
|
"eval_rewards/rejected": -0.10736579447984695, |
|
"eval_runtime": 189.1942, |
|
"eval_samples_per_second": 5.286, |
|
"eval_sft_loss": 0.8352006673812866, |
|
"eval_steps_per_second": 2.643, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.685041120248944, |
|
"grad_norm": 5.948815822601318, |
|
"learning_rate": 1.3325243551706057e-07, |
|
"logits/chosen": -2.823535442352295, |
|
"logits/rejected": -2.8570055961608887, |
|
"logps/chosen": -0.8226287961006165, |
|
"logps/rejected": -1.2481344938278198, |
|
"loss": 0.8823, |
|
"odds_ratio_loss": 0.5963335633277893, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.08226287364959717, |
|
"rewards/margins": 0.04255058616399765, |
|
"rewards/rejected": -0.12481345981359482, |
|
"sft_loss": 0.8226287961006165, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 2.702822849522116, |
|
"grad_norm": 2.1956028938293457, |
|
"learning_rate": 1.1865786358165737e-07, |
|
"logits/chosen": -2.841465473175049, |
|
"logits/rejected": -2.881883382797241, |
|
"logps/chosen": -0.8048586845397949, |
|
"logps/rejected": -0.9951068162918091, |
|
"loss": 0.8691, |
|
"odds_ratio_loss": 0.6420662999153137, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.08048586547374725, |
|
"rewards/margins": 0.019024807959794998, |
|
"rewards/rejected": -0.09951067715883255, |
|
"sft_loss": 0.8048586845397949, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 2.720604578795288, |
|
"grad_norm": 4.919430255889893, |
|
"learning_rate": 1.0489017710262311e-07, |
|
"logits/chosen": -2.8700149059295654, |
|
"logits/rejected": -2.9017858505249023, |
|
"logps/chosen": -0.8849034309387207, |
|
"logps/rejected": -1.1535307168960571, |
|
"loss": 0.9517, |
|
"odds_ratio_loss": 0.667965292930603, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.08849034458398819, |
|
"rewards/margins": 0.026862725615501404, |
|
"rewards/rejected": -0.11535308510065079, |
|
"sft_loss": 0.8849034309387207, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 2.73838630806846, |
|
"grad_norm": 2.982793092727661, |
|
"learning_rate": 9.195415670326446e-08, |
|
"logits/chosen": -2.874678611755371, |
|
"logits/rejected": -2.887955904006958, |
|
"logps/chosen": -0.828321099281311, |
|
"logps/rejected": -1.0579955577850342, |
|
"loss": 0.8916, |
|
"odds_ratio_loss": 0.6329658031463623, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.08283210545778275, |
|
"rewards/margins": 0.02296745963394642, |
|
"rewards/rejected": -0.10579957067966461, |
|
"sft_loss": 0.828321099281311, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 2.7561680373416317, |
|
"grad_norm": 2.7303407192230225, |
|
"learning_rate": 7.985429422327384e-08, |
|
"logits/chosen": -2.8821816444396973, |
|
"logits/rejected": -2.9013893604278564, |
|
"logps/chosen": -0.8389023542404175, |
|
"logps/rejected": -0.9915286302566528, |
|
"loss": 0.9067, |
|
"odds_ratio_loss": 0.6778423190116882, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.08389024436473846, |
|
"rewards/margins": 0.015262606553733349, |
|
"rewards/rejected": -0.09915284812450409, |
|
"sft_loss": 0.8389023542404175, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 2.773949766614803, |
|
"grad_norm": 1.570169448852539, |
|
"learning_rate": 6.859479115900818e-08, |
|
"logits/chosen": -2.848161458969116, |
|
"logits/rejected": -2.8777124881744385, |
|
"logps/chosen": -0.8677851557731628, |
|
"logps/rejected": -1.1111342906951904, |
|
"loss": 0.9293, |
|
"odds_ratio_loss": 0.6150751709938049, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.08677852153778076, |
|
"rewards/margins": 0.024334916844964027, |
|
"rewards/rejected": -0.11111342906951904, |
|
"sft_loss": 0.8677851557731628, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 2.791731495887975, |
|
"grad_norm": 6.0747294425964355, |
|
"learning_rate": 5.817955720457902e-08, |
|
"logits/chosen": -2.8411552906036377, |
|
"logits/rejected": -2.8624863624572754, |
|
"logps/chosen": -0.881393313407898, |
|
"logps/rejected": -1.1098216772079468, |
|
"loss": 0.9455, |
|
"odds_ratio_loss": 0.641205370426178, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.08813934028148651, |
|
"rewards/margins": 0.02284284308552742, |
|
"rewards/rejected": -0.11098217964172363, |
|
"sft_loss": 0.881393313407898, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 2.809513225161147, |
|
"grad_norm": 3.2441201210021973, |
|
"learning_rate": 4.861220889427199e-08, |
|
"logits/chosen": -2.8530640602111816, |
|
"logits/rejected": -2.865347385406494, |
|
"logps/chosen": -0.8542373776435852, |
|
"logps/rejected": -1.0531483888626099, |
|
"loss": 0.9195, |
|
"odds_ratio_loss": 0.6527055501937866, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.08542371541261673, |
|
"rewards/margins": 0.019891122356057167, |
|
"rewards/rejected": -0.10531485080718994, |
|
"sft_loss": 0.8542373776435852, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 2.827294954434319, |
|
"grad_norm": 2.0825612545013428, |
|
"learning_rate": 3.9896068346758074e-08, |
|
"logits/chosen": -2.8629543781280518, |
|
"logits/rejected": -2.887873649597168, |
|
"logps/chosen": -0.8457245826721191, |
|
"logps/rejected": -1.0121568441390991, |
|
"loss": 0.9115, |
|
"odds_ratio_loss": 0.6578742265701294, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.0845724567770958, |
|
"rewards/margins": 0.016643229871988297, |
|
"rewards/rejected": -0.10121568292379379, |
|
"sft_loss": 0.8457245826721191, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 2.8450766837074903, |
|
"grad_norm": 5.224942207336426, |
|
"learning_rate": 3.203416211153832e-08, |
|
"logits/chosen": -2.8476452827453613, |
|
"logits/rejected": -2.893502712249756, |
|
"logps/chosen": -0.843380331993103, |
|
"logps/rejected": -1.0194734334945679, |
|
"loss": 0.9131, |
|
"odds_ratio_loss": 0.6969292163848877, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.08433803915977478, |
|
"rewards/margins": 0.017609301954507828, |
|
"rewards/rejected": -0.1019473448395729, |
|
"sft_loss": 0.843380331993103, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.8628584129806622, |
|
"grad_norm": 2.344214677810669, |
|
"learning_rate": 2.5029220118019393e-08, |
|
"logits/chosen": -2.8379952907562256, |
|
"logits/rejected": -2.8665690422058105, |
|
"logps/chosen": -0.8621314167976379, |
|
"logps/rejected": -0.9904851913452148, |
|
"loss": 0.929, |
|
"odds_ratio_loss": 0.6684004664421082, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.08621314913034439, |
|
"rewards/margins": 0.01283537782728672, |
|
"rewards/rejected": -0.09904851764440536, |
|
"sft_loss": 0.8621314167976379, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 2.880640142253834, |
|
"grad_norm": 3.038189649581909, |
|
"learning_rate": 1.8883674727586122e-08, |
|
"logits/chosen": -2.868648052215576, |
|
"logits/rejected": -2.876978635787964, |
|
"logps/chosen": -0.8205513954162598, |
|
"logps/rejected": -1.1327790021896362, |
|
"loss": 0.8816, |
|
"odds_ratio_loss": 0.6100881099700928, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.08205513656139374, |
|
"rewards/margins": 0.031222760677337646, |
|
"rewards/rejected": -0.11327788978815079, |
|
"sft_loss": 0.8205513954162598, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 2.898421871527006, |
|
"grad_norm": 1.2839456796646118, |
|
"learning_rate": 1.3599659889000639e-08, |
|
"logits/chosen": -2.8628990650177, |
|
"logits/rejected": -2.881967067718506, |
|
"logps/chosen": -0.7933380007743835, |
|
"logps/rejected": -0.9434422254562378, |
|
"loss": 0.8593, |
|
"odds_ratio_loss": 0.6599572896957397, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.07933380454778671, |
|
"rewards/margins": 0.015010423958301544, |
|
"rewards/rejected": -0.09434423595666885, |
|
"sft_loss": 0.7933380007743835, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 2.916203600800178, |
|
"grad_norm": 1.2355436086654663, |
|
"learning_rate": 9.179010397421528e-09, |
|
"logits/chosen": -2.864149808883667, |
|
"logits/rejected": -2.8793797492980957, |
|
"logps/chosen": -0.8156276941299438, |
|
"logps/rejected": -1.0415749549865723, |
|
"loss": 0.8816, |
|
"odds_ratio_loss": 0.659904956817627, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.08156277239322662, |
|
"rewards/margins": 0.022594723850488663, |
|
"rewards/rejected": -0.10415749251842499, |
|
"sft_loss": 0.8156276941299438, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 2.93398533007335, |
|
"grad_norm": 1.7059450149536133, |
|
"learning_rate": 5.623261257296509e-09, |
|
"logits/chosen": -2.8292007446289062, |
|
"logits/rejected": -2.865652561187744, |
|
"logps/chosen": -0.7724882364273071, |
|
"logps/rejected": -0.9592376947402954, |
|
"loss": 0.8358, |
|
"odds_ratio_loss": 0.6333457231521606, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.07724882662296295, |
|
"rewards/margins": 0.01867493987083435, |
|
"rewards/rejected": -0.0959237664937973, |
|
"sft_loss": 0.7724882364273071, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.9517670593465217, |
|
"grad_norm": 2.2234692573547363, |
|
"learning_rate": 2.933647149357122e-09, |
|
"logits/chosen": -2.8548574447631836, |
|
"logits/rejected": -2.8638224601745605, |
|
"logps/chosen": -0.8041044473648071, |
|
"logps/rejected": -1.1135035753250122, |
|
"loss": 0.8679, |
|
"odds_ratio_loss": 0.6383166909217834, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.0804104432463646, |
|
"rewards/margins": 0.030939901247620583, |
|
"rewards/rejected": -0.11135034263134003, |
|
"sft_loss": 0.8041044473648071, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 2.969548788619693, |
|
"grad_norm": 2.0396206378936768, |
|
"learning_rate": 1.1111020018930717e-09, |
|
"logits/chosen": -2.863762617111206, |
|
"logits/rejected": -2.8730015754699707, |
|
"logps/chosen": -0.8626953363418579, |
|
"logps/rejected": -0.9779960513114929, |
|
"loss": 0.9301, |
|
"odds_ratio_loss": 0.6735736131668091, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.08626954257488251, |
|
"rewards/margins": 0.011530078016221523, |
|
"rewards/rejected": -0.09779961407184601, |
|
"sft_loss": 0.8626953363418579, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 2.987330517892865, |
|
"grad_norm": 1.7169232368469238, |
|
"learning_rate": 1.5625866646051813e-10, |
|
"logits/chosen": -2.8806686401367188, |
|
"logits/rejected": -2.874525547027588, |
|
"logps/chosen": -0.8173590898513794, |
|
"logps/rejected": -1.051163911819458, |
|
"loss": 0.8775, |
|
"odds_ratio_loss": 0.600918173789978, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.08173591643571854, |
|
"rewards/margins": 0.0233804639428854, |
|
"rewards/rejected": -0.10511638224124908, |
|
"sft_loss": 0.8173590898513794, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 2.997999555456768, |
|
"step": 1686, |
|
"total_flos": 1.985199772705751e+18, |
|
"train_loss": 0.9645641791862949, |
|
"train_runtime": 17170.3123, |
|
"train_samples_per_second": 1.572, |
|
"train_steps_per_second": 0.098 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1686, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 1.985199772705751e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|