|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9992254066615027, |
|
"eval_steps": 100, |
|
"global_step": 726, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 6.84931506849315e-09, |
|
"logits/chosen": -2.2289741039276123, |
|
"logits/rejected": -2.0226380825042725, |
|
"logps/chosen": -263.3438720703125, |
|
"logps/rejected": -201.271240234375, |
|
"loss": 25.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.84931506849315e-08, |
|
"logits/chosen": -2.266495943069458, |
|
"logits/rejected": -1.9388139247894287, |
|
"logps/chosen": -284.0570068359375, |
|
"logps/rejected": -217.82611083984375, |
|
"loss": 25.2097, |
|
"rewards/accuracies": 0.4253472089767456, |
|
"rewards/chosen": -0.0013144080294296145, |
|
"rewards/margins": -0.0012916413834318519, |
|
"rewards/rejected": -2.276669692946598e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.36986301369863e-07, |
|
"logits/chosen": -2.273313283920288, |
|
"logits/rejected": -1.9863513708114624, |
|
"logps/chosen": -269.41644287109375, |
|
"logps/rejected": -208.65896606445312, |
|
"loss": 25.3631, |
|
"rewards/accuracies": 0.504687488079071, |
|
"rewards/chosen": -0.00041677377885207534, |
|
"rewards/margins": -0.0017736803274601698, |
|
"rewards/rejected": 0.0013569066068157554, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.054794520547945e-07, |
|
"logits/chosen": -2.218705177307129, |
|
"logits/rejected": -1.993547797203064, |
|
"logps/chosen": -243.9556427001953, |
|
"logps/rejected": -199.10511779785156, |
|
"loss": 25.202, |
|
"rewards/accuracies": 0.5171874761581421, |
|
"rewards/chosen": -0.0021037233527749777, |
|
"rewards/margins": 0.0023984042927622795, |
|
"rewards/rejected": -0.004502127878367901, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.73972602739726e-07, |
|
"logits/chosen": -2.3098392486572266, |
|
"logits/rejected": -1.996872901916504, |
|
"logps/chosen": -274.44091796875, |
|
"logps/rejected": -208.118896484375, |
|
"loss": 25.1001, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 5.082110874354839e-05, |
|
"rewards/margins": 0.0012452874798327684, |
|
"rewards/rejected": -0.0011944664875045419, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.424657534246575e-07, |
|
"logits/chosen": -2.2532970905303955, |
|
"logits/rejected": -2.0162949562072754, |
|
"logps/chosen": -258.4920959472656, |
|
"logps/rejected": -215.529052734375, |
|
"loss": 24.7793, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.0014612700324505568, |
|
"rewards/margins": 0.004128460772335529, |
|
"rewards/rejected": -0.0026671909727156162, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.10958904109589e-07, |
|
"logits/chosen": -2.2521817684173584, |
|
"logits/rejected": -1.9908069372177124, |
|
"logps/chosen": -257.59222412109375, |
|
"logps/rejected": -206.552490234375, |
|
"loss": 24.2711, |
|
"rewards/accuracies": 0.567187488079071, |
|
"rewards/chosen": 0.008318379521369934, |
|
"rewards/margins": 0.010114217177033424, |
|
"rewards/rejected": -0.0017958376556634903, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.794520547945205e-07, |
|
"logits/chosen": -2.2163357734680176, |
|
"logits/rejected": -1.997807264328003, |
|
"logps/chosen": -245.3988800048828, |
|
"logps/rejected": -206.3892364501953, |
|
"loss": 23.8848, |
|
"rewards/accuracies": 0.604687511920929, |
|
"rewards/chosen": 0.008774536661803722, |
|
"rewards/margins": 0.015018345788121223, |
|
"rewards/rejected": -0.006243808660656214, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.946401225114854e-07, |
|
"logits/chosen": -2.272542715072632, |
|
"logits/rejected": -1.9779363870620728, |
|
"logps/chosen": -256.14208984375, |
|
"logps/rejected": -210.4285125732422, |
|
"loss": 23.3744, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.011616826988756657, |
|
"rewards/margins": 0.023227987810969353, |
|
"rewards/rejected": -0.01161116175353527, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.869831546707504e-07, |
|
"logits/chosen": -2.310504913330078, |
|
"logits/rejected": -2.018115520477295, |
|
"logps/chosen": -262.4266052246094, |
|
"logps/rejected": -216.05191040039062, |
|
"loss": 22.5882, |
|
"rewards/accuracies": 0.6484375, |
|
"rewards/chosen": 0.02013319917023182, |
|
"rewards/margins": 0.03405465558171272, |
|
"rewards/rejected": -0.013921457342803478, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.793261868300153e-07, |
|
"logits/chosen": -2.2590413093566895, |
|
"logits/rejected": -1.9913495779037476, |
|
"logps/chosen": -264.6045837402344, |
|
"logps/rejected": -219.0672149658203, |
|
"loss": 22.0702, |
|
"rewards/accuracies": 0.6640625, |
|
"rewards/chosen": 0.02459399588406086, |
|
"rewards/margins": 0.04087246581912041, |
|
"rewards/rejected": -0.016278475522994995, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.7166921898928023e-07, |
|
"logits/chosen": -2.2836508750915527, |
|
"logits/rejected": -2.039545774459839, |
|
"logps/chosen": -243.48867797851562, |
|
"logps/rejected": -214.2646942138672, |
|
"loss": 21.7548, |
|
"rewards/accuracies": 0.6656249761581421, |
|
"rewards/chosen": 0.020637672394514084, |
|
"rewards/margins": 0.043859176337718964, |
|
"rewards/rejected": -0.02322150394320488, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.640122511485451e-07, |
|
"logits/chosen": -2.2533435821533203, |
|
"logits/rejected": -2.0158801078796387, |
|
"logps/chosen": -247.85543823242188, |
|
"logps/rejected": -211.3623504638672, |
|
"loss": 21.608, |
|
"rewards/accuracies": 0.635937511920929, |
|
"rewards/chosen": 0.026100531220436096, |
|
"rewards/margins": 0.05109390616416931, |
|
"rewards/rejected": -0.024993373081088066, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.563552833078101e-07, |
|
"logits/chosen": -2.2582602500915527, |
|
"logits/rejected": -2.0190303325653076, |
|
"logps/chosen": -256.9596252441406, |
|
"logps/rejected": -217.3245849609375, |
|
"loss": 20.8686, |
|
"rewards/accuracies": 0.667187511920929, |
|
"rewards/chosen": 0.028709357604384422, |
|
"rewards/margins": 0.06462417542934418, |
|
"rewards/rejected": -0.035914815962314606, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.4869831546707505e-07, |
|
"logits/chosen": -2.2754273414611816, |
|
"logits/rejected": -2.038130521774292, |
|
"logps/chosen": -261.1288757324219, |
|
"logps/rejected": -221.7465057373047, |
|
"loss": 20.9236, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.03735818341374397, |
|
"rewards/margins": 0.07491641491651535, |
|
"rewards/rejected": -0.037558235228061676, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.4104134762633994e-07, |
|
"logits/chosen": -2.2817165851593018, |
|
"logits/rejected": -2.055285930633545, |
|
"logps/chosen": -257.0137939453125, |
|
"logps/rejected": -217.66183471679688, |
|
"loss": 20.6598, |
|
"rewards/accuracies": 0.6703125238418579, |
|
"rewards/chosen": 0.03143654763698578, |
|
"rewards/margins": 0.06876268237829208, |
|
"rewards/rejected": -0.0373261384665966, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.333843797856049e-07, |
|
"logits/chosen": -2.2860567569732666, |
|
"logits/rejected": -1.999734878540039, |
|
"logps/chosen": -270.0646057128906, |
|
"logps/rejected": -229.95010375976562, |
|
"loss": 19.9317, |
|
"rewards/accuracies": 0.7015625238418579, |
|
"rewards/chosen": 0.04298175126314163, |
|
"rewards/margins": 0.08404376357793808, |
|
"rewards/rejected": -0.04106200858950615, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.257274119448698e-07, |
|
"logits/chosen": -2.3108818531036377, |
|
"logits/rejected": -2.0907673835754395, |
|
"logps/chosen": -261.53253173828125, |
|
"logps/rejected": -230.0071563720703, |
|
"loss": 20.2237, |
|
"rewards/accuracies": 0.6546875238418579, |
|
"rewards/chosen": 0.03258711099624634, |
|
"rewards/margins": 0.07823493331670761, |
|
"rewards/rejected": -0.04564782604575157, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.180704441041347e-07, |
|
"logits/chosen": -2.2532687187194824, |
|
"logits/rejected": -2.0309438705444336, |
|
"logps/chosen": -247.6223907470703, |
|
"logps/rejected": -216.98587036132812, |
|
"loss": 20.6143, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": 0.027079517021775246, |
|
"rewards/margins": 0.08031971752643585, |
|
"rewards/rejected": -0.05324019119143486, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.1041347626339966e-07, |
|
"logits/chosen": -2.290727138519287, |
|
"logits/rejected": -2.008113384246826, |
|
"logps/chosen": -252.6389617919922, |
|
"logps/rejected": -204.10769653320312, |
|
"loss": 19.923, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.02958676591515541, |
|
"rewards/margins": 0.09163785725831985, |
|
"rewards/rejected": -0.06205107644200325, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.027565084226646e-07, |
|
"logits/chosen": -2.278743267059326, |
|
"logits/rejected": -2.0434935092926025, |
|
"logps/chosen": -251.91677856445312, |
|
"logps/rejected": -208.04470825195312, |
|
"loss": 19.5106, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.03315151482820511, |
|
"rewards/margins": 0.09510111808776855, |
|
"rewards/rejected": -0.061949603259563446, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.9509954058192954e-07, |
|
"logits/chosen": -2.288477897644043, |
|
"logits/rejected": -2.027742385864258, |
|
"logps/chosen": -274.6870422363281, |
|
"logps/rejected": -221.636962890625, |
|
"loss": 19.5079, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": 0.03773919492959976, |
|
"rewards/margins": 0.10934200137853622, |
|
"rewards/rejected": -0.07160280644893646, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.874425727411945e-07, |
|
"logits/chosen": -2.2594573497772217, |
|
"logits/rejected": -2.008882999420166, |
|
"logps/chosen": -267.6262512207031, |
|
"logps/rejected": -219.49728393554688, |
|
"loss": 19.6404, |
|
"rewards/accuracies": 0.698437511920929, |
|
"rewards/chosen": 0.04026947170495987, |
|
"rewards/margins": 0.10674212872982025, |
|
"rewards/rejected": -0.06647266447544098, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.797856049004594e-07, |
|
"logits/chosen": -2.26505708694458, |
|
"logits/rejected": -2.060857057571411, |
|
"logps/chosen": -254.90005493164062, |
|
"logps/rejected": -218.54141235351562, |
|
"loss": 19.6698, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": 0.035312727093696594, |
|
"rewards/margins": 0.0973130315542221, |
|
"rewards/rejected": -0.062000297009944916, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.7212863705972436e-07, |
|
"logits/chosen": -2.2956652641296387, |
|
"logits/rejected": -2.047691822052002, |
|
"logps/chosen": -261.9791564941406, |
|
"logps/rejected": -212.89151000976562, |
|
"loss": 19.3937, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.035399921238422394, |
|
"rewards/margins": 0.10358913987874985, |
|
"rewards/rejected": -0.06818921864032745, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -2.0236928462982178, |
|
"eval_logits/rejected": -1.7961955070495605, |
|
"eval_logps/chosen": -255.23326110839844, |
|
"eval_logps/rejected": -211.76084899902344, |
|
"eval_loss": 19.3449764251709, |
|
"eval_rewards/accuracies": 0.7039999961853027, |
|
"eval_rewards/chosen": 0.029078969731926918, |
|
"eval_rewards/margins": 0.1019509881734848, |
|
"eval_rewards/rejected": -0.07287202030420303, |
|
"eval_runtime": 239.3505, |
|
"eval_samples_per_second": 8.356, |
|
"eval_steps_per_second": 0.522, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.6447166921898925e-07, |
|
"logits/chosen": -2.2042205333709717, |
|
"logits/rejected": -2.014646053314209, |
|
"logps/chosen": -248.44058227539062, |
|
"logps/rejected": -200.34120178222656, |
|
"loss": 19.9018, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.03346983343362808, |
|
"rewards/margins": 0.10174000263214111, |
|
"rewards/rejected": -0.06827016919851303, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.568147013782542e-07, |
|
"logits/chosen": -2.2492403984069824, |
|
"logits/rejected": -2.0223591327667236, |
|
"logps/chosen": -252.7912139892578, |
|
"logps/rejected": -216.12240600585938, |
|
"loss": 19.0687, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": 0.03231377154588699, |
|
"rewards/margins": 0.10541899502277374, |
|
"rewards/rejected": -0.07310522347688675, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.4915773353751913e-07, |
|
"logits/chosen": -2.26572847366333, |
|
"logits/rejected": -1.9836113452911377, |
|
"logps/chosen": -268.9698791503906, |
|
"logps/rejected": -225.75338745117188, |
|
"loss": 18.628, |
|
"rewards/accuracies": 0.7484375238418579, |
|
"rewards/chosen": 0.04413260146975517, |
|
"rewards/margins": 0.1356559544801712, |
|
"rewards/rejected": -0.09152336418628693, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.41500765696784e-07, |
|
"logits/chosen": -2.270271062850952, |
|
"logits/rejected": -2.0418267250061035, |
|
"logps/chosen": -241.98959350585938, |
|
"logps/rejected": -215.8519287109375, |
|
"loss": 19.2251, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": 0.03690015524625778, |
|
"rewards/margins": 0.11016629636287689, |
|
"rewards/rejected": -0.07326614856719971, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.33843797856049e-07, |
|
"logits/chosen": -2.2635691165924072, |
|
"logits/rejected": -2.0188465118408203, |
|
"logps/chosen": -273.9185791015625, |
|
"logps/rejected": -217.93167114257812, |
|
"loss": 18.6598, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.03699145466089249, |
|
"rewards/margins": 0.12567397952079773, |
|
"rewards/rejected": -0.08868252485990524, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.2618683001531396e-07, |
|
"logits/chosen": -2.2175521850585938, |
|
"logits/rejected": -1.9899898767471313, |
|
"logps/chosen": -251.8654327392578, |
|
"logps/rejected": -217.54421997070312, |
|
"loss": 18.5271, |
|
"rewards/accuracies": 0.714062511920929, |
|
"rewards/chosen": 0.03108426369726658, |
|
"rewards/margins": 0.1172715276479721, |
|
"rewards/rejected": -0.08618726581335068, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.1852986217457885e-07, |
|
"logits/chosen": -2.287581205368042, |
|
"logits/rejected": -2.035792827606201, |
|
"logps/chosen": -250.79638671875, |
|
"logps/rejected": -218.07278442382812, |
|
"loss": 19.1524, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.03225615620613098, |
|
"rewards/margins": 0.11478684097528458, |
|
"rewards/rejected": -0.082530677318573, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.108728943338438e-07, |
|
"logits/chosen": -2.2732996940612793, |
|
"logits/rejected": -2.085688829421997, |
|
"logps/chosen": -243.0460662841797, |
|
"logps/rejected": -227.0532989501953, |
|
"loss": 19.1273, |
|
"rewards/accuracies": 0.692187488079071, |
|
"rewards/chosen": 0.02955133654177189, |
|
"rewards/margins": 0.10330448299646378, |
|
"rewards/rejected": -0.07375315576791763, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.0321592649310873e-07, |
|
"logits/chosen": -2.2556710243225098, |
|
"logits/rejected": -1.9700673818588257, |
|
"logps/chosen": -243.98757934570312, |
|
"logps/rejected": -214.067138671875, |
|
"loss": 18.57, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": 0.03395627439022064, |
|
"rewards/margins": 0.1173306480050087, |
|
"rewards/rejected": -0.08337438106536865, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.955589586523736e-07, |
|
"logits/chosen": -2.285491466522217, |
|
"logits/rejected": -2.017014503479004, |
|
"logps/chosen": -256.3018493652344, |
|
"logps/rejected": -215.5684051513672, |
|
"loss": 19.3792, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": 0.02712482213973999, |
|
"rewards/margins": 0.11975017935037613, |
|
"rewards/rejected": -0.09262534976005554, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.8790199081163856e-07, |
|
"logits/chosen": -2.2827515602111816, |
|
"logits/rejected": -1.9982925653457642, |
|
"logps/chosen": -256.5948181152344, |
|
"logps/rejected": -214.377197265625, |
|
"loss": 19.2092, |
|
"rewards/accuracies": 0.6890624761581421, |
|
"rewards/chosen": 0.02536213956773281, |
|
"rewards/margins": 0.10417892783880234, |
|
"rewards/rejected": -0.07881677150726318, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.802450229709035e-07, |
|
"logits/chosen": -2.226412296295166, |
|
"logits/rejected": -1.9456377029418945, |
|
"logps/chosen": -245.56753540039062, |
|
"logps/rejected": -202.68087768554688, |
|
"loss": 18.5533, |
|
"rewards/accuracies": 0.7015625238418579, |
|
"rewards/chosen": 0.023562483489513397, |
|
"rewards/margins": 0.11015045642852783, |
|
"rewards/rejected": -0.08658796548843384, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.725880551301684e-07, |
|
"logits/chosen": -2.2886481285095215, |
|
"logits/rejected": -2.075536012649536, |
|
"logps/chosen": -265.2494812011719, |
|
"logps/rejected": -222.0449981689453, |
|
"loss": 19.4234, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": 0.0262996107339859, |
|
"rewards/margins": 0.11352996528148651, |
|
"rewards/rejected": -0.08723036199808121, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.649310872894334e-07, |
|
"logits/chosen": -2.2954063415527344, |
|
"logits/rejected": -2.019110918045044, |
|
"logps/chosen": -265.41534423828125, |
|
"logps/rejected": -211.8571319580078, |
|
"loss": 18.7522, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": 0.030247170478105545, |
|
"rewards/margins": 0.12689557671546936, |
|
"rewards/rejected": -0.09664840996265411, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.572741194486983e-07, |
|
"logits/chosen": -2.307234525680542, |
|
"logits/rejected": -1.9544427394866943, |
|
"logps/chosen": -263.83050537109375, |
|
"logps/rejected": -196.88644409179688, |
|
"loss": 18.7036, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": 0.02584310993552208, |
|
"rewards/margins": 0.11804970353841782, |
|
"rewards/rejected": -0.09220659732818604, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.496171516079632e-07, |
|
"logits/chosen": -2.3390731811523438, |
|
"logits/rejected": -2.020245313644409, |
|
"logps/chosen": -283.559326171875, |
|
"logps/rejected": -228.0408477783203, |
|
"loss": 18.9635, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": 0.03759358078241348, |
|
"rewards/margins": 0.13340887427330017, |
|
"rewards/rejected": -0.09581530094146729, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.4196018376722816e-07, |
|
"logits/chosen": -2.25607967376709, |
|
"logits/rejected": -2.005556344985962, |
|
"logps/chosen": -268.84881591796875, |
|
"logps/rejected": -216.67996215820312, |
|
"loss": 19.2263, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": 0.027539696544408798, |
|
"rewards/margins": 0.12173386663198471, |
|
"rewards/rejected": -0.09419417381286621, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.343032159264931e-07, |
|
"logits/chosen": -2.234701633453369, |
|
"logits/rejected": -1.9779908657073975, |
|
"logps/chosen": -259.31536865234375, |
|
"logps/rejected": -212.6099853515625, |
|
"loss": 18.7807, |
|
"rewards/accuracies": 0.729687511920929, |
|
"rewards/chosen": 0.03047511912882328, |
|
"rewards/margins": 0.13214388489723206, |
|
"rewards/rejected": -0.10166877508163452, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.26646248085758e-07, |
|
"logits/chosen": -2.231544017791748, |
|
"logits/rejected": -1.981184720993042, |
|
"logps/chosen": -249.824951171875, |
|
"logps/rejected": -205.8656005859375, |
|
"loss": 18.6868, |
|
"rewards/accuracies": 0.729687511920929, |
|
"rewards/chosen": 0.026928503066301346, |
|
"rewards/margins": 0.128057599067688, |
|
"rewards/rejected": -0.10112909972667694, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.1898928024502298e-07, |
|
"logits/chosen": -2.2951433658599854, |
|
"logits/rejected": -2.026249408721924, |
|
"logps/chosen": -253.8539581298828, |
|
"logps/rejected": -210.1437225341797, |
|
"loss": 18.3582, |
|
"rewards/accuracies": 0.739062488079071, |
|
"rewards/chosen": 0.03310644254088402, |
|
"rewards/margins": 0.12848404049873352, |
|
"rewards/rejected": -0.09537758678197861, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.113323124042879e-07, |
|
"logits/chosen": -2.2722887992858887, |
|
"logits/rejected": -2.0053603649139404, |
|
"logps/chosen": -261.27166748046875, |
|
"logps/rejected": -219.5053253173828, |
|
"loss": 18.8048, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": 0.034187205135822296, |
|
"rewards/margins": 0.1302899569272995, |
|
"rewards/rejected": -0.0961027592420578, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.036753445635528e-07, |
|
"logits/chosen": -2.279463052749634, |
|
"logits/rejected": -1.9979301691055298, |
|
"logps/chosen": -273.9200744628906, |
|
"logps/rejected": -225.6119842529297, |
|
"loss": 18.3929, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": 0.0315621979534626, |
|
"rewards/margins": 0.1322147399187088, |
|
"rewards/rejected": -0.1006525382399559, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.9601837672281775e-07, |
|
"logits/chosen": -2.261075735092163, |
|
"logits/rejected": -2.026048183441162, |
|
"logps/chosen": -260.4441223144531, |
|
"logps/rejected": -224.8106231689453, |
|
"loss": 18.4695, |
|
"rewards/accuracies": 0.7359374761581421, |
|
"rewards/chosen": 0.025445517152547836, |
|
"rewards/margins": 0.12411808967590332, |
|
"rewards/rejected": -0.09867255389690399, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.883614088820827e-07, |
|
"logits/chosen": -2.296330213546753, |
|
"logits/rejected": -1.9999678134918213, |
|
"logps/chosen": -258.93145751953125, |
|
"logps/rejected": -212.75003051757812, |
|
"loss": 19.376, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.027268463745713234, |
|
"rewards/margins": 0.12198734283447266, |
|
"rewards/rejected": -0.09471887350082397, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_logits/chosen": -2.023204803466797, |
|
"eval_logits/rejected": -1.7954164743423462, |
|
"eval_logps/chosen": -255.2545623779297, |
|
"eval_logps/rejected": -211.98085021972656, |
|
"eval_loss": 18.819795608520508, |
|
"eval_rewards/accuracies": 0.7020000219345093, |
|
"eval_rewards/chosen": 0.026950573548674583, |
|
"eval_rewards/margins": 0.12182173132896423, |
|
"eval_rewards/rejected": -0.0948711559176445, |
|
"eval_runtime": 239.4946, |
|
"eval_samples_per_second": 8.351, |
|
"eval_steps_per_second": 0.522, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.807044410413476e-07, |
|
"logits/chosen": -2.2521510124206543, |
|
"logits/rejected": -2.021901845932007, |
|
"logps/chosen": -248.35025024414062, |
|
"logps/rejected": -224.5894775390625, |
|
"loss": 18.8769, |
|
"rewards/accuracies": 0.7359374761581421, |
|
"rewards/chosen": 0.026256313547492027, |
|
"rewards/margins": 0.1273646056652069, |
|
"rewards/rejected": -0.10110831260681152, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.7304747320061255e-07, |
|
"logits/chosen": -2.2362194061279297, |
|
"logits/rejected": -2.0276010036468506, |
|
"logps/chosen": -253.75991821289062, |
|
"logps/rejected": -218.86349487304688, |
|
"loss": 18.6885, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": 0.03784112259745598, |
|
"rewards/margins": 0.12961548566818237, |
|
"rewards/rejected": -0.0917743593454361, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.6539050535987747e-07, |
|
"logits/chosen": -2.3277292251586914, |
|
"logits/rejected": -2.005606174468994, |
|
"logps/chosen": -260.1662902832031, |
|
"logps/rejected": -212.98428344726562, |
|
"loss": 18.2008, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.030466770753264427, |
|
"rewards/margins": 0.13587407767772675, |
|
"rewards/rejected": -0.10540731251239777, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.5773353751914243e-07, |
|
"logits/chosen": -2.354032516479492, |
|
"logits/rejected": -2.0204906463623047, |
|
"logps/chosen": -275.2497863769531, |
|
"logps/rejected": -218.10595703125, |
|
"loss": 18.827, |
|
"rewards/accuracies": 0.7515624761581421, |
|
"rewards/chosen": 0.0328175388276577, |
|
"rewards/margins": 0.13892371952533722, |
|
"rewards/rejected": -0.10610618442296982, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.5007656967840735e-07, |
|
"logits/chosen": -2.227428436279297, |
|
"logits/rejected": -1.9895546436309814, |
|
"logps/chosen": -244.3919677734375, |
|
"logps/rejected": -218.86886596679688, |
|
"loss": 18.3559, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": 0.027428537607192993, |
|
"rewards/margins": 0.13661186397075653, |
|
"rewards/rejected": -0.10918332636356354, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.4241960183767226e-07, |
|
"logits/chosen": -2.2682414054870605, |
|
"logits/rejected": -1.9858787059783936, |
|
"logps/chosen": -272.84637451171875, |
|
"logps/rejected": -223.4131622314453, |
|
"loss": 18.6971, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.024813225492835045, |
|
"rewards/margins": 0.12149915844202042, |
|
"rewards/rejected": -0.09668593108654022, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.347626339969372e-07, |
|
"logits/chosen": -2.2588775157928467, |
|
"logits/rejected": -2.093949794769287, |
|
"logps/chosen": -261.2510986328125, |
|
"logps/rejected": -230.35330200195312, |
|
"loss": 18.6719, |
|
"rewards/accuracies": 0.723437488079071, |
|
"rewards/chosen": 0.034928444772958755, |
|
"rewards/margins": 0.12344489991664886, |
|
"rewards/rejected": -0.08851645886898041, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.2710566615620215e-07, |
|
"logits/chosen": -2.2328484058380127, |
|
"logits/rejected": -1.9578449726104736, |
|
"logps/chosen": -264.450439453125, |
|
"logps/rejected": -212.9776611328125, |
|
"loss": 17.9566, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": 0.026903945952653885, |
|
"rewards/margins": 0.13341760635375977, |
|
"rewards/rejected": -0.10651366412639618, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.1944869831546706e-07, |
|
"logits/chosen": -2.237755060195923, |
|
"logits/rejected": -2.0300750732421875, |
|
"logps/chosen": -255.99853515625, |
|
"logps/rejected": -226.13626098632812, |
|
"loss": 18.5265, |
|
"rewards/accuracies": 0.698437511920929, |
|
"rewards/chosen": 0.023317929357290268, |
|
"rewards/margins": 0.12482543289661407, |
|
"rewards/rejected": -0.1015075072646141, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.11791730474732e-07, |
|
"logits/chosen": -2.257673740386963, |
|
"logits/rejected": -1.9982837438583374, |
|
"logps/chosen": -252.7151336669922, |
|
"logps/rejected": -213.78366088867188, |
|
"loss": 18.0724, |
|
"rewards/accuracies": 0.7593749761581421, |
|
"rewards/chosen": 0.023391084745526314, |
|
"rewards/margins": 0.14269840717315674, |
|
"rewards/rejected": -0.11930731683969498, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.0413476263399694e-07, |
|
"logits/chosen": -2.297306537628174, |
|
"logits/rejected": -2.012129783630371, |
|
"logps/chosen": -259.78497314453125, |
|
"logps/rejected": -206.7128448486328, |
|
"loss": 18.4955, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": 0.017748359590768814, |
|
"rewards/margins": 0.12208724021911621, |
|
"rewards/rejected": -0.1043388843536377, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.647779479326186e-08, |
|
"logits/chosen": -2.2344954013824463, |
|
"logits/rejected": -2.0501608848571777, |
|
"logps/chosen": -240.86679077148438, |
|
"logps/rejected": -212.7677764892578, |
|
"loss": 18.9662, |
|
"rewards/accuracies": 0.7203124761581421, |
|
"rewards/chosen": 0.02210834063589573, |
|
"rewards/margins": 0.11001463979482651, |
|
"rewards/rejected": -0.08790630102157593, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.88208269525268e-08, |
|
"logits/chosen": -2.239448308944702, |
|
"logits/rejected": -1.992950201034546, |
|
"logps/chosen": -250.8835906982422, |
|
"logps/rejected": -211.41421508789062, |
|
"loss": 18.8806, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.02492038905620575, |
|
"rewards/margins": 0.1194721907377243, |
|
"rewards/rejected": -0.09455180168151855, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 8.116385911179173e-08, |
|
"logits/chosen": -2.2440013885498047, |
|
"logits/rejected": -1.960422158241272, |
|
"logps/chosen": -270.11187744140625, |
|
"logps/rejected": -207.26058959960938, |
|
"loss": 18.2745, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.03377198800444603, |
|
"rewards/margins": 0.13419213891029358, |
|
"rewards/rejected": -0.10042013972997665, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.350689127105667e-08, |
|
"logits/chosen": -2.2937111854553223, |
|
"logits/rejected": -1.987168550491333, |
|
"logps/chosen": -262.86199951171875, |
|
"logps/rejected": -219.86508178710938, |
|
"loss": 18.1999, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": 0.03628316521644592, |
|
"rewards/margins": 0.14258472621440887, |
|
"rewards/rejected": -0.10630156099796295, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.584992343032159e-08, |
|
"logits/chosen": -2.2060506343841553, |
|
"logits/rejected": -1.9665876626968384, |
|
"logps/chosen": -253.2500457763672, |
|
"logps/rejected": -216.85476684570312, |
|
"loss": 18.8148, |
|
"rewards/accuracies": 0.7203124761581421, |
|
"rewards/chosen": 0.03216860815882683, |
|
"rewards/margins": 0.12755930423736572, |
|
"rewards/rejected": -0.0953906923532486, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.819295558958652e-08, |
|
"logits/chosen": -2.2680506706237793, |
|
"logits/rejected": -1.9561970233917236, |
|
"logps/chosen": -277.43157958984375, |
|
"logps/rejected": -209.50244140625, |
|
"loss": 18.345, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.03760701045393944, |
|
"rewards/margins": 0.14586040377616882, |
|
"rewards/rejected": -0.10825341939926147, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 5.0535987748851455e-08, |
|
"logits/chosen": -2.3038992881774902, |
|
"logits/rejected": -2.04447078704834, |
|
"logps/chosen": -265.35980224609375, |
|
"logps/rejected": -219.22085571289062, |
|
"loss": 18.1755, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": 0.027750462293624878, |
|
"rewards/margins": 0.12442169338464737, |
|
"rewards/rejected": -0.09667123109102249, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.287901990811638e-08, |
|
"logits/chosen": -2.2642054557800293, |
|
"logits/rejected": -2.0056357383728027, |
|
"logps/chosen": -258.467041015625, |
|
"logps/rejected": -232.66946411132812, |
|
"loss": 18.3268, |
|
"rewards/accuracies": 0.7203124761581421, |
|
"rewards/chosen": 0.033985260874032974, |
|
"rewards/margins": 0.14256197214126587, |
|
"rewards/rejected": -0.1085767149925232, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.522205206738132e-08, |
|
"logits/chosen": -2.2641890048980713, |
|
"logits/rejected": -2.0118279457092285, |
|
"logps/chosen": -265.30517578125, |
|
"logps/rejected": -217.5371856689453, |
|
"loss": 18.4502, |
|
"rewards/accuracies": 0.7359374761581421, |
|
"rewards/chosen": 0.027841120958328247, |
|
"rewards/margins": 0.12672238051891327, |
|
"rewards/rejected": -0.09888125956058502, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.7565084226646246e-08, |
|
"logits/chosen": -2.2387919425964355, |
|
"logits/rejected": -2.0168097019195557, |
|
"logps/chosen": -254.23617553710938, |
|
"logps/rejected": -224.2781219482422, |
|
"loss": 18.6327, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": 0.03157157823443413, |
|
"rewards/margins": 0.13236665725708008, |
|
"rewards/rejected": -0.10079507529735565, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.9908116385911178e-08, |
|
"logits/chosen": -2.2574362754821777, |
|
"logits/rejected": -2.012924909591675, |
|
"logps/chosen": -250.27371215820312, |
|
"logps/rejected": -215.03756713867188, |
|
"loss": 18.7916, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": 0.023055683821439743, |
|
"rewards/margins": 0.11122564226388931, |
|
"rewards/rejected": -0.08816995471715927, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.225114854517611e-08, |
|
"logits/chosen": -2.309433937072754, |
|
"logits/rejected": -2.0074551105499268, |
|
"logps/chosen": -270.73040771484375, |
|
"logps/rejected": -209.7691192626953, |
|
"loss": 18.5426, |
|
"rewards/accuracies": 0.723437488079071, |
|
"rewards/chosen": 0.025719935074448586, |
|
"rewards/margins": 0.13181468844413757, |
|
"rewards/rejected": -0.10609474033117294, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 4.594180704441042e-09, |
|
"logits/chosen": -2.2386531829833984, |
|
"logits/rejected": -2.058807849884033, |
|
"logps/chosen": -248.2914581298828, |
|
"logps/rejected": -213.177490234375, |
|
"loss": 18.4503, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.0301600880920887, |
|
"rewards/margins": 0.12142640352249146, |
|
"rewards/rejected": -0.09126633405685425, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_logits/chosen": -2.0242695808410645, |
|
"eval_logits/rejected": -1.7967454195022583, |
|
"eval_logps/chosen": -255.23193359375, |
|
"eval_logps/rejected": -212.03785705566406, |
|
"eval_loss": 18.33973503112793, |
|
"eval_rewards/accuracies": 0.7200000286102295, |
|
"eval_rewards/chosen": 0.029216337949037552, |
|
"eval_rewards/margins": 0.12978971004486084, |
|
"eval_rewards/rejected": -0.10057336091995239, |
|
"eval_runtime": 239.5813, |
|
"eval_samples_per_second": 8.348, |
|
"eval_steps_per_second": 0.522, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 726, |
|
"total_flos": 0.0, |
|
"train_loss": 19.76867720969124, |
|
"train_runtime": 32942.8373, |
|
"train_samples_per_second": 5.643, |
|
"train_steps_per_second": 0.022 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 726, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|