{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9992254066615027, "eval_steps": 100, "global_step": 363, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 1.3513513513513514e-08, "logits/chosen": -0.3176477253437042, "logits/rejected": -0.44033315777778625, "logps/chosen": -323.77838134765625, "logps/rejected": -252.17037963867188, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.08, "learning_rate": 1.3513513513513515e-07, "logits/chosen": -0.30215251445770264, "logits/rejected": -0.5123822093009949, "logps/chosen": -364.54791259765625, "logps/rejected": -273.88922119140625, "loss": 0.6929, "rewards/accuracies": 0.4739583432674408, "rewards/chosen": 0.0046452307142317295, "rewards/margins": 0.006923990789800882, "rewards/rejected": -0.00227876054123044, "step": 10 }, { "epoch": 0.17, "learning_rate": 2.702702702702703e-07, "logits/chosen": -0.3361722528934479, "logits/rejected": -0.5208011865615845, "logps/chosen": -336.6085205078125, "logps/rejected": -258.9732666015625, "loss": 0.6931, "rewards/accuracies": 0.53125, "rewards/chosen": -1.764330045261886e-05, "rewards/margins": 0.0017105641309171915, "rewards/rejected": -0.0017282068729400635, "step": 20 }, { "epoch": 0.25, "learning_rate": 4.054054054054054e-07, "logits/chosen": -0.30692434310913086, "logits/rejected": -0.48451298475265503, "logps/chosen": -340.2192077636719, "logps/rejected": -277.30206298828125, "loss": 0.6942, "rewards/accuracies": 0.510937511920929, "rewards/chosen": -0.0009899890283122659, "rewards/margins": -0.0009086016798391938, "rewards/rejected": -8.13871156424284e-05, "step": 30 }, { "epoch": 0.33, "learning_rate": 4.95398773006135e-07, "logits/chosen": -0.35014405846595764, "logits/rejected": -0.48199597001075745, "logps/chosen": -325.4999084472656, "logps/rejected": -281.00787353515625, "loss": 0.6933, "rewards/accuracies": 0.47187501192092896, "rewards/chosen": -0.0008938731625676155, "rewards/margins": -0.00308123673312366, "rewards/rejected": 0.0021873635705560446, "step": 40 }, { "epoch": 0.41, "learning_rate": 4.800613496932515e-07, "logits/chosen": -0.29833748936653137, "logits/rejected": -0.4896470904350281, "logps/chosen": -335.3420715332031, "logps/rejected": -276.52862548828125, "loss": 0.6938, "rewards/accuracies": 0.47187501192092896, "rewards/chosen": -0.0022904174402356148, "rewards/margins": -0.005074300337582827, "rewards/rejected": 0.002783883363008499, "step": 50 }, { "epoch": 0.5, "learning_rate": 4.647239263803681e-07, "logits/chosen": -0.3576056659221649, "logits/rejected": -0.5076812505722046, "logps/chosen": -317.60040283203125, "logps/rejected": -272.95111083984375, "loss": 0.6924, "rewards/accuracies": 0.518750011920929, "rewards/chosen": 0.0018081676680594683, "rewards/margins": 0.004018681589514017, "rewards/rejected": -0.002210513921454549, "step": 60 }, { "epoch": 0.58, "learning_rate": 4.4938650306748465e-07, "logits/chosen": -0.3065566420555115, "logits/rejected": -0.47715896368026733, "logps/chosen": -332.10443115234375, "logps/rejected": -271.3089904785156, "loss": 0.6938, "rewards/accuracies": 0.5093749761581421, "rewards/chosen": 0.0021473777014762163, "rewards/margins": 0.0010715832468122244, "rewards/rejected": 0.00107579433824867, "step": 70 }, { "epoch": 0.66, "learning_rate": 4.340490797546012e-07, "logits/chosen": -0.3575075566768646, "logits/rejected": -0.46308469772338867, "logps/chosen": -353.43768310546875, "logps/rejected": -296.3087463378906, "loss": 0.6927, "rewards/accuracies": 0.515625, "rewards/chosen": 0.00587086845189333, "rewards/margins": 0.002153487876057625, "rewards/rejected": 0.0037173808086663485, "step": 80 }, { "epoch": 0.74, "learning_rate": 4.187116564417178e-07, "logits/chosen": -0.3637954294681549, "logits/rejected": -0.4861740469932556, "logps/chosen": -337.003662109375, "logps/rejected": -293.85443115234375, "loss": 0.6929, "rewards/accuracies": 0.4765625, "rewards/chosen": 0.0004090176953468472, "rewards/margins": -0.003162259701639414, "rewards/rejected": 0.003571277018636465, "step": 90 }, { "epoch": 0.83, "learning_rate": 4.0337423312883434e-07, "logits/chosen": -0.36683008074760437, "logits/rejected": -0.478945255279541, "logps/chosen": -318.0301208496094, "logps/rejected": -269.9043273925781, "loss": 0.6922, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.0035112896002829075, "rewards/margins": 0.0036252178251743317, "rewards/rejected": -0.00011392822489142418, "step": 100 }, { "epoch": 0.91, "learning_rate": 3.880368098159509e-07, "logits/chosen": -0.33621570467948914, "logits/rejected": -0.4734673500061035, "logps/chosen": -353.40789794921875, "logps/rejected": -288.8216552734375, "loss": 0.6916, "rewards/accuracies": 0.542187511920929, "rewards/chosen": 0.011151823215186596, "rewards/margins": 0.006945834495127201, "rewards/rejected": 0.0042059896513819695, "step": 110 }, { "epoch": 0.99, "learning_rate": 3.7269938650306747e-07, "logits/chosen": -0.32742369174957275, "logits/rejected": -0.46600741147994995, "logps/chosen": -333.48138427734375, "logps/rejected": -268.22503662109375, "loss": 0.6918, "rewards/accuracies": 0.5546875, "rewards/chosen": 0.011352723464369774, "rewards/margins": 0.00880088098347187, "rewards/rejected": 0.002551841316744685, "step": 120 }, { "epoch": 1.0, "eval_logits/chosen": -0.17398348450660706, "eval_logits/rejected": -0.3204001486301422, "eval_logps/chosen": -360.14556884765625, "eval_logps/rejected": -288.76519775390625, "eval_loss": 0.69110107421875, "eval_rewards/accuracies": 0.5158730149269104, "eval_rewards/chosen": 0.011802121065557003, "eval_rewards/margins": 0.0090893330052495, "eval_rewards/rejected": 0.0027127889916300774, "eval_runtime": 155.9602, "eval_samples_per_second": 12.824, "eval_steps_per_second": 0.404, "step": 121 }, { "epoch": 1.07, "learning_rate": 3.5736196319018404e-07, "logits/chosen": -0.3686336576938629, "logits/rejected": -0.4647085666656494, "logps/chosen": -308.15850830078125, "logps/rejected": -267.60150146484375, "loss": 0.6913, "rewards/accuracies": 0.5, "rewards/chosen": 0.007929937914013863, "rewards/margins": 0.004114674404263496, "rewards/rejected": 0.003815263509750366, "step": 130 }, { "epoch": 1.16, "learning_rate": 3.420245398773006e-07, "logits/chosen": -0.3048830032348633, "logits/rejected": -0.46840834617614746, "logps/chosen": -331.2347717285156, "logps/rejected": -280.2388916015625, "loss": 0.6905, "rewards/accuracies": 0.543749988079071, "rewards/chosen": 0.011773429811000824, "rewards/margins": 0.00874190591275692, "rewards/rejected": 0.0030315222684293985, "step": 140 }, { "epoch": 1.24, "learning_rate": 3.2668711656441716e-07, "logits/chosen": -0.33495235443115234, "logits/rejected": -0.49104562401771545, "logps/chosen": -340.7163391113281, "logps/rejected": -282.6747741699219, "loss": 0.6897, "rewards/accuracies": 0.557812511920929, "rewards/chosen": 0.017182378098368645, "rewards/margins": 0.010856041684746742, "rewards/rejected": 0.006326337344944477, "step": 150 }, { "epoch": 1.32, "learning_rate": 3.1134969325153373e-07, "logits/chosen": -0.3351030945777893, "logits/rejected": -0.4738260805606842, "logps/chosen": -333.8556823730469, "logps/rejected": -291.29840087890625, "loss": 0.69, "rewards/accuracies": 0.515625, "rewards/chosen": 0.013335606083273888, "rewards/margins": 0.008105851709842682, "rewards/rejected": 0.005229754839092493, "step": 160 }, { "epoch": 1.4, "learning_rate": 2.960122699386503e-07, "logits/chosen": -0.39908671379089355, "logits/rejected": -0.5024284720420837, "logps/chosen": -315.42840576171875, "logps/rejected": -271.08343505859375, "loss": 0.6902, "rewards/accuracies": 0.546875, "rewards/chosen": 0.012735432013869286, "rewards/margins": 0.008142312988638878, "rewards/rejected": 0.004593119956552982, "step": 170 }, { "epoch": 1.49, "learning_rate": 2.8067484662576686e-07, "logits/chosen": -0.33173808455467224, "logits/rejected": -0.5215679407119751, "logps/chosen": -342.9329528808594, "logps/rejected": -273.66485595703125, "loss": 0.6903, "rewards/accuracies": 0.5328124761581421, "rewards/chosen": 0.010897275060415268, "rewards/margins": 0.004992068745195866, "rewards/rejected": 0.005905206315219402, "step": 180 }, { "epoch": 1.57, "learning_rate": 2.653374233128834e-07, "logits/chosen": -0.360731840133667, "logits/rejected": -0.4705289900302887, "logps/chosen": -338.09039306640625, "logps/rejected": -281.22674560546875, "loss": 0.6893, "rewards/accuracies": 0.542187511920929, "rewards/chosen": 0.013345611281692982, "rewards/margins": 0.008156336843967438, "rewards/rejected": 0.005189274903386831, "step": 190 }, { "epoch": 1.65, "learning_rate": 2.5e-07, "logits/chosen": -0.271279901266098, "logits/rejected": -0.5268155932426453, "logps/chosen": -361.9761657714844, "logps/rejected": -279.6358642578125, "loss": 0.689, "rewards/accuracies": 0.559374988079071, "rewards/chosen": 0.015021780505776405, "rewards/margins": 0.010343039408326149, "rewards/rejected": 0.004678742028772831, "step": 200 }, { "epoch": 1.74, "learning_rate": 2.3466257668711655e-07, "logits/chosen": -0.33011576533317566, "logits/rejected": -0.4637225270271301, "logps/chosen": -347.186767578125, "logps/rejected": -278.4908142089844, "loss": 0.6892, "rewards/accuracies": 0.5609375238418579, "rewards/chosen": 0.017149794846773148, "rewards/margins": 0.00933277327567339, "rewards/rejected": 0.007817023433744907, "step": 210 }, { "epoch": 1.82, "learning_rate": 2.1932515337423312e-07, "logits/chosen": -0.320119172334671, "logits/rejected": -0.4756544232368469, "logps/chosen": -337.0110778808594, "logps/rejected": -275.7445373535156, "loss": 0.6889, "rewards/accuracies": 0.528124988079071, "rewards/chosen": 0.014787524938583374, "rewards/margins": 0.008823606185615063, "rewards/rejected": 0.005963918752968311, "step": 220 }, { "epoch": 1.9, "learning_rate": 2.0398773006134968e-07, "logits/chosen": -0.341126024723053, "logits/rejected": -0.5036292672157288, "logps/chosen": -342.09808349609375, "logps/rejected": -284.2479553222656, "loss": 0.688, "rewards/accuracies": 0.551562488079071, "rewards/chosen": 0.018701931461691856, "rewards/margins": 0.013875585980713367, "rewards/rejected": 0.004826345480978489, "step": 230 }, { "epoch": 1.98, "learning_rate": 1.8865030674846625e-07, "logits/chosen": -0.3302518129348755, "logits/rejected": -0.5235751867294312, "logps/chosen": -336.75885009765625, "logps/rejected": -276.89056396484375, "loss": 0.6887, "rewards/accuracies": 0.5484374761581421, "rewards/chosen": 0.014634380117058754, "rewards/margins": 0.010870048776268959, "rewards/rejected": 0.003764331340789795, "step": 240 }, { "epoch": 2.0, "eval_logits/chosen": -0.1741456687450409, "eval_logits/rejected": -0.32032549381256104, "eval_logps/chosen": -360.0811767578125, "eval_logps/rejected": -288.72760009765625, "eval_loss": 0.6885652542114258, "eval_rewards/accuracies": 0.5436508059501648, "eval_rewards/chosen": 0.018233804032206535, "eval_rewards/margins": 0.011760968714952469, "eval_rewards/rejected": 0.006472836248576641, "eval_runtime": 155.5913, "eval_samples_per_second": 12.854, "eval_steps_per_second": 0.405, "step": 242 }, { "epoch": 2.07, "learning_rate": 1.733128834355828e-07, "logits/chosen": -0.338714063167572, "logits/rejected": -0.4940338730812073, "logps/chosen": -329.88330078125, "logps/rejected": -284.783447265625, "loss": 0.6882, "rewards/accuracies": 0.559374988079071, "rewards/chosen": 0.018329061567783356, "rewards/margins": 0.010221143253147602, "rewards/rejected": 0.008107921108603477, "step": 250 }, { "epoch": 2.15, "learning_rate": 1.5797546012269938e-07, "logits/chosen": -0.336908757686615, "logits/rejected": -0.508610725402832, "logps/chosen": -355.3616027832031, "logps/rejected": -275.9775085449219, "loss": 0.6872, "rewards/accuracies": 0.5640624761581421, "rewards/chosen": 0.016039682552218437, "rewards/margins": 0.017992937937378883, "rewards/rejected": -0.001953254686668515, "step": 260 }, { "epoch": 2.23, "learning_rate": 1.4263803680981594e-07, "logits/chosen": -0.3601071536540985, "logits/rejected": -0.53021240234375, "logps/chosen": -337.66424560546875, "logps/rejected": -281.8548889160156, "loss": 0.6877, "rewards/accuracies": 0.5703125, "rewards/chosen": 0.021220825612545013, "rewards/margins": 0.01665753871202469, "rewards/rejected": 0.004563285503536463, "step": 270 }, { "epoch": 2.31, "learning_rate": 1.273006134969325e-07, "logits/chosen": -0.2682925760746002, "logits/rejected": -0.43069085478782654, "logps/chosen": -344.897705078125, "logps/rejected": -284.98138427734375, "loss": 0.6876, "rewards/accuracies": 0.5640624761581421, "rewards/chosen": 0.01918674074113369, "rewards/margins": 0.009623361751437187, "rewards/rejected": 0.009563378989696503, "step": 280 }, { "epoch": 2.4, "learning_rate": 1.1196319018404908e-07, "logits/chosen": -0.3256112337112427, "logits/rejected": -0.4699035584926605, "logps/chosen": -340.5793151855469, "logps/rejected": -278.5997314453125, "loss": 0.688, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": 0.019764618948101997, "rewards/margins": 0.012027645483613014, "rewards/rejected": 0.007736973464488983, "step": 290 }, { "epoch": 2.48, "learning_rate": 9.662576687116564e-08, "logits/chosen": -0.3254753351211548, "logits/rejected": -0.49168673157691956, "logps/chosen": -323.43658447265625, "logps/rejected": -268.68194580078125, "loss": 0.6872, "rewards/accuracies": 0.581250011920929, "rewards/chosen": 0.024882303550839424, "rewards/margins": 0.01763671264052391, "rewards/rejected": 0.0072455937042832375, "step": 300 }, { "epoch": 2.56, "learning_rate": 8.12883435582822e-08, "logits/chosen": -0.2992364764213562, "logits/rejected": -0.5038896799087524, "logps/chosen": -336.90447998046875, "logps/rejected": -273.3828430175781, "loss": 0.6875, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": 0.019791873171925545, "rewards/margins": 0.015618661418557167, "rewards/rejected": 0.00417321128770709, "step": 310 }, { "epoch": 2.64, "learning_rate": 6.595092024539877e-08, "logits/chosen": -0.3277471959590912, "logits/rejected": -0.48526397347450256, "logps/chosen": -316.490966796875, "logps/rejected": -280.82403564453125, "loss": 0.6884, "rewards/accuracies": 0.573437511920929, "rewards/chosen": 0.020627859979867935, "rewards/margins": 0.014810837805271149, "rewards/rejected": 0.005817021708935499, "step": 320 }, { "epoch": 2.73, "learning_rate": 5.061349693251534e-08, "logits/chosen": -0.29638558626174927, "logits/rejected": -0.4925597310066223, "logps/chosen": -344.7980651855469, "logps/rejected": -279.2032775878906, "loss": 0.6873, "rewards/accuracies": 0.578125, "rewards/chosen": 0.024601539596915245, "rewards/margins": 0.014131123200058937, "rewards/rejected": 0.010470417328178883, "step": 330 }, { "epoch": 2.81, "learning_rate": 3.5276073619631896e-08, "logits/chosen": -0.3097667098045349, "logits/rejected": -0.4607706069946289, "logps/chosen": -333.1075134277344, "logps/rejected": -285.7237548828125, "loss": 0.687, "rewards/accuracies": 0.5703125, "rewards/chosen": 0.02195788361132145, "rewards/margins": 0.015133949927985668, "rewards/rejected": 0.006823929958045483, "step": 340 }, { "epoch": 2.89, "learning_rate": 1.9938650306748464e-08, "logits/chosen": -0.3493804633617401, "logits/rejected": -0.45914044976234436, "logps/chosen": -327.6866455078125, "logps/rejected": -290.8825378417969, "loss": 0.6873, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": 0.018699336796998978, "rewards/margins": 0.008175373077392578, "rewards/rejected": 0.010523964650928974, "step": 350 }, { "epoch": 2.97, "learning_rate": 4.601226993865031e-09, "logits/chosen": -0.3387224078178406, "logits/rejected": -0.477064847946167, "logps/chosen": -340.5426940917969, "logps/rejected": -276.86090087890625, "loss": 0.6865, "rewards/accuracies": 0.578125, "rewards/chosen": 0.023312732577323914, "rewards/margins": 0.014612337574362755, "rewards/rejected": 0.008700395002961159, "step": 360 }, { "epoch": 3.0, "eval_logits/chosen": -0.17340299487113953, "eval_logits/rejected": -0.31969568133354187, "eval_logps/chosen": -359.973876953125, "eval_logps/rejected": -288.69586181640625, "eval_loss": 0.6869306564331055, "eval_rewards/accuracies": 0.5833333134651184, "eval_rewards/chosen": 0.028966180980205536, "eval_rewards/margins": 0.01931903511285782, "eval_rewards/rejected": 0.009647144004702568, "eval_runtime": 155.81, "eval_samples_per_second": 12.836, "eval_steps_per_second": 0.404, "step": 363 }, { "epoch": 3.0, "step": 363, "total_flos": 0.0, "train_loss": 0.689942762707219, "train_runtime": 25419.9306, "train_samples_per_second": 7.313, "train_steps_per_second": 0.014 } ], "logging_steps": 10, "max_steps": 363, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "trial_name": null, "trial_params": null }