{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 476, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01050420168067227, "grad_norm": 18.321366845625462, "learning_rate": 3.125e-08, "logits/chosen": -2.9222915172576904, "logits/rejected": -2.8865013122558594, "logps/chosen": -0.9845348596572876, "logps/rejected": -1.163271427154541, "loss": 1.6281, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -1.9690697193145752, "rewards/margins": 0.35747313499450684, "rewards/rejected": -2.326542854309082, "step": 5 }, { "epoch": 0.02100840336134454, "grad_norm": 17.6534655125861, "learning_rate": 6.25e-08, "logits/chosen": -2.9073705673217773, "logits/rejected": -2.8619837760925293, "logps/chosen": -0.9123918414115906, "logps/rejected": -1.1516292095184326, "loss": 1.5762, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.8247836828231812, "rewards/margins": 0.47847509384155273, "rewards/rejected": -2.3032584190368652, "step": 10 }, { "epoch": 0.031512605042016806, "grad_norm": 19.44309460886479, "learning_rate": 9.375e-08, "logits/chosen": -2.939253807067871, "logits/rejected": -2.871269941329956, "logps/chosen": -0.9964561462402344, "logps/rejected": -1.157931923866272, "loss": 1.6292, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.9929122924804688, "rewards/margins": 0.32295167446136475, "rewards/rejected": -2.315863847732544, "step": 15 }, { "epoch": 0.04201680672268908, "grad_norm": 23.00550320924175, "learning_rate": 1.25e-07, "logits/chosen": -2.8980793952941895, "logits/rejected": -2.8317883014678955, "logps/chosen": -1.0304123163223267, "logps/rejected": -1.2014151811599731, "loss": 1.598, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.0608246326446533, "rewards/margins": 0.34200599789619446, "rewards/rejected": -2.4028303623199463, "step": 20 }, { "epoch": 0.052521008403361345, "grad_norm": 25.91201580448508, "learning_rate": 1.5625e-07, "logits/chosen": -2.89921236038208, "logits/rejected": -2.838594913482666, "logps/chosen": -0.9657201766967773, "logps/rejected": -1.170414686203003, "loss": 1.6399, "rewards/accuracies": 0.625, "rewards/chosen": -1.9314403533935547, "rewards/margins": 0.40938907861709595, "rewards/rejected": -2.340829372406006, "step": 25 }, { "epoch": 0.06302521008403361, "grad_norm": 19.053951631856187, "learning_rate": 1.875e-07, "logits/chosen": -2.915055513381958, "logits/rejected": -2.8307695388793945, "logps/chosen": -1.031659722328186, "logps/rejected": -1.2121422290802002, "loss": 1.5382, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.063319444656372, "rewards/margins": 0.3609650731086731, "rewards/rejected": -2.4242844581604004, "step": 30 }, { "epoch": 0.07352941176470588, "grad_norm": 22.225870405405676, "learning_rate": 2.1874999999999997e-07, "logits/chosen": -2.8420331478118896, "logits/rejected": -2.8062918186187744, "logps/chosen": -1.0356570482254028, "logps/rejected": -1.2093064785003662, "loss": 1.5637, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.0713140964508057, "rewards/margins": 0.34729865193367004, "rewards/rejected": -2.4186129570007324, "step": 35 }, { "epoch": 0.08403361344537816, "grad_norm": 25.66800900270909, "learning_rate": 2.5e-07, "logits/chosen": -2.845728635787964, "logits/rejected": -2.8214545249938965, "logps/chosen": -1.0431854724884033, "logps/rejected": -1.3399583101272583, "loss": 1.5204, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.0863709449768066, "rewards/margins": 0.5935453176498413, "rewards/rejected": -2.6799166202545166, "step": 40 }, { "epoch": 0.09453781512605042, "grad_norm": 18.254417500947117, "learning_rate": 2.8125e-07, "logits/chosen": -2.8101553916931152, "logits/rejected": -2.773531436920166, "logps/chosen": -1.061798334121704, "logps/rejected": -1.3759087324142456, "loss": 1.501, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.123596668243408, "rewards/margins": 0.6282207369804382, "rewards/rejected": -2.751817464828491, "step": 45 }, { "epoch": 0.10504201680672269, "grad_norm": 20.430861520566957, "learning_rate": 2.999838368626891e-07, "logits/chosen": -2.9204559326171875, "logits/rejected": -2.878157615661621, "logps/chosen": -1.0430495738983154, "logps/rejected": -1.2767090797424316, "loss": 1.5858, "rewards/accuracies": 0.625, "rewards/chosen": -2.086099147796631, "rewards/margins": 0.4673191010951996, "rewards/rejected": -2.5534181594848633, "step": 50 }, { "epoch": 0.11554621848739496, "grad_norm": 19.914448467924856, "learning_rate": 2.9980204156901854e-07, "logits/chosen": -2.7936322689056396, "logits/rejected": -2.7450051307678223, "logps/chosen": -1.1547470092773438, "logps/rejected": -1.436762809753418, "loss": 1.5254, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -2.3094940185546875, "rewards/margins": 0.5640314817428589, "rewards/rejected": -2.873525619506836, "step": 55 }, { "epoch": 0.12605042016806722, "grad_norm": 27.25108493191, "learning_rate": 2.994184927185504e-07, "logits/chosen": -2.8165132999420166, "logits/rejected": -2.765676736831665, "logps/chosen": -1.178091287612915, "logps/rejected": -1.3924609422683716, "loss": 1.5556, "rewards/accuracies": 0.625, "rewards/chosen": -2.35618257522583, "rewards/margins": 0.428739458322525, "rewards/rejected": -2.784921884536743, "step": 60 }, { "epoch": 0.13655462184873948, "grad_norm": 25.118665709906168, "learning_rate": 2.9883370687530456e-07, "logits/chosen": -2.8244755268096924, "logits/rejected": -2.7773241996765137, "logps/chosen": -1.1520100831985474, "logps/rejected": -1.447547197341919, "loss": 1.451, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.3040201663970947, "rewards/margins": 0.5910741090774536, "rewards/rejected": -2.895094394683838, "step": 65 }, { "epoch": 0.14705882352941177, "grad_norm": 29.16487182636346, "learning_rate": 2.980484716295075e-07, "logits/chosen": -2.787673234939575, "logits/rejected": -2.726388692855835, "logps/chosen": -1.0457687377929688, "logps/rejected": -1.5030543804168701, "loss": 1.4511, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.0915374755859375, "rewards/margins": 0.9145712852478027, "rewards/rejected": -3.0061087608337402, "step": 70 }, { "epoch": 0.15756302521008403, "grad_norm": 26.07757243320597, "learning_rate": 2.970638445368648e-07, "logits/chosen": -2.776176929473877, "logits/rejected": -2.7326908111572266, "logps/chosen": -1.0123913288116455, "logps/rejected": -1.404775619506836, "loss": 1.4303, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -2.024782657623291, "rewards/margins": 0.78476881980896, "rewards/rejected": -2.809551239013672, "step": 75 }, { "epoch": 0.16806722689075632, "grad_norm": 35.195975635749924, "learning_rate": 2.958811516942438e-07, "logits/chosen": -2.767622470855713, "logits/rejected": -2.7111330032348633, "logps/chosen": -1.1310784816741943, "logps/rejected": -1.712956428527832, "loss": 1.3445, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -2.2621569633483887, "rewards/margins": 1.1637558937072754, "rewards/rejected": -3.425912857055664, "step": 80 }, { "epoch": 0.17857142857142858, "grad_norm": 29.558096882428416, "learning_rate": 2.9450198595368514e-07, "logits/chosen": -2.7697668075561523, "logits/rejected": -2.7279648780822754, "logps/chosen": -1.150879979133606, "logps/rejected": -1.5715720653533936, "loss": 1.3627, "rewards/accuracies": 0.6875, "rewards/chosen": -2.301759958267212, "rewards/margins": 0.8413840532302856, "rewards/rejected": -3.143144130706787, "step": 85 }, { "epoch": 0.18907563025210083, "grad_norm": 31.18138106236945, "learning_rate": 2.929282047771477e-07, "logits/chosen": -2.696549892425537, "logits/rejected": -2.6848576068878174, "logps/chosen": -1.1329095363616943, "logps/rejected": -1.585242509841919, "loss": 1.3747, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.2658190727233887, "rewards/margins": 0.9046661257743835, "rewards/rejected": -3.170485019683838, "step": 90 }, { "epoch": 0.19957983193277312, "grad_norm": 91.23116963300726, "learning_rate": 2.9116192773487665e-07, "logits/chosen": -2.682312488555908, "logits/rejected": -2.673649549484253, "logps/chosen": -1.3071677684783936, "logps/rejected": -1.7945388555526733, "loss": 1.4405, "rewards/accuracies": 0.71875, "rewards/chosen": -2.614335536956787, "rewards/margins": 0.9747417569160461, "rewards/rejected": -3.5890777111053467, "step": 95 }, { "epoch": 0.21008403361344538, "grad_norm": 38.910751298944, "learning_rate": 2.892055336507641e-07, "logits/chosen": -2.6822099685668945, "logits/rejected": -2.6384642124176025, "logps/chosen": -1.2206847667694092, "logps/rejected": -1.8117921352386475, "loss": 1.3468, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.4413695335388184, "rewards/margins": 1.1822149753570557, "rewards/rejected": -3.623584270477295, "step": 100 }, { "epoch": 0.22058823529411764, "grad_norm": 27.439545713989038, "learning_rate": 2.8706165739854637e-07, "logits/chosen": -2.684013605117798, "logits/rejected": -2.660853147506714, "logps/chosen": -1.1910176277160645, "logps/rejected": -1.6350994110107422, "loss": 1.3852, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.382035255432129, "rewards/margins": 0.8881640434265137, "rewards/rejected": -3.2701988220214844, "step": 105 }, { "epoch": 0.23109243697478993, "grad_norm": 29.807019016962876, "learning_rate": 2.847331863531529e-07, "logits/chosen": -2.6825053691864014, "logits/rejected": -2.6679558753967285, "logps/chosen": -1.1532232761383057, "logps/rejected": -1.7548431158065796, "loss": 1.2615, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -2.3064465522766113, "rewards/margins": 1.203240156173706, "rewards/rejected": -3.509686231613159, "step": 110 }, { "epoch": 0.2415966386554622, "grad_norm": 47.6414807939217, "learning_rate": 2.8222325650198677e-07, "logits/chosen": -2.676471471786499, "logits/rejected": -2.6575491428375244, "logps/chosen": -1.2915210723876953, "logps/rejected": -1.9804328680038452, "loss": 1.3405, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.5830421447753906, "rewards/margins": 1.377823829650879, "rewards/rejected": -3.9608657360076904, "step": 115 }, { "epoch": 0.25210084033613445, "grad_norm": 33.68771160542956, "learning_rate": 2.7953524822137317e-07, "logits/chosen": -2.6282732486724854, "logits/rejected": -2.6111860275268555, "logps/chosen": -1.2532024383544922, "logps/rejected": -2.1360292434692383, "loss": 1.2154, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.5064048767089844, "rewards/margins": 1.7656539678573608, "rewards/rejected": -4.272058486938477, "step": 120 }, { "epoch": 0.26260504201680673, "grad_norm": 36.94049761692212, "learning_rate": 2.766727817238648e-07, "logits/chosen": -2.625383138656616, "logits/rejected": -2.5985493659973145, "logps/chosen": -1.3159258365631104, "logps/rejected": -1.8669437170028687, "loss": 1.3794, "rewards/accuracies": 0.71875, "rewards/chosen": -2.6318516731262207, "rewards/margins": 1.1020352840423584, "rewards/rejected": -3.7338874340057373, "step": 125 }, { "epoch": 0.27310924369747897, "grad_norm": 44.2795876444211, "learning_rate": 2.7363971218253573e-07, "logits/chosen": -2.585216760635376, "logits/rejected": -2.5424036979675293, "logps/chosen": -1.410796046257019, "logps/rejected": -2.0416605472564697, "loss": 1.3051, "rewards/accuracies": 0.71875, "rewards/chosen": -2.821592092514038, "rewards/margins": 1.261729121208191, "rewards/rejected": -4.0833210945129395, "step": 130 }, { "epoch": 0.28361344537815125, "grad_norm": 41.62676495102148, "learning_rate": 2.7044012453882974e-07, "logits/chosen": -2.5913612842559814, "logits/rejected": -2.554213047027588, "logps/chosen": -1.5970208644866943, "logps/rejected": -2.28006649017334, "loss": 1.2034, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -3.1940417289733887, "rewards/margins": 1.3660913705825806, "rewards/rejected": -4.56013298034668, "step": 135 }, { "epoch": 0.29411764705882354, "grad_norm": 36.45682514602446, "learning_rate": 2.670783280009569e-07, "logits/chosen": -2.583467960357666, "logits/rejected": -2.563615083694458, "logps/chosen": -1.3852840662002563, "logps/rejected": -1.976252794265747, "loss": 1.2209, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -2.7705681324005127, "rewards/margins": 1.1819374561309814, "rewards/rejected": -3.952505588531494, "step": 140 }, { "epoch": 0.30462184873949577, "grad_norm": 32.90514134094626, "learning_rate": 2.635588502402468e-07, "logits/chosen": -2.6025681495666504, "logits/rejected": -2.5791728496551514, "logps/chosen": -1.444962978363037, "logps/rejected": -2.082648515701294, "loss": 1.2251, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -2.889925956726074, "rewards/margins": 1.2753708362579346, "rewards/rejected": -4.165297031402588, "step": 145 }, { "epoch": 0.31512605042016806, "grad_norm": 46.925189207028446, "learning_rate": 2.598864312932762e-07, "logits/chosen": -2.5708370208740234, "logits/rejected": -2.5425729751586914, "logps/chosen": -1.558255910873413, "logps/rejected": -2.360576629638672, "loss": 1.2404, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -3.116511821746826, "rewards/margins": 1.6046416759490967, "rewards/rejected": -4.721153259277344, "step": 150 }, { "epoch": 0.32563025210084034, "grad_norm": 44.68173396497493, "learning_rate": 2.560660171779821e-07, "logits/chosen": -2.5237948894500732, "logits/rejected": -2.5131349563598633, "logps/chosen": -1.7005817890167236, "logps/rejected": -2.477543592453003, "loss": 1.2383, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -3.4011635780334473, "rewards/margins": 1.5539240837097168, "rewards/rejected": -4.955087184906006, "step": 155 }, { "epoch": 0.33613445378151263, "grad_norm": 42.56897964236879, "learning_rate": 2.521027532323594e-07, "logits/chosen": -2.50708270072937, "logits/rejected": -2.4973719120025635, "logps/chosen": -1.5736862421035767, "logps/rejected": -2.4314279556274414, "loss": 1.2177, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -3.1473724842071533, "rewards/margins": 1.7154836654663086, "rewards/rejected": -4.862855911254883, "step": 160 }, { "epoch": 0.34663865546218486, "grad_norm": 42.67514136639567, "learning_rate": 2.480019771847139e-07, "logits/chosen": -2.4965438842773438, "logits/rejected": -2.5141289234161377, "logps/chosen": -1.6085281372070312, "logps/rejected": -2.5046117305755615, "loss": 1.1715, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -3.2170562744140625, "rewards/margins": 1.79216730594635, "rewards/rejected": -5.009223461151123, "step": 165 }, { "epoch": 0.35714285714285715, "grad_norm": 56.3843788509327, "learning_rate": 2.4376921196480405e-07, "logits/chosen": -2.4241461753845215, "logits/rejected": -2.4171204566955566, "logps/chosen": -1.8740981817245483, "logps/rejected": -2.842223644256592, "loss": 1.1553, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -3.7481963634490967, "rewards/margins": 1.9362504482269287, "rewards/rejected": -5.684447288513184, "step": 170 }, { "epoch": 0.36764705882352944, "grad_norm": 58.35243830598972, "learning_rate": 2.3941015826555265e-07, "logits/chosen": -2.433060646057129, "logits/rejected": -2.4348819255828857, "logps/chosen": -2.003147840499878, "logps/rejected": -2.907435894012451, "loss": 1.2262, "rewards/accuracies": 0.75, "rewards/chosen": -4.006295680999756, "rewards/margins": 1.808576226234436, "rewards/rejected": -5.814871788024902, "step": 175 }, { "epoch": 0.37815126050420167, "grad_norm": 62.00858329659252, "learning_rate": 2.3493068686534757e-07, "logits/chosen": -2.4191861152648926, "logits/rejected": -2.4209141731262207, "logps/chosen": -2.0410985946655273, "logps/rejected": -3.1209053993225098, "loss": 1.2189, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -4.082197189331055, "rewards/margins": 2.159613847732544, "rewards/rejected": -6.2418107986450195, "step": 180 }, { "epoch": 0.38865546218487396, "grad_norm": 52.62029016306216, "learning_rate": 2.3033683072127066e-07, "logits/chosen": -2.4004642963409424, "logits/rejected": -2.3723645210266113, "logps/chosen": -1.9122893810272217, "logps/rejected": -3.104297161102295, "loss": 1.1119, "rewards/accuracies": 0.78125, "rewards/chosen": -3.8245787620544434, "rewards/margins": 2.38401460647583, "rewards/rejected": -6.20859432220459, "step": 185 }, { "epoch": 0.39915966386554624, "grad_norm": 49.57165162916381, "learning_rate": 2.2563477684390454e-07, "logits/chosen": -2.394556999206543, "logits/rejected": -2.4077131748199463, "logps/chosen": -1.9445598125457764, "logps/rejected": -3.2773900032043457, "loss": 1.0746, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -3.8891196250915527, "rewards/margins": 2.6656596660614014, "rewards/rejected": -6.554780006408691, "step": 190 }, { "epoch": 0.4096638655462185, "grad_norm": 42.22482180826213, "learning_rate": 2.2083085796465976e-07, "logits/chosen": -2.3444042205810547, "logits/rejected": -2.3371148109436035, "logps/chosen": -2.0608248710632324, "logps/rejected": -2.9502105712890625, "loss": 1.1684, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -4.121649742126465, "rewards/margins": 1.7787716388702393, "rewards/rejected": -5.900421142578125, "step": 195 }, { "epoch": 0.42016806722689076, "grad_norm": 62.069592428442725, "learning_rate": 2.1593154400684523e-07, "logits/chosen": -2.3920085430145264, "logits/rejected": -2.3790066242218018, "logps/chosen": -2.172396183013916, "logps/rejected": -3.3875110149383545, "loss": 1.1134, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -4.344792366027832, "rewards/margins": 2.430229663848877, "rewards/rejected": -6.775022029876709, "step": 200 }, { "epoch": 0.43067226890756305, "grad_norm": 63.80548454611886, "learning_rate": 2.1094343337196797e-07, "logits/chosen": -2.2799956798553467, "logits/rejected": -2.3044838905334473, "logps/chosen": -2.1241445541381836, "logps/rejected": -3.2871341705322266, "loss": 1.074, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -4.248289108276367, "rewards/margins": 2.325979471206665, "rewards/rejected": -6.574268341064453, "step": 205 }, { "epoch": 0.4411764705882353, "grad_norm": 60.76644197865358, "learning_rate": 2.058732440529989e-07, "logits/chosen": -2.369267225265503, "logits/rejected": -2.3428282737731934, "logps/chosen": -2.2345564365386963, "logps/rejected": -3.428501844406128, "loss": 1.0777, "rewards/accuracies": 0.78125, "rewards/chosen": -4.469112873077393, "rewards/margins": 2.3878910541534424, "rewards/rejected": -6.857003688812256, "step": 210 }, { "epoch": 0.45168067226890757, "grad_norm": 49.5591416904311, "learning_rate": 2.0072780458657222e-07, "logits/chosen": -2.3571441173553467, "logits/rejected": -2.3563666343688965, "logps/chosen": -2.1674928665161133, "logps/rejected": -3.2230000495910645, "loss": 1.0862, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -4.334985733032227, "rewards/margins": 2.1110141277313232, "rewards/rejected": -6.446000099182129, "step": 215 }, { "epoch": 0.46218487394957986, "grad_norm": 53.25790647881489, "learning_rate": 1.9551404485630487e-07, "logits/chosen": -2.3252339363098145, "logits/rejected": -2.3368701934814453, "logps/chosen": -2.3293991088867188, "logps/rejected": -3.515172243118286, "loss": 1.113, "rewards/accuracies": 0.78125, "rewards/chosen": -4.6587982177734375, "rewards/margins": 2.371546506881714, "rewards/rejected": -7.030344486236572, "step": 220 }, { "epoch": 0.4726890756302521, "grad_norm": 107.94133477979558, "learning_rate": 1.9023898675962123e-07, "logits/chosen": -2.2349350452423096, "logits/rejected": -2.270430088043213, "logps/chosen": -2.319396495819092, "logps/rejected": -3.6063385009765625, "loss": 1.0598, "rewards/accuracies": 0.78125, "rewards/chosen": -4.638792991638184, "rewards/margins": 2.573883533477783, "rewards/rejected": -7.212677001953125, "step": 225 }, { "epoch": 0.4831932773109244, "grad_norm": 51.80093777317445, "learning_rate": 1.8490973475065407e-07, "logits/chosen": -2.2946877479553223, "logits/rejected": -2.2905642986297607, "logps/chosen": -2.3950748443603516, "logps/rejected": -3.634678602218628, "loss": 1.0982, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -4.790149688720703, "rewards/margins": 2.4792075157165527, "rewards/rejected": -7.269357204437256, "step": 230 }, { "epoch": 0.49369747899159666, "grad_norm": 72.76258850252798, "learning_rate": 1.795334662719576e-07, "logits/chosen": -2.278480052947998, "logits/rejected": -2.299923896789551, "logps/chosen": -2.357292652130127, "logps/rejected": -3.7696902751922607, "loss": 1.0057, "rewards/accuracies": 0.8687499761581421, "rewards/chosen": -4.714585304260254, "rewards/margins": 2.8247950077056885, "rewards/rejected": -7.5393805503845215, "step": 235 }, { "epoch": 0.5042016806722689, "grad_norm": 64.28632501194514, "learning_rate": 1.7411742208792024e-07, "logits/chosen": -2.2843871116638184, "logits/rejected": -2.300901412963867, "logps/chosen": -2.508634090423584, "logps/rejected": -3.8370189666748047, "loss": 1.033, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -5.017268180847168, "rewards/margins": 2.6567699909210205, "rewards/rejected": -7.674037933349609, "step": 240 }, { "epoch": 0.5147058823529411, "grad_norm": 56.78201656922531, "learning_rate": 1.686688965328944e-07, "logits/chosen": -2.2179243564605713, "logits/rejected": -2.2388010025024414, "logps/chosen": -2.3462517261505127, "logps/rejected": -3.506201982498169, "loss": 0.9703, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -4.692503452301025, "rewards/margins": 2.3199009895324707, "rewards/rejected": -7.012403964996338, "step": 245 }, { "epoch": 0.5252100840336135, "grad_norm": 66.31368878059381, "learning_rate": 1.6319522768717944e-07, "logits/chosen": -2.254875421524048, "logits/rejected": -2.2779059410095215, "logps/chosen": -2.398496150970459, "logps/rejected": -3.7779440879821777, "loss": 1.0355, "rewards/accuracies": 0.8125, "rewards/chosen": -4.796992301940918, "rewards/margins": 2.758897542953491, "rewards/rejected": -7.5558881759643555, "step": 250 }, { "epoch": 0.5357142857142857, "grad_norm": 56.3335721813079, "learning_rate": 1.5770378749408654e-07, "logits/chosen": -2.2989799976348877, "logits/rejected": -2.2941720485687256, "logps/chosen": -2.581568479537964, "logps/rejected": -3.853482723236084, "loss": 1.0114, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -5.163136959075928, "rewards/margins": 2.5438289642333984, "rewards/rejected": -7.706965446472168, "step": 255 }, { "epoch": 0.5462184873949579, "grad_norm": 64.04241236117856, "learning_rate": 1.522019718313975e-07, "logits/chosen": -2.2507102489471436, "logits/rejected": -2.272916316986084, "logps/chosen": -2.6012022495269775, "logps/rejected": -4.0311384201049805, "loss": 0.992, "rewards/accuracies": 0.8125, "rewards/chosen": -5.202404499053955, "rewards/margins": 2.859873056411743, "rewards/rejected": -8.062276840209961, "step": 260 }, { "epoch": 0.5567226890756303, "grad_norm": 59.88114738443522, "learning_rate": 1.4669719055058805e-07, "logits/chosen": -2.2266743183135986, "logits/rejected": -2.2351810932159424, "logps/chosen": -2.7907989025115967, "logps/rejected": -3.9706473350524902, "loss": 1.0608, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -5.581597805023193, "rewards/margins": 2.35969614982605, "rewards/rejected": -7.9412946701049805, "step": 265 }, { "epoch": 0.5672268907563025, "grad_norm": 63.37030995368488, "learning_rate": 1.411968574972317e-07, "logits/chosen": -2.230888843536377, "logits/rejected": -2.2535951137542725, "logps/chosen": -2.7027249336242676, "logps/rejected": -4.1824774742126465, "loss": 0.8988, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -5.405449867248535, "rewards/margins": 2.9595046043395996, "rewards/rejected": -8.364954948425293, "step": 270 }, { "epoch": 0.5777310924369747, "grad_norm": 69.41737055216304, "learning_rate": 1.357083805260243e-07, "logits/chosen": -2.2285051345825195, "logits/rejected": -2.2328968048095703, "logps/chosen": -2.7089076042175293, "logps/rejected": -3.9290478229522705, "loss": 0.969, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -5.417815208435059, "rewards/margins": 2.440279483795166, "rewards/rejected": -7.858095645904541, "step": 275 }, { "epoch": 0.5882352941176471, "grad_norm": 63.48615863862009, "learning_rate": 1.302391515238772e-07, "logits/chosen": -2.2015397548675537, "logits/rejected": -2.2215192317962646, "logps/chosen": -2.722857713699341, "logps/rejected": -4.155056953430176, "loss": 0.9593, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -5.445715427398682, "rewards/margins": 2.86439847946167, "rewards/rejected": -8.310113906860352, "step": 280 }, { "epoch": 0.5987394957983193, "grad_norm": 87.6726372411929, "learning_rate": 1.247965364545152e-07, "logits/chosen": -2.1690385341644287, "logits/rejected": -2.1941065788269043, "logps/chosen": -2.697335720062256, "logps/rejected": -4.129209995269775, "loss": 1.0182, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -5.394671440124512, "rewards/margins": 2.8637471199035645, "rewards/rejected": -8.25841999053955, "step": 285 }, { "epoch": 0.6092436974789915, "grad_norm": 54.49746884782157, "learning_rate": 1.193878654379889e-07, "logits/chosen": -2.1245057582855225, "logits/rejected": -2.1610589027404785, "logps/chosen": -2.6949501037597656, "logps/rejected": -4.0747246742248535, "loss": 1.0182, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -5.389900207519531, "rewards/margins": 2.759549617767334, "rewards/rejected": -8.149449348449707, "step": 290 }, { "epoch": 0.6197478991596639, "grad_norm": 49.136356343546524, "learning_rate": 1.1402042287846068e-07, "logits/chosen": -2.1676132678985596, "logits/rejected": -2.1930439472198486, "logps/chosen": -2.85373592376709, "logps/rejected": -4.212955951690674, "loss": 1.0398, "rewards/accuracies": 0.78125, "rewards/chosen": -5.70747184753418, "rewards/margins": 2.7184391021728516, "rewards/rejected": -8.425911903381348, "step": 295 }, { "epoch": 0.6302521008403361, "grad_norm": 56.2186810691314, "learning_rate": 1.0870143765356105e-07, "logits/chosen": -2.1709885597229004, "logits/rejected": -2.1842150688171387, "logps/chosen": -2.9935240745544434, "logps/rejected": -4.36973762512207, "loss": 1.0064, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -5.987048149108887, "rewards/margins": 2.7524266242980957, "rewards/rejected": -8.73947525024414, "step": 300 }, { "epoch": 0.6407563025210085, "grad_norm": 74.55055606717697, "learning_rate": 1.0343807337852794e-07, "logits/chosen": -2.1351749897003174, "logits/rejected": -2.1373703479766846, "logps/chosen": -2.965303897857666, "logps/rejected": -4.419961929321289, "loss": 1.0268, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -5.930607795715332, "rewards/margins": 2.9093151092529297, "rewards/rejected": -8.839923858642578, "step": 305 }, { "epoch": 0.6512605042016807, "grad_norm": 53.97579171817796, "learning_rate": 9.82374187582421e-08, "logits/chosen": -2.1092991828918457, "logits/rejected": -2.133781909942627, "logps/chosen": -2.9700093269348145, "logps/rejected": -4.346618175506592, "loss": 0.9648, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -5.940018653869629, "rewards/margins": 2.753218650817871, "rewards/rejected": -8.693236351013184, "step": 310 }, { "epoch": 0.6617647058823529, "grad_norm": 66.28146153490614, "learning_rate": 9.310647804015124e-08, "logits/chosen": -2.133643627166748, "logits/rejected": -2.160266637802124, "logps/chosen": -2.9957821369171143, "logps/rejected": -4.556756973266602, "loss": 0.937, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -5.9915642738342285, "rewards/margins": 3.1219494342803955, "rewards/rejected": -9.113513946533203, "step": 315 }, { "epoch": 0.6722689075630253, "grad_norm": 49.303213418937055, "learning_rate": 8.805216158094177e-08, "logits/chosen": -2.076920986175537, "logits/rejected": -2.103963851928711, "logps/chosen": -2.907010555267334, "logps/rejected": -4.666647911071777, "loss": 0.9387, "rewards/accuracies": 0.8125, "rewards/chosen": -5.814021110534668, "rewards/margins": 3.5192761421203613, "rewards/rejected": -9.333295822143555, "step": 320 }, { "epoch": 0.6827731092436975, "grad_norm": 67.32319494946066, "learning_rate": 8.308127653966262e-08, "logits/chosen": -2.0415196418762207, "logits/rejected": -2.0577666759490967, "logps/chosen": -3.1487503051757812, "logps/rejected": -4.704668045043945, "loss": 0.9346, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -6.2975006103515625, "rewards/margins": 3.111835241317749, "rewards/rejected": -9.40933609008789, "step": 325 }, { "epoch": 0.6932773109243697, "grad_norm": 60.93426199203996, "learning_rate": 7.820051770983612e-08, "logits/chosen": -2.0549426078796387, "logits/rejected": -2.080475330352783, "logps/chosen": -3.1458420753479004, "logps/rejected": -4.8635969161987305, "loss": 0.966, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -6.291684150695801, "rewards/margins": 3.4355111122131348, "rewards/rejected": -9.727193832397461, "step": 330 }, { "epoch": 0.7037815126050421, "grad_norm": 72.28419657503075, "learning_rate": 7.341645850290216e-08, "logits/chosen": -2.1288955211639404, "logits/rejected": -2.1594443321228027, "logps/chosen": -3.1346468925476074, "logps/rejected": -4.768304347991943, "loss": 1.019, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -6.269293785095215, "rewards/margins": 3.267315626144409, "rewards/rejected": -9.536608695983887, "step": 335 }, { "epoch": 0.7142857142857143, "grad_norm": 60.72644174180833, "learning_rate": 6.873554209514085e-08, "logits/chosen": -2.0705599784851074, "logits/rejected": -2.0726349353790283, "logps/chosen": -2.935683488845825, "logps/rejected": -4.3867692947387695, "loss": 0.9702, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -5.87136697769165, "rewards/margins": 2.9021708965301514, "rewards/rejected": -8.773538589477539, "step": 340 }, { "epoch": 0.7247899159663865, "grad_norm": 52.6099555735741, "learning_rate": 6.416407274999497e-08, "logits/chosen": -2.113405227661133, "logits/rejected": -2.1457953453063965, "logps/chosen": -3.0049102306365967, "logps/rejected": -4.615386962890625, "loss": 0.9687, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -6.009820461273193, "rewards/margins": 3.2209534645080566, "rewards/rejected": -9.23077392578125, "step": 345 }, { "epoch": 0.7352941176470589, "grad_norm": 69.6143506053754, "learning_rate": 5.970820732748143e-08, "logits/chosen": -2.145555257797241, "logits/rejected": -2.155163288116455, "logps/chosen": -2.938427209854126, "logps/rejected": -4.6191511154174805, "loss": 0.878, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -5.876854419708252, "rewards/margins": 3.3614463806152344, "rewards/rejected": -9.238302230834961, "step": 350 }, { "epoch": 0.7457983193277311, "grad_norm": 70.71307640111154, "learning_rate": 5.537394699212498e-08, "logits/chosen": -2.1382346153259277, "logits/rejected": -2.163740634918213, "logps/chosen": -2.980686664581299, "logps/rejected": -4.480741500854492, "loss": 0.9898, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -5.961373329162598, "rewards/margins": 3.0001087188720703, "rewards/rejected": -8.961483001708984, "step": 355 }, { "epoch": 0.7563025210084033, "grad_norm": 73.19945321147338, "learning_rate": 5.1167129130583346e-08, "logits/chosen": -2.109528064727783, "logits/rejected": -2.1514618396759033, "logps/chosen": -2.996703624725342, "logps/rejected": -4.683353900909424, "loss": 1.0311, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -5.993407249450684, "rewards/margins": 3.373300075531006, "rewards/rejected": -9.366707801818848, "step": 360 }, { "epoch": 0.7668067226890757, "grad_norm": 70.68128938841156, "learning_rate": 4.709341948984809e-08, "logits/chosen": -2.0933072566986084, "logits/rejected": -2.1408255100250244, "logps/chosen": -2.9475154876708984, "logps/rejected": -4.628712177276611, "loss": 1.0051, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -5.895030975341797, "rewards/margins": 3.3623931407928467, "rewards/rejected": -9.257424354553223, "step": 365 }, { "epoch": 0.7773109243697479, "grad_norm": 64.71452548748283, "learning_rate": 4.315830454661059e-08, "logits/chosen": -2.086402654647827, "logits/rejected": -2.1012749671936035, "logps/chosen": -2.9121134281158447, "logps/rejected": -4.349917888641357, "loss": 0.9727, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -5.8242268562316895, "rewards/margins": 2.8756089210510254, "rewards/rejected": -8.699835777282715, "step": 370 }, { "epoch": 0.7878151260504201, "grad_norm": 71.60834624596436, "learning_rate": 3.936708411806887e-08, "logits/chosen": -2.124846935272217, "logits/rejected": -2.1803550720214844, "logps/chosen": -2.9349002838134766, "logps/rejected": -4.718347549438477, "loss": 0.9764, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -5.869800567626953, "rewards/margins": 3.566895008087158, "rewards/rejected": -9.436695098876953, "step": 375 }, { "epoch": 0.7983193277310925, "grad_norm": 55.835007766843376, "learning_rate": 3.572486422412786e-08, "logits/chosen": -2.104611873626709, "logits/rejected": -2.1398825645446777, "logps/chosen": -2.874159336090088, "logps/rejected": -4.522528648376465, "loss": 0.9513, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -5.748318672180176, "rewards/margins": 3.296739101409912, "rewards/rejected": -9.04505729675293, "step": 380 }, { "epoch": 0.8088235294117647, "grad_norm": 54.54718274731096, "learning_rate": 3.2236550210606293e-08, "logits/chosen": -2.13325834274292, "logits/rejected": -2.1514346599578857, "logps/chosen": -2.728529691696167, "logps/rejected": -4.492846488952637, "loss": 0.9402, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -5.457059383392334, "rewards/margins": 3.5286338329315186, "rewards/rejected": -8.985692977905273, "step": 385 }, { "epoch": 0.819327731092437, "grad_norm": 64.73590798684994, "learning_rate": 2.8906840142711338e-08, "logits/chosen": -2.0870397090911865, "logits/rejected": -2.1221370697021484, "logps/chosen": -2.9295685291290283, "logps/rejected": -4.712892055511475, "loss": 0.9203, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -5.859137058258057, "rewards/margins": 3.5666465759277344, "rewards/rejected": -9.42578411102295, "step": 390 }, { "epoch": 0.8298319327731093, "grad_norm": 56.24812000405815, "learning_rate": 2.5740218477679143e-08, "logits/chosen": -2.076784610748291, "logits/rejected": -2.0827224254608154, "logps/chosen": -2.910884141921997, "logps/rejected": -4.398539066314697, "loss": 0.8926, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -5.821768283843994, "rewards/margins": 2.975309371948242, "rewards/rejected": -8.797078132629395, "step": 395 }, { "epoch": 0.8403361344537815, "grad_norm": 65.02327391971039, "learning_rate": 2.2740950025102763e-08, "logits/chosen": -2.0536999702453613, "logits/rejected": -2.058232545852661, "logps/chosen": -3.009183883666992, "logps/rejected": -4.569349765777588, "loss": 0.9758, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -6.018367767333984, "rewards/margins": 3.1203320026397705, "rewards/rejected": -9.138699531555176, "step": 400 }, { "epoch": 0.8508403361344538, "grad_norm": 71.60344245483444, "learning_rate": 1.9913074203082053e-08, "logits/chosen": -2.0714104175567627, "logits/rejected": -2.0895228385925293, "logps/chosen": -3.0680434703826904, "logps/rejected": -4.809669494628906, "loss": 1.002, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -6.136086940765381, "rewards/margins": 3.483250856399536, "rewards/rejected": -9.619338989257812, "step": 405 }, { "epoch": 0.8613445378151261, "grad_norm": 65.02582256297173, "learning_rate": 1.726039959793059e-08, "logits/chosen": -2.0531625747680664, "logits/rejected": -2.0893194675445557, "logps/chosen": -3.2407803535461426, "logps/rejected": -4.729245185852051, "loss": 0.9391, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -6.481560707092285, "rewards/margins": 2.9769301414489746, "rewards/rejected": -9.458490371704102, "step": 410 }, { "epoch": 0.8718487394957983, "grad_norm": 66.60722999226081, "learning_rate": 1.4786498834767618e-08, "logits/chosen": -1.971679449081421, "logits/rejected": -2.0226242542266846, "logps/chosen": -2.956986427307129, "logps/rejected": -4.357911109924316, "loss": 0.9793, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -5.913972854614258, "rewards/margins": 2.8018486499786377, "rewards/rejected": -8.715822219848633, "step": 415 }, { "epoch": 0.8823529411764706, "grad_norm": 67.46172075980118, "learning_rate": 1.2494703765902337e-08, "logits/chosen": -2.0839121341705322, "logits/rejected": -2.104898452758789, "logps/chosen": -3.1962718963623047, "logps/rejected": -4.687077522277832, "loss": 0.9073, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -6.392543792724609, "rewards/margins": 2.9816107749938965, "rewards/rejected": -9.374155044555664, "step": 420 }, { "epoch": 0.8928571428571429, "grad_norm": 80.87130272740922, "learning_rate": 1.0388100983491676e-08, "logits/chosen": -2.0597221851348877, "logits/rejected": -2.0896944999694824, "logps/chosen": -3.026052236557007, "logps/rejected": -4.573755741119385, "loss": 0.9555, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -6.052104473114014, "rewards/margins": 3.0954062938690186, "rewards/rejected": -9.14751148223877, "step": 425 }, { "epoch": 0.9033613445378151, "grad_norm": 70.56768229498226, "learning_rate": 8.469527662514425e-09, "logits/chosen": -2.0741794109344482, "logits/rejected": -2.097032070159912, "logps/chosen": -3.0541605949401855, "logps/rejected": -4.719814777374268, "loss": 1.0143, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -6.108321189880371, "rewards/margins": 3.331307888031006, "rewards/rejected": -9.439629554748535, "step": 430 }, { "epoch": 0.9138655462184874, "grad_norm": 72.71200868786163, "learning_rate": 6.7415677396608474e-09, "logits/chosen": -2.0740599632263184, "logits/rejected": -2.0966227054595947, "logps/chosen": -3.1755881309509277, "logps/rejected": -5.003739356994629, "loss": 0.9747, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -6.3511762619018555, "rewards/margins": 3.6563029289245605, "rewards/rejected": -10.007478713989258, "step": 435 }, { "epoch": 0.9243697478991597, "grad_norm": 68.28482752709235, "learning_rate": 5.206548433283803e-09, "logits/chosen": -2.015186071395874, "logits/rejected": -2.100969076156616, "logps/chosen": -3.135103464126587, "logps/rejected": -4.680062294006348, "loss": 0.9059, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -6.270206928253174, "rewards/margins": 3.0899174213409424, "rewards/rejected": -9.360124588012695, "step": 440 }, { "epoch": 0.9348739495798319, "grad_norm": 53.32723170520827, "learning_rate": 3.866537109098561e-09, "logits/chosen": -2.0853240489959717, "logits/rejected": -2.0845720767974854, "logps/chosen": -2.9771265983581543, "logps/rejected": -4.7920613288879395, "loss": 0.9242, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -5.954253196716309, "rewards/margins": 3.6298699378967285, "rewards/rejected": -9.584122657775879, "step": 445 }, { "epoch": 0.9453781512605042, "grad_norm": 77.58999305035255, "learning_rate": 2.7233384958522676e-09, "logits/chosen": -2.0929324626922607, "logits/rejected": -2.088423490524292, "logps/chosen": -3.0112125873565674, "logps/rejected": -4.747193336486816, "loss": 0.859, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -6.022425174713135, "rewards/margins": 3.471961498260498, "rewards/rejected": -9.494386672973633, "step": 450 }, { "epoch": 0.9558823529411765, "grad_norm": 69.00371191627924, "learning_rate": 1.7784922547133318e-09, "logits/chosen": -2.03417706489563, "logits/rejected": -2.0785162448883057, "logps/chosen": -3.0350539684295654, "logps/rejected": -4.6372761726379395, "loss": 1.0211, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -6.070107936859131, "rewards/margins": 3.204444408416748, "rewards/rejected": -9.274552345275879, "step": 455 }, { "epoch": 0.9663865546218487, "grad_norm": 80.70006340013546, "learning_rate": 1.033270905653949e-09, "logits/chosen": -2.077859878540039, "logits/rejected": -2.1275644302368164, "logps/chosen": -3.1961588859558105, "logps/rejected": -5.026784420013428, "loss": 0.9054, "rewards/accuracies": 0.887499988079071, "rewards/chosen": -6.392317771911621, "rewards/margins": 3.6612517833709717, "rewards/rejected": -10.053568840026855, "step": 460 }, { "epoch": 0.976890756302521, "grad_norm": 65.94555657144473, "learning_rate": 4.8867811361889e-10, "logits/chosen": -2.0415802001953125, "logits/rejected": -2.073897123336792, "logps/chosen": -3.136763572692871, "logps/rejected": -4.838761329650879, "loss": 0.9205, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -6.273527145385742, "rewards/margins": 3.40399432182312, "rewards/rejected": -9.677522659301758, "step": 465 }, { "epoch": 0.9873949579831933, "grad_norm": 72.55584643358395, "learning_rate": 1.454473367883291e-10, "logits/chosen": -2.0744833946228027, "logits/rejected": -2.1010680198669434, "logps/chosen": -3.007612943649292, "logps/rejected": -4.534255027770996, "loss": 0.8893, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -6.015225887298584, "rewards/margins": 3.0532851219177246, "rewards/rejected": -9.068510055541992, "step": 470 }, { "epoch": 0.9978991596638656, "grad_norm": 71.68265122537953, "learning_rate": 4.040838755653419e-12, "logits/chosen": -2.0488152503967285, "logits/rejected": -2.0957658290863037, "logps/chosen": -2.9260973930358887, "logps/rejected": -4.68855619430542, "loss": 0.9609, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -5.852194786071777, "rewards/margins": 3.5249176025390625, "rewards/rejected": -9.37711238861084, "step": 475 }, { "epoch": 1.0, "step": 476, "total_flos": 0.0, "train_loss": 1.1419020675811447, "train_runtime": 10201.3152, "train_samples_per_second": 5.971, "train_steps_per_second": 0.047 } ], "logging_steps": 5, "max_steps": 476, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }