{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9996020692399522,
  "eval_steps": 1000,
  "global_step": 314,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0031834460803820135,
      "grad_norm": 0.8913188458215807,
      "learning_rate": 1.5625e-08,
      "logits/chosen": -2.332231044769287,
      "logits/rejected": -2.3125171661376953,
      "logps/chosen": -178.02963256835938,
      "logps/rejected": -150.3365478515625,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.03183446080382014,
      "grad_norm": 0.7954378994525454,
      "learning_rate": 1.5624999999999999e-07,
      "logits/chosen": -2.299248456954956,
      "logits/rejected": -2.2925186157226562,
      "logps/chosen": -160.2787322998047,
      "logps/rejected": -158.51319885253906,
      "loss": 0.6931,
      "rewards/accuracies": 0.4322916567325592,
      "rewards/chosen": -3.1086481612874195e-05,
      "rewards/margins": -0.00011221379099879414,
      "rewards/rejected": 8.112730574794114e-05,
      "step": 10
    },
    {
      "epoch": 0.06366892160764027,
      "grad_norm": 0.856612493875923,
      "learning_rate": 3.1249999999999997e-07,
      "logits/chosen": -2.2908735275268555,
      "logits/rejected": -2.294975519180298,
      "logps/chosen": -156.20362854003906,
      "logps/rejected": -160.60763549804688,
      "loss": 0.6932,
      "rewards/accuracies": 0.48828125,
      "rewards/chosen": -9.316079376731068e-05,
      "rewards/margins": 9.743528607941698e-06,
      "rewards/rejected": -0.00010290431964676827,
      "step": 20
    },
    {
      "epoch": 0.0955033824114604,
      "grad_norm": 0.8601850412460429,
      "learning_rate": 4.6874999999999996e-07,
      "logits/chosen": -2.2940285205841064,
      "logits/rejected": -2.2888407707214355,
      "logps/chosen": -159.58680725097656,
      "logps/rejected": -162.42189025878906,
      "loss": 0.693,
      "rewards/accuracies": 0.52734375,
      "rewards/chosen": -0.0016975710168480873,
      "rewards/margins": 0.00033986102789640427,
      "rewards/rejected": -0.0020374320447444916,
      "step": 30
    },
    {
      "epoch": 0.12733784321528055,
      "grad_norm": 0.8744440488120392,
      "learning_rate": 4.990077890125363e-07,
      "logits/chosen": -2.301192045211792,
      "logits/rejected": -2.2977750301361084,
      "logps/chosen": -151.9224395751953,
      "logps/rejected": -152.43655395507812,
      "loss": 0.6925,
      "rewards/accuracies": 0.555468738079071,
      "rewards/chosen": -0.005973272956907749,
      "rewards/margins": 0.0012832467909902334,
      "rewards/rejected": -0.007256519980728626,
      "step": 40
    },
    {
      "epoch": 0.15917230401910068,
      "grad_norm": 0.95273331836508,
      "learning_rate": 4.949904262591467e-07,
      "logits/chosen": -2.3083178997039795,
      "logits/rejected": -2.30576753616333,
      "logps/chosen": -161.62750244140625,
      "logps/rejected": -159.9287109375,
      "loss": 0.6914,
      "rewards/accuracies": 0.534375011920929,
      "rewards/chosen": -0.016185810789465904,
      "rewards/margins": 0.003111806232482195,
      "rewards/rejected": -0.019297616556286812,
      "step": 50
    },
    {
      "epoch": 0.1910067648229208,
      "grad_norm": 0.9051345997712666,
      "learning_rate": 4.879356673988089e-07,
      "logits/chosen": -2.3459715843200684,
      "logits/rejected": -2.340350389480591,
      "logps/chosen": -159.88613891601562,
      "logps/rejected": -162.18612670898438,
      "loss": 0.6903,
      "rewards/accuracies": 0.555468738079071,
      "rewards/chosen": -0.029835861176252365,
      "rewards/margins": 0.005405827891081572,
      "rewards/rejected": -0.03524169698357582,
      "step": 60
    },
    {
      "epoch": 0.22284122562674094,
      "grad_norm": 0.9611988462128772,
      "learning_rate": 4.779309774701573e-07,
      "logits/chosen": -2.3523900508880615,
      "logits/rejected": -2.348705768585205,
      "logps/chosen": -166.91146850585938,
      "logps/rejected": -164.3793182373047,
      "loss": 0.6883,
      "rewards/accuracies": 0.5609375238418579,
      "rewards/chosen": -0.05225539207458496,
      "rewards/margins": 0.009335539303719997,
      "rewards/rejected": -0.06159093230962753,
      "step": 70
    },
    {
      "epoch": 0.2546756864305611,
      "grad_norm": 0.9747994880646563,
      "learning_rate": 4.6510039481503485e-07,
      "logits/chosen": -2.367791175842285,
      "logits/rejected": -2.3638641834259033,
      "logps/chosen": -166.5675811767578,
      "logps/rejected": -163.23748779296875,
      "loss": 0.6866,
      "rewards/accuracies": 0.56640625,
      "rewards/chosen": -0.0814916118979454,
      "rewards/margins": 0.01518010813742876,
      "rewards/rejected": -0.09667172282934189,
      "step": 80
    },
    {
      "epoch": 0.28651014723438123,
      "grad_norm": 1.0299054182265805,
      "learning_rate": 4.4960299324869857e-07,
      "logits/chosen": -2.3870062828063965,
      "logits/rejected": -2.382286787033081,
      "logps/chosen": -168.0864715576172,
      "logps/rejected": -170.67047119140625,
      "loss": 0.6857,
      "rewards/accuracies": 0.5453125238418579,
      "rewards/chosen": -0.10201652348041534,
      "rewards/margins": 0.017032291740179062,
      "rewards/rejected": -0.11904881149530411,
      "step": 90
    },
    {
      "epoch": 0.31834460803820136,
      "grad_norm": 1.0287716469740125,
      "learning_rate": 4.3163090985954074e-07,
      "logits/chosen": -2.3779287338256836,
      "logits/rejected": -2.369999408721924,
      "logps/chosen": -170.18251037597656,
      "logps/rejected": -172.33718872070312,
      "loss": 0.6838,
      "rewards/accuracies": 0.5835937261581421,
      "rewards/chosen": -0.12887540459632874,
      "rewards/margins": 0.023395564407110214,
      "rewards/rejected": -0.15227097272872925,
      "step": 100
    },
    {
      "epoch": 0.3501790688420215,
      "grad_norm": 1.0989482799498032,
      "learning_rate": 4.114069628897006e-07,
      "logits/chosen": -2.358673572540283,
      "logits/rejected": -2.3572804927825928,
      "logps/chosen": -173.09437561035156,
      "logps/rejected": -176.59164428710938,
      "loss": 0.681,
      "rewards/accuracies": 0.5703125,
      "rewards/chosen": -0.14945295453071594,
      "rewards/margins": 0.03385554999113083,
      "rewards/rejected": -0.18330851197242737,
      "step": 110
    },
    {
      "epoch": 0.3820135296458416,
      "grad_norm": 1.215724859704609,
      "learning_rate": 3.891818892301304e-07,
      "logits/chosen": -2.3581154346466064,
      "logits/rejected": -2.3629350662231445,
      "logps/chosen": -176.79432678222656,
      "logps/rejected": -180.97039794921875,
      "loss": 0.6816,
      "rewards/accuracies": 0.563281238079071,
      "rewards/chosen": -0.2229323834180832,
      "rewards/margins": 0.02610206976532936,
      "rewards/rejected": -0.24903444945812225,
      "step": 120
    },
    {
      "epoch": 0.41384799044966175,
      "grad_norm": 1.276372767011757,
      "learning_rate": 3.6523123577970693e-07,
      "logits/chosen": -2.362705707550049,
      "logits/rejected": -2.3545660972595215,
      "logps/chosen": -187.2711944580078,
      "logps/rejected": -192.3023681640625,
      "loss": 0.6774,
      "rewards/accuracies": 0.577343761920929,
      "rewards/chosen": -0.2734658420085907,
      "rewards/margins": 0.04484738036990166,
      "rewards/rejected": -0.31831321120262146,
      "step": 130
    },
    {
      "epoch": 0.4456824512534819,
      "grad_norm": 1.3874387536648047,
      "learning_rate": 3.3985194320937815e-07,
      "logits/chosen": -2.3698925971984863,
      "logits/rejected": -2.375953435897827,
      "logps/chosen": -189.7623748779297,
      "logps/rejected": -198.46641540527344,
      "loss": 0.6765,
      "rewards/accuracies": 0.577343761920929,
      "rewards/chosen": -0.28896641731262207,
      "rewards/margins": 0.0472131185233593,
      "rewards/rejected": -0.3361795246601105,
      "step": 140
    },
    {
      "epoch": 0.477516912057302,
      "grad_norm": 1.4443860790246479,
      "learning_rate": 3.133586644859039e-07,
      "logits/chosen": -2.3575439453125,
      "logits/rejected": -2.3553812503814697,
      "logps/chosen": -191.13748168945312,
      "logps/rejected": -193.09817504882812,
      "loss": 0.6777,
      "rewards/accuracies": 0.5640624761581421,
      "rewards/chosen": -0.3255929946899414,
      "rewards/margins": 0.048027556389570236,
      "rewards/rejected": -0.37362051010131836,
      "step": 150
    },
    {
      "epoch": 0.5093513728611222,
      "grad_norm": 1.6146506390035185,
      "learning_rate": 2.8607986379820664e-07,
      "logits/chosen": -2.3630471229553223,
      "logits/rejected": -2.3578333854675293,
      "logps/chosen": -203.55029296875,
      "logps/rejected": -198.3260955810547,
      "loss": 0.6749,
      "rewards/accuracies": 0.543749988079071,
      "rewards/chosen": -0.36638548970222473,
      "rewards/margins": 0.03375838324427605,
      "rewards/rejected": -0.4001438617706299,
      "step": 160
    },
    {
      "epoch": 0.5411858336649423,
      "grad_norm": 1.442156527952266,
      "learning_rate": 2.583537442519186e-07,
      "logits/chosen": -2.37186861038208,
      "logits/rejected": -2.37400484085083,
      "logps/chosen": -196.1448211669922,
      "logps/rejected": -203.14988708496094,
      "loss": 0.6733,
      "rewards/accuracies": 0.571093738079071,
      "rewards/chosen": -0.39402055740356445,
      "rewards/margins": 0.05580953508615494,
      "rewards/rejected": -0.4498301148414612,
      "step": 170
    },
    {
      "epoch": 0.5730202944687625,
      "grad_norm": 1.6421108977251095,
      "learning_rate": 2.3052405482064919e-07,
      "logits/chosen": -2.375570297241211,
      "logits/rejected": -2.3756964206695557,
      "logps/chosen": -210.69284057617188,
      "logps/rejected": -210.54672241210938,
      "loss": 0.6783,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": -0.4329681396484375,
      "rewards/margins": 0.0399855375289917,
      "rewards/rejected": -0.4729536473751068,
      "step": 180
    },
    {
      "epoch": 0.6048547552725826,
      "grad_norm": 1.8616819792733938,
      "learning_rate": 2.029358285394716e-07,
      "logits/chosen": -2.375413179397583,
      "logits/rejected": -2.3708198070526123,
      "logps/chosen": -206.62564086914062,
      "logps/rejected": -206.83370971679688,
      "loss": 0.6694,
      "rewards/accuracies": 0.609375,
      "rewards/chosen": -0.41667842864990234,
      "rewards/margins": 0.06583230942487717,
      "rewards/rejected": -0.4825107455253601,
      "step": 190
    },
    {
      "epoch": 0.6366892160764027,
      "grad_norm": 2.107824221269726,
      "learning_rate": 1.7593110477859152e-07,
      "logits/chosen": -2.3875057697296143,
      "logits/rejected": -2.387460470199585,
      "logps/chosen": -201.22967529296875,
      "logps/rejected": -205.489501953125,
      "loss": 0.6713,
      "rewards/accuracies": 0.5757812261581421,
      "rewards/chosen": -0.42719680070877075,
      "rewards/margins": 0.06031234189867973,
      "rewards/rejected": -0.487509161233902,
      "step": 200
    },
    {
      "epoch": 0.6685236768802229,
      "grad_norm": 2.4249761136425048,
      "learning_rate": 1.4984468863253007e-07,
      "logits/chosen": -2.394925594329834,
      "logits/rejected": -2.386389970779419,
      "logps/chosen": -215.95462036132812,
      "logps/rejected": -215.75820922851562,
      "loss": 0.6748,
      "rewards/accuracies": 0.569531261920929,
      "rewards/chosen": -0.4547385275363922,
      "rewards/margins": 0.05981076508760452,
      "rewards/rejected": -0.5145493149757385,
      "step": 210
    },
    {
      "epoch": 0.700358137684043,
      "grad_norm": 1.8116066543619882,
      "learning_rate": 1.2500000000000005e-07,
      "logits/chosen": -2.3963189125061035,
      "logits/rejected": -2.3958048820495605,
      "logps/chosen": -212.20693969726562,
      "logps/rejected": -216.2605743408203,
      "loss": 0.6721,
      "rewards/accuracies": 0.5757812261581421,
      "rewards/chosen": -0.4895060658454895,
      "rewards/margins": 0.06058992072939873,
      "rewards/rejected": -0.5500959157943726,
      "step": 220
    },
    {
      "epoch": 0.7321925984878631,
      "grad_norm": 1.8113200913829595,
      "learning_rate": 1.0170506381766119e-07,
      "logits/chosen": -2.387866497039795,
      "logits/rejected": -2.388319730758667,
      "logps/chosen": -207.97604370117188,
      "logps/rejected": -218.859130859375,
      "loss": 0.6722,
      "rewards/accuracies": 0.586718738079071,
      "rewards/chosen": -0.49019813537597656,
      "rewards/margins": 0.07117091119289398,
      "rewards/rejected": -0.5613690614700317,
      "step": 230
    },
    {
      "epoch": 0.7640270592916832,
      "grad_norm": 2.1202663184992563,
      "learning_rate": 8.024869116091879e-08,
      "logits/chosen": -2.3979618549346924,
      "logits/rejected": -2.406341075897217,
      "logps/chosen": -219.69271850585938,
      "logps/rejected": -221.71139526367188,
      "loss": 0.6734,
      "rewards/accuracies": 0.5687500238418579,
      "rewards/chosen": -0.4954513907432556,
      "rewards/margins": 0.05036097764968872,
      "rewards/rejected": -0.5458123683929443,
      "step": 240
    },
    {
      "epoch": 0.7958615200955034,
      "grad_norm": 1.8622859998372623,
      "learning_rate": 6.089689855854869e-08,
      "logits/chosen": -2.3931431770324707,
      "logits/rejected": -2.4037561416625977,
      "logps/chosen": -207.83798217773438,
      "logps/rejected": -216.9298858642578,
      "loss": 0.6728,
      "rewards/accuracies": 0.5835937261581421,
      "rewards/chosen": -0.4849010407924652,
      "rewards/margins": 0.0694160908460617,
      "rewards/rejected": -0.5543171167373657,
      "step": 250
    },
    {
      "epoch": 0.8276959808993235,
      "grad_norm": 1.8122633866114284,
      "learning_rate": 4.388960991455998e-08,
      "logits/chosen": -2.3899784088134766,
      "logits/rejected": -2.395357847213745,
      "logps/chosen": -205.0048828125,
      "logps/rejected": -213.7442626953125,
      "loss": 0.6718,
      "rewards/accuracies": 0.5882812738418579,
      "rewards/chosen": -0.4997900128364563,
      "rewards/margins": 0.0625927522778511,
      "rewards/rejected": -0.5623827576637268,
      "step": 260
    },
    {
      "epoch": 0.8595304417031436,
      "grad_norm": 2.0654372503377014,
      "learning_rate": 2.943768192692958e-08,
      "logits/chosen": -2.400757074356079,
      "logits/rejected": -2.4067251682281494,
      "logps/chosen": -205.0299072265625,
      "logps/rejected": -216.3090362548828,
      "loss": 0.6731,
      "rewards/accuracies": 0.5640624761581421,
      "rewards/chosen": -0.4922141134738922,
      "rewards/margins": 0.06625890731811523,
      "rewards/rejected": -0.5584729313850403,
      "step": 270
    },
    {
      "epoch": 0.8913649025069638,
      "grad_norm": 2.2123553368333644,
      "learning_rate": 1.7720289882128092e-08,
      "logits/chosen": -2.3984594345092773,
      "logits/rejected": -2.4013664722442627,
      "logps/chosen": -204.21585083007812,
      "logps/rejected": -210.4906768798828,
      "loss": 0.6745,
      "rewards/accuracies": 0.5453125238418579,
      "rewards/chosen": -0.497935950756073,
      "rewards/margins": 0.04604557901620865,
      "rewards/rejected": -0.5439816117286682,
      "step": 280
    },
    {
      "epoch": 0.9231993633107839,
      "grad_norm": 2.8178850748361763,
      "learning_rate": 8.882706236405885e-09,
      "logits/chosen": -2.3987581729888916,
      "logits/rejected": -2.398507833480835,
      "logps/chosen": -209.7099609375,
      "logps/rejected": -216.7168426513672,
      "loss": 0.6672,
      "rewards/accuracies": 0.6031249761581421,
      "rewards/chosen": -0.49557456374168396,
      "rewards/margins": 0.07922448217868805,
      "rewards/rejected": -0.5747990608215332,
      "step": 290
    },
    {
      "epoch": 0.955033824114604,
      "grad_norm": 2.094778053644434,
      "learning_rate": 3.0344995250326245e-09,
      "logits/chosen": -2.4099345207214355,
      "logits/rejected": -2.406144380569458,
      "logps/chosen": -210.4052734375,
      "logps/rejected": -220.04794311523438,
      "loss": 0.6676,
      "rewards/accuracies": 0.5921875238418579,
      "rewards/chosen": -0.5059640407562256,
      "rewards/margins": 0.0776122659444809,
      "rewards/rejected": -0.5835763216018677,
      "step": 300
    },
    {
      "epoch": 0.9868682849184242,
      "grad_norm": 2.5156498514265735,
      "learning_rate": 2.481759294498398e-10,
      "logits/chosen": -2.3863139152526855,
      "logits/rejected": -2.3877015113830566,
      "logps/chosen": -207.8264923095703,
      "logps/rejected": -215.2146453857422,
      "loss": 0.6713,
      "rewards/accuracies": 0.5679687261581421,
      "rewards/chosen": -0.5232841372489929,
      "rewards/margins": 0.059422146528959274,
      "rewards/rejected": -0.5827063322067261,
      "step": 310
    },
    {
      "epoch": 0.9996020692399522,
      "step": 314,
      "total_flos": 0.0,
      "train_loss": 0.678918091354856,
      "train_runtime": 7383.0158,
      "train_samples_per_second": 21.78,
      "train_steps_per_second": 0.043
    }
  ],
  "logging_steps": 10,
  "max_steps": 314,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}