|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9921671018276762, |
|
"eval_steps": 500, |
|
"global_step": 95, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 6.531942491552821, |
|
"learning_rate": 5e-08, |
|
"logits/chosen": -2.851747512817383, |
|
"logits/rejected": -2.833996534347534, |
|
"logps/chosen": -165.70089721679688, |
|
"logps/rejected": -198.857666015625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 5.930803989300868, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": -2.770416259765625, |
|
"logits/rejected": -2.7731680870056152, |
|
"logps/chosen": -171.3281707763672, |
|
"logps/rejected": -172.58348083496094, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.00034834028338082135, |
|
"rewards/margins": 4.263037408236414e-05, |
|
"rewards/rejected": 0.0003057100111618638, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 7.205939520530408, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -2.785672664642334, |
|
"logits/rejected": -2.7945070266723633, |
|
"logps/chosen": -189.79400634765625, |
|
"logps/rejected": -194.38011169433594, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.005779535509645939, |
|
"rewards/margins": 0.0015561816981062293, |
|
"rewards/rejected": 0.004223353695124388, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 7.119689881451758, |
|
"learning_rate": 4.957432749209755e-07, |
|
"logits/chosen": -2.841862678527832, |
|
"logits/rejected": -2.8522396087646484, |
|
"logps/chosen": -196.4453582763672, |
|
"logps/rejected": -186.3593292236328, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.0351785309612751, |
|
"rewards/margins": 0.009719189256429672, |
|
"rewards/rejected": 0.025459343567490578, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 7.025742204681022, |
|
"learning_rate": 4.83118057351089e-07, |
|
"logits/chosen": -2.8577423095703125, |
|
"logits/rejected": -2.8679168224334717, |
|
"logps/chosen": -163.30587768554688, |
|
"logps/rejected": -176.16122436523438, |
|
"loss": 0.6822, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.054369617253541946, |
|
"rewards/margins": 0.01927168108522892, |
|
"rewards/rejected": 0.035097938030958176, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 7.047833772227819, |
|
"learning_rate": 4.6255428393240354e-07, |
|
"logits/chosen": -2.8176944255828857, |
|
"logits/rejected": -2.8154852390289307, |
|
"logps/chosen": -127.52900695800781, |
|
"logps/rejected": -149.99598693847656, |
|
"loss": 0.6734, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.039430197328329086, |
|
"rewards/margins": 0.044888969510793686, |
|
"rewards/rejected": -0.005458767991513014, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 7.4255717276037405, |
|
"learning_rate": 4.3475222930516473e-07, |
|
"logits/chosen": -2.781858444213867, |
|
"logits/rejected": -2.7814831733703613, |
|
"logps/chosen": -161.177734375, |
|
"logps/rejected": -173.82421875, |
|
"loss": 0.665, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.01698228344321251, |
|
"rewards/margins": 0.0684308260679245, |
|
"rewards/rejected": -0.08541311323642731, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 7.891881929971765, |
|
"learning_rate": 4.006586590948141e-07, |
|
"logits/chosen": -2.848252296447754, |
|
"logits/rejected": -2.8431050777435303, |
|
"logps/chosen": -192.15963745117188, |
|
"logps/rejected": -209.07540893554688, |
|
"loss": 0.6544, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.008883295580744743, |
|
"rewards/margins": 0.0794510543346405, |
|
"rewards/rejected": -0.0883343443274498, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 9.209834953181781, |
|
"learning_rate": 3.614345889441346e-07, |
|
"logits/chosen": -2.7681477069854736, |
|
"logits/rejected": -2.78022837638855, |
|
"logps/chosen": -135.9792022705078, |
|
"logps/rejected": -164.3667449951172, |
|
"loss": 0.6465, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.031201759353280067, |
|
"rewards/margins": 0.13447019457817078, |
|
"rewards/rejected": -0.16567197442054749, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 13.835886251568184, |
|
"learning_rate": 3.184157475180207e-07, |
|
"logits/chosen": -2.7284975051879883, |
|
"logits/rejected": -2.7436182498931885, |
|
"logps/chosen": -205.8560028076172, |
|
"logps/rejected": -212.56710815429688, |
|
"loss": 0.6496, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.17208269238471985, |
|
"rewards/margins": 0.13685402274131775, |
|
"rewards/rejected": -0.30893674492836, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 10.309186722273289, |
|
"learning_rate": 2.730670898658255e-07, |
|
"logits/chosen": -2.7203848361968994, |
|
"logits/rejected": -2.7220139503479004, |
|
"logps/chosen": -183.94479370117188, |
|
"logps/rejected": -218.1922149658203, |
|
"loss": 0.6248, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.09618374705314636, |
|
"rewards/margins": 0.18668127059936523, |
|
"rewards/rejected": -0.2828650176525116, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 12.04836501966109, |
|
"learning_rate": 2.2693291013417452e-07, |
|
"logits/chosen": -2.6110920906066895, |
|
"logits/rejected": -2.6190452575683594, |
|
"logps/chosen": -151.388916015625, |
|
"logps/rejected": -174.0006561279297, |
|
"loss": 0.6317, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.032760851085186005, |
|
"rewards/margins": 0.17945989966392517, |
|
"rewards/rejected": -0.2122207134962082, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 14.818475765214615, |
|
"learning_rate": 1.8158425248197928e-07, |
|
"logits/chosen": -2.69221568107605, |
|
"logits/rejected": -2.689034938812256, |
|
"logps/chosen": -181.30128479003906, |
|
"logps/rejected": -231.5193634033203, |
|
"loss": 0.6122, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.09343220293521881, |
|
"rewards/margins": 0.2633873522281647, |
|
"rewards/rejected": -0.3568195104598999, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 15.265728023102268, |
|
"learning_rate": 1.3856541105586545e-07, |
|
"logits/chosen": -2.7168681621551514, |
|
"logits/rejected": -2.7309060096740723, |
|
"logps/chosen": -185.16700744628906, |
|
"logps/rejected": -220.42764282226562, |
|
"loss": 0.6045, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.23089858889579773, |
|
"rewards/margins": 0.27521029114723206, |
|
"rewards/rejected": -0.5061088800430298, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 13.52741638941588, |
|
"learning_rate": 9.934134090518592e-08, |
|
"logits/chosen": -2.6834919452667236, |
|
"logits/rejected": -2.6923632621765137, |
|
"logps/chosen": -200.9665069580078, |
|
"logps/rejected": -217.9497528076172, |
|
"loss": 0.6094, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.23829719424247742, |
|
"rewards/margins": 0.2035256326198578, |
|
"rewards/rejected": -0.4418228268623352, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 14.871873879280589, |
|
"learning_rate": 6.524777069483525e-08, |
|
"logits/chosen": -2.6725871562957764, |
|
"logits/rejected": -2.6699538230895996, |
|
"logps/chosen": -185.2981719970703, |
|
"logps/rejected": -229.42092895507812, |
|
"loss": 0.5985, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.13438589870929718, |
|
"rewards/margins": 0.32371044158935547, |
|
"rewards/rejected": -0.45809632539749146, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 12.313204564006284, |
|
"learning_rate": 3.74457160675965e-08, |
|
"logits/chosen": -2.6488523483276367, |
|
"logits/rejected": -2.6512537002563477, |
|
"logps/chosen": -177.8891143798828, |
|
"logps/rejected": -211.4371795654297, |
|
"loss": 0.6019, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.09899892657995224, |
|
"rewards/margins": 0.26206719875335693, |
|
"rewards/rejected": -0.36106616258621216, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 17.242389025181602, |
|
"learning_rate": 1.6881942648911074e-08, |
|
"logits/chosen": -2.6852784156799316, |
|
"logits/rejected": -2.6899216175079346, |
|
"logps/chosen": -171.39414978027344, |
|
"logps/rejected": -207.66738891601562, |
|
"loss": 0.6214, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.11210503429174423, |
|
"rewards/margins": 0.2755950093269348, |
|
"rewards/rejected": -0.38770005106925964, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 13.932688124952723, |
|
"learning_rate": 4.256725079024553e-09, |
|
"logits/chosen": -2.6324477195739746, |
|
"logits/rejected": -2.6469483375549316, |
|
"logps/chosen": -181.08218383789062, |
|
"logps/rejected": -215.79953002929688, |
|
"loss": 0.6081, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.12142710387706757, |
|
"rewards/margins": 0.2807455062866211, |
|
"rewards/rejected": -0.40217262506484985, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 15.247505163019246, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -2.682211399078369, |
|
"logits/rejected": -2.697298765182495, |
|
"logps/chosen": -198.21182250976562, |
|
"logps/rejected": -223.2611541748047, |
|
"loss": 0.6054, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.12603162229061127, |
|
"rewards/margins": 0.2787989377975464, |
|
"rewards/rejected": -0.40483060479164124, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"step": 95, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6401761331056294, |
|
"train_runtime": 2555.4095, |
|
"train_samples_per_second": 4.785, |
|
"train_steps_per_second": 0.037 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 95, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|