|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 285, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 1.875, |
|
"learning_rate": 1.7241379310344828e-07, |
|
"logits/chosen": -2.735238790512085, |
|
"logits/rejected": -3.2783570289611816, |
|
"logps/chosen": -165.14926147460938, |
|
"logps/rejected": -228.84988403320312, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.7421875, |
|
"learning_rate": 1.724137931034483e-06, |
|
"logits/chosen": -2.7123961448669434, |
|
"logits/rejected": -3.239219903945923, |
|
"logps/chosen": -164.08265686035156, |
|
"logps/rejected": -227.88357543945312, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.6111111044883728, |
|
"rewards/chosen": 0.0008442008402198553, |
|
"rewards/margins": 0.002071016002446413, |
|
"rewards/rejected": -0.0012268151622265577, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1.828125, |
|
"learning_rate": 3.448275862068966e-06, |
|
"logits/chosen": -2.7431998252868652, |
|
"logits/rejected": -3.2384581565856934, |
|
"logps/chosen": -163.16790771484375, |
|
"logps/rejected": -229.4684600830078, |
|
"loss": 0.6817, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.007237003184854984, |
|
"rewards/margins": 0.023129161447286606, |
|
"rewards/rejected": -0.015892159193754196, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.6640625, |
|
"learning_rate": 4.999811754597862e-06, |
|
"logits/chosen": -2.704376220703125, |
|
"logits/rejected": -3.244763135910034, |
|
"logps/chosen": -158.67416381835938, |
|
"logps/rejected": -237.81668090820312, |
|
"loss": 0.6284, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.03751087933778763, |
|
"rewards/margins": 0.13478627800941467, |
|
"rewards/rejected": -0.09727539122104645, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.578125, |
|
"learning_rate": 4.97725658856945e-06, |
|
"logits/chosen": -2.718313217163086, |
|
"logits/rejected": -3.2308642864227295, |
|
"logps/chosen": -150.95199584960938, |
|
"logps/rejected": -256.7232666015625, |
|
"loss": 0.5081, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.11078281700611115, |
|
"rewards/margins": 0.415455162525177, |
|
"rewards/rejected": -0.3046723008155823, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.3828125, |
|
"learning_rate": 4.917441177612131e-06, |
|
"logits/chosen": -2.699450731277466, |
|
"logits/rejected": -3.2096571922302246, |
|
"logps/chosen": -140.04672241210938, |
|
"logps/rejected": -286.2795104980469, |
|
"loss": 0.3683, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2172900140285492, |
|
"rewards/margins": 0.8159033060073853, |
|
"rewards/rejected": -0.5986132025718689, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 4.82126520118304e-06, |
|
"logits/chosen": -2.7382638454437256, |
|
"logits/rejected": -3.209529399871826, |
|
"logps/chosen": -127.2552261352539, |
|
"logps/rejected": -322.1763610839844, |
|
"loss": 0.2407, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.36736661195755005, |
|
"rewards/margins": 1.3110934495925903, |
|
"rewards/rejected": -0.9437268972396851, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.6875, |
|
"learning_rate": 4.6901752354885166e-06, |
|
"logits/chosen": -2.696700096130371, |
|
"logits/rejected": -3.2043445110321045, |
|
"logps/chosen": -115.544677734375, |
|
"logps/rejected": -366.6884765625, |
|
"loss": 0.1455, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4782423973083496, |
|
"rewards/margins": 1.8693335056304932, |
|
"rewards/rejected": -1.391090750694275, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.48828125, |
|
"learning_rate": 4.526142995631488e-06, |
|
"logits/chosen": -2.7269909381866455, |
|
"logits/rejected": -3.2105610370635986, |
|
"logps/chosen": -109.22001647949219, |
|
"logps/rejected": -415.4657287597656, |
|
"loss": 0.0858, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5552297234535217, |
|
"rewards/margins": 2.424295663833618, |
|
"rewards/rejected": -1.8690656423568726, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.392578125, |
|
"learning_rate": 4.331635679181032e-06, |
|
"logits/chosen": -2.685180425643921, |
|
"logits/rejected": -3.2150089740753174, |
|
"logps/chosen": -104.05949401855469, |
|
"logps/rejected": -453.306884765625, |
|
"loss": 0.0559, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5974525213241577, |
|
"rewards/margins": 2.865081548690796, |
|
"rewards/rejected": -2.2676291465759277, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 4.109578857224478e-06, |
|
"logits/chosen": -2.702922821044922, |
|
"logits/rejected": -3.216787338256836, |
|
"logps/chosen": -100.30770111083984, |
|
"logps/rejected": -488.27081298828125, |
|
"loss": 0.039, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6299440264701843, |
|
"rewards/margins": 3.2341396808624268, |
|
"rewards/rejected": -2.6041955947875977, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_logits/chosen": -2.7171199321746826, |
|
"eval_logits/rejected": -3.2064568996429443, |
|
"eval_logps/chosen": -79.25934600830078, |
|
"eval_logps/rejected": -452.1828918457031, |
|
"eval_loss": 0.052521564066410065, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 0.5119398236274719, |
|
"eval_rewards/margins": 2.9491662979125977, |
|
"eval_rewards/rejected": -2.4372265338897705, |
|
"eval_runtime": 1.2939, |
|
"eval_samples_per_second": 3.864, |
|
"eval_steps_per_second": 2.319, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 3.863312471055116e-06, |
|
"logits/chosen": -2.7052559852600098, |
|
"logits/rejected": -3.191067934036255, |
|
"logps/chosen": -98.0762710571289, |
|
"logps/rejected": -517.7929077148438, |
|
"loss": 0.0281, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.654212236404419, |
|
"rewards/margins": 3.561893939971924, |
|
"rewards/rejected": -2.907681465148926, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.212890625, |
|
"learning_rate": 3.5965405963463197e-06, |
|
"logits/chosen": -2.7348670959472656, |
|
"logits/rejected": -3.205155849456787, |
|
"logps/chosen": -95.99646759033203, |
|
"logps/rejected": -544.2891845703125, |
|
"loss": 0.0213, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6714237332344055, |
|
"rewards/margins": 3.842583417892456, |
|
"rewards/rejected": -3.1711599826812744, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.1845703125, |
|
"learning_rate": 3.313275730405658e-06, |
|
"logits/chosen": -2.698134183883667, |
|
"logits/rejected": -3.215333938598633, |
|
"logps/chosen": -94.20452880859375, |
|
"logps/rejected": -564.6442260742188, |
|
"loss": 0.0173, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6862392425537109, |
|
"rewards/margins": 4.055664539337158, |
|
"rewards/rejected": -3.369424819946289, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.1591796875, |
|
"learning_rate": 3.0177784404805466e-06, |
|
"logits/chosen": -2.728977680206299, |
|
"logits/rejected": -3.2040677070617676, |
|
"logps/chosen": -95.00137329101562, |
|
"logps/rejected": -582.2269287109375, |
|
"loss": 0.0145, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6958636045455933, |
|
"rewards/margins": 4.233551502227783, |
|
"rewards/rejected": -3.5376884937286377, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.1640625, |
|
"learning_rate": 2.7144932808611002e-06, |
|
"logits/chosen": -2.705024003982544, |
|
"logits/rejected": -3.1964850425720215, |
|
"logps/chosen": -94.14689636230469, |
|
"logps/rejected": -587.1410522460938, |
|
"loss": 0.0136, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6878983974456787, |
|
"rewards/margins": 4.294985294342041, |
|
"rewards/rejected": -3.6070873737335205, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.1572265625, |
|
"learning_rate": 2.407981942646603e-06, |
|
"logits/chosen": -2.7173984050750732, |
|
"logits/rejected": -3.229872226715088, |
|
"logps/chosen": -93.74874877929688, |
|
"logps/rejected": -597.3945922851562, |
|
"loss": 0.0121, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6988341212272644, |
|
"rewards/margins": 4.410033226013184, |
|
"rewards/rejected": -3.7111988067626953, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.1435546875, |
|
"learning_rate": 2.102854641665347e-06, |
|
"logits/chosen": -2.711818218231201, |
|
"logits/rejected": -3.219580888748169, |
|
"logps/chosen": -92.11972045898438, |
|
"logps/rejected": -603.1061401367188, |
|
"loss": 0.0115, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7005870342254639, |
|
"rewards/margins": 4.465435981750488, |
|
"rewards/rejected": -3.7648491859436035, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.154296875, |
|
"learning_rate": 1.8037007765373677e-06, |
|
"logits/chosen": -2.736745834350586, |
|
"logits/rejected": -3.206763505935669, |
|
"logps/chosen": -93.66650390625, |
|
"logps/rejected": -605.5599365234375, |
|
"loss": 0.0113, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.700027346611023, |
|
"rewards/margins": 4.484536170959473, |
|
"rewards/rejected": -3.7845091819763184, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.1484375, |
|
"learning_rate": 1.5150198998473802e-06, |
|
"logits/chosen": -2.6961426734924316, |
|
"logits/rejected": -3.201364517211914, |
|
"logps/chosen": -92.75911712646484, |
|
"logps/rejected": -609.6072998046875, |
|
"loss": 0.0107, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7066330313682556, |
|
"rewards/margins": 4.538401126861572, |
|
"rewards/rejected": -3.8317675590515137, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.142578125, |
|
"learning_rate": 1.2411540406857064e-06, |
|
"logits/chosen": -2.697049140930176, |
|
"logits/rejected": -3.2053134441375732, |
|
"logps/chosen": -92.19438171386719, |
|
"logps/rejected": -611.137939453125, |
|
"loss": 0.0106, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7058606147766113, |
|
"rewards/margins": 4.546854496002197, |
|
"rewards/rejected": -3.840993881225586, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_logits/chosen": -2.7109670639038086, |
|
"eval_logits/rejected": -3.2030317783355713, |
|
"eval_logps/chosen": -74.4535140991211, |
|
"eval_logps/rejected": -542.8434448242188, |
|
"eval_loss": 0.023632029071450233, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 0.5599982738494873, |
|
"eval_rewards/margins": 3.903829574584961, |
|
"eval_rewards/rejected": -3.343831777572632, |
|
"eval_runtime": 1.2832, |
|
"eval_samples_per_second": 3.896, |
|
"eval_steps_per_second": 2.338, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.1318359375, |
|
"learning_rate": 9.862223964891864e-07, |
|
"logits/chosen": -2.728084087371826, |
|
"logits/rejected": -3.2059364318847656, |
|
"logps/chosen": -90.77999114990234, |
|
"logps/rejected": -605.8444213867188, |
|
"loss": 0.0124, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.694692850112915, |
|
"rewards/margins": 4.4922895431518555, |
|
"rewards/rejected": -3.797595977783203, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.1279296875, |
|
"learning_rate": 7.54059376477568e-07, |
|
"logits/chosen": -2.712827682495117, |
|
"logits/rejected": -3.2164974212646484, |
|
"logps/chosen": -93.00579071044922, |
|
"logps/rejected": -616.4993286132812, |
|
"loss": 0.0102, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7107797861099243, |
|
"rewards/margins": 4.593611717224121, |
|
"rewards/rejected": -3.8828322887420654, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.130859375, |
|
"learning_rate": 5.481569285697638e-07, |
|
"logits/chosen": -2.7499520778656006, |
|
"logits/rejected": -3.2049834728240967, |
|
"logps/chosen": -93.71913146972656, |
|
"logps/rejected": -612.4299926757812, |
|
"loss": 0.0106, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6972833871841431, |
|
"rewards/margins": 4.548653602600098, |
|
"rewards/rejected": -3.851370334625244, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.1328125, |
|
"learning_rate": 3.71612017236837e-07, |
|
"logits/chosen": -2.7227025032043457, |
|
"logits/rejected": -3.198974132537842, |
|
"logps/chosen": -93.90065002441406, |
|
"logps/rejected": -616.3263549804688, |
|
"loss": 0.0101, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7073711156845093, |
|
"rewards/margins": 4.595206260681152, |
|
"rewards/rejected": -3.8878350257873535, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.1298828125, |
|
"learning_rate": 2.2708004227369407e-07, |
|
"logits/chosen": -2.7253494262695312, |
|
"logits/rejected": -3.200887680053711, |
|
"logps/chosen": -92.90316009521484, |
|
"logps/rejected": -616.0049438476562, |
|
"loss": 0.0102, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6983229517936707, |
|
"rewards/margins": 4.588388919830322, |
|
"rewards/rejected": -3.890066146850586, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.12890625, |
|
"learning_rate": 1.1673489911451536e-07, |
|
"logits/chosen": -2.6972110271453857, |
|
"logits/rejected": -3.2123961448669434, |
|
"logps/chosen": -92.834716796875, |
|
"logps/rejected": -614.0274658203125, |
|
"loss": 0.0103, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7053926587104797, |
|
"rewards/margins": 4.57442569732666, |
|
"rewards/rejected": -3.869033098220825, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.134765625, |
|
"learning_rate": 4.223628142195929e-08, |
|
"logits/chosen": -2.706958055496216, |
|
"logits/rejected": -3.205293655395508, |
|
"logps/chosen": -93.16792297363281, |
|
"logps/rejected": -615.3536376953125, |
|
"loss": 0.0101, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7087845802307129, |
|
"rewards/margins": 4.595980167388916, |
|
"rewards/rejected": -3.887195110321045, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.1318359375, |
|
"learning_rate": 4.704717749627052e-09, |
|
"logits/chosen": -2.6964120864868164, |
|
"logits/rejected": -3.2064449787139893, |
|
"logps/chosen": -93.33256530761719, |
|
"logps/rejected": -614.2545166015625, |
|
"loss": 0.0104, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6982468962669373, |
|
"rewards/margins": 4.565683841705322, |
|
"rewards/rejected": -3.8674368858337402, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 285, |
|
"total_flos": 0.0, |
|
"train_loss": 0.12933478820742222, |
|
"train_runtime": 748.758, |
|
"train_samples_per_second": 1.521, |
|
"train_steps_per_second": 0.381 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 285, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 200, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|