|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9992429977289932, |
|
"eval_steps": 500, |
|
"global_step": 165, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 43.6239071005382, |
|
"learning_rate": 2.941176470588235e-09, |
|
"logits/chosen": -1.3522639274597168, |
|
"logits/rejected": -1.3693311214447021, |
|
"logps/chosen": -262.57476806640625, |
|
"logps/rejected": -283.94244384765625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 40.670158110610615, |
|
"learning_rate": 2.941176470588235e-08, |
|
"logits/chosen": -1.1757179498672485, |
|
"logits/rejected": -1.2358938455581665, |
|
"logps/chosen": -280.3355407714844, |
|
"logps/rejected": -300.9811706542969, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.4236111044883728, |
|
"rewards/chosen": 0.0006423706654459238, |
|
"rewards/margins": 0.00042565667536109686, |
|
"rewards/rejected": 0.00021671393187716603, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 40.34301193149703, |
|
"learning_rate": 4.994932636402031e-08, |
|
"logits/chosen": -1.1265027523040771, |
|
"logits/rejected": -1.3426095247268677, |
|
"logps/chosen": -277.8979187011719, |
|
"logps/rejected": -299.1261291503906, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.5406249761581421, |
|
"rewards/chosen": 0.0021728514693677425, |
|
"rewards/margins": 0.000990995205938816, |
|
"rewards/rejected": 0.0011818561470136046, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 38.47792188182457, |
|
"learning_rate": 4.905416503522123e-08, |
|
"logits/chosen": -1.0218889713287354, |
|
"logits/rejected": -1.151049256324768, |
|
"logps/chosen": -273.4291687011719, |
|
"logps/rejected": -301.57781982421875, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.016858745366334915, |
|
"rewards/margins": 0.0027274340391159058, |
|
"rewards/rejected": 0.014131310395896435, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 56.52700447561209, |
|
"learning_rate": 4.707922373336523e-08, |
|
"logits/chosen": -1.084263801574707, |
|
"logits/rejected": -1.2900816202163696, |
|
"logps/chosen": -292.4299011230469, |
|
"logps/rejected": -308.45062255859375, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.030295047909021378, |
|
"rewards/margins": 0.000461754942080006, |
|
"rewards/rejected": 0.029833292588591576, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 38.00149695178064, |
|
"learning_rate": 4.4113156629677314e-08, |
|
"logits/chosen": -1.167959451675415, |
|
"logits/rejected": -1.299862265586853, |
|
"logps/chosen": -296.1455383300781, |
|
"logps/rejected": -305.6954040527344, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.4906249940395355, |
|
"rewards/chosen": 0.04823774844408035, |
|
"rewards/margins": 0.00479243416339159, |
|
"rewards/rejected": 0.043445318937301636, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 37.72422002493558, |
|
"learning_rate": 4.028910905897228e-08, |
|
"logits/chosen": -1.181056261062622, |
|
"logits/rejected": -1.0861554145812988, |
|
"logps/chosen": -292.48040771484375, |
|
"logps/rejected": -304.0435485839844, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.528124988079071, |
|
"rewards/chosen": 0.06034231185913086, |
|
"rewards/margins": 0.006084255874156952, |
|
"rewards/rejected": 0.054258059710264206, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 42.79122392676645, |
|
"learning_rate": 3.577874068920445e-08, |
|
"logits/chosen": -1.210323691368103, |
|
"logits/rejected": -1.065538763999939, |
|
"logps/chosen": -286.93572998046875, |
|
"logps/rejected": -306.0190124511719, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.5093749761581421, |
|
"rewards/chosen": 0.06624683737754822, |
|
"rewards/margins": 0.004461642820388079, |
|
"rewards/rejected": 0.06178520247340202, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 39.75881017842744, |
|
"learning_rate": 3.078451980100854e-08, |
|
"logits/chosen": -1.1516613960266113, |
|
"logits/rejected": -1.3043029308319092, |
|
"logps/chosen": -270.6875, |
|
"logps/rejected": -290.72998046875, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": 0.04866773635149002, |
|
"rewards/margins": 0.0037067097146064043, |
|
"rewards/rejected": 0.04496103152632713, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 43.01085877492651, |
|
"learning_rate": 2.5530634583340587e-08, |
|
"logits/chosen": -1.2572039365768433, |
|
"logits/rejected": -1.0870755910873413, |
|
"logps/chosen": -273.9654235839844, |
|
"logps/rejected": -290.50836181640625, |
|
"loss": 0.6898, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.03104880452156067, |
|
"rewards/margins": 0.00489948783069849, |
|
"rewards/rejected": 0.026149321347475052, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 47.166985943498034, |
|
"learning_rate": 2.0252929432814285e-08, |
|
"logits/chosen": -1.1381770372390747, |
|
"logits/rejected": -1.3748772144317627, |
|
"logps/chosen": -282.6134338378906, |
|
"logps/rejected": -304.66790771484375, |
|
"loss": 0.6897, |
|
"rewards/accuracies": 0.5843750238418579, |
|
"rewards/chosen": 0.03918559476733208, |
|
"rewards/margins": 0.01519505213946104, |
|
"rewards/rejected": 0.023990539833903313, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 44.673514372021515, |
|
"learning_rate": 1.5188318011445905e-08, |
|
"logits/chosen": -1.065263271331787, |
|
"logits/rejected": -1.2649091482162476, |
|
"logps/chosen": -277.50775146484375, |
|
"logps/rejected": -300.609619140625, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.5531250238418579, |
|
"rewards/chosen": 0.028311368077993393, |
|
"rewards/margins": 0.008604733273386955, |
|
"rewards/rejected": 0.01970663294196129, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 46.58773437073759, |
|
"learning_rate": 1.0564148305586295e-08, |
|
"logits/chosen": -1.1271841526031494, |
|
"logits/rejected": -1.1778924465179443, |
|
"logps/chosen": -279.55084228515625, |
|
"logps/rejected": -298.75030517578125, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.028242077678442, |
|
"rewards/margins": 0.003035143483430147, |
|
"rewards/rejected": 0.02520693466067314, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 41.9191117851892, |
|
"learning_rate": 6.587997083462196e-09, |
|
"logits/chosen": -1.0855623483657837, |
|
"logits/rejected": -1.1804945468902588, |
|
"logps/chosen": -283.80682373046875, |
|
"logps/rejected": -294.71844482421875, |
|
"loss": 0.6889, |
|
"rewards/accuracies": 0.590624988079071, |
|
"rewards/chosen": 0.033344708383083344, |
|
"rewards/margins": 0.013705698773264885, |
|
"rewards/rejected": 0.01963900588452816, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 44.355218289856595, |
|
"learning_rate": 3.438351873250492e-09, |
|
"logits/chosen": -1.092165470123291, |
|
"logits/rejected": -1.280500054359436, |
|
"logps/chosen": -278.0908508300781, |
|
"logps/rejected": -305.513427734375, |
|
"loss": 0.6901, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.033924926072359085, |
|
"rewards/margins": 0.007848087698221207, |
|
"rewards/rejected": 0.026076842099428177, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 44.46354128863562, |
|
"learning_rate": 1.256598743236703e-09, |
|
"logits/chosen": -1.0778554677963257, |
|
"logits/rejected": -1.2542009353637695, |
|
"logps/chosen": -265.0628967285156, |
|
"logps/rejected": -297.0721130371094, |
|
"loss": 0.6898, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.022747624665498734, |
|
"rewards/margins": 0.007828270085155964, |
|
"rewards/rejected": 0.014919353649020195, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 46.98138173894952, |
|
"learning_rate": 1.4067554877743859e-10, |
|
"logits/chosen": -1.170921802520752, |
|
"logits/rejected": -1.1549434661865234, |
|
"logps/chosen": -280.11676025390625, |
|
"logps/rejected": -299.28729248046875, |
|
"loss": 0.6895, |
|
"rewards/accuracies": 0.528124988079071, |
|
"rewards/chosen": 0.031538333743810654, |
|
"rewards/margins": 0.006610988173633814, |
|
"rewards/rejected": 0.024927344173192978, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 165, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6908076347726764, |
|
"train_runtime": 32496.9517, |
|
"train_samples_per_second": 0.65, |
|
"train_steps_per_second": 0.005 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 165, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 200, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|