|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9984, |
|
"eval_steps": 500, |
|
"global_step": 156, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 24.144679616293203, |
|
"learning_rate": 6.25e-09, |
|
"logits/chosen": -0.4835050106048584, |
|
"logits/rejected": -0.45789963006973267, |
|
"logps/chosen": -214.22390747070312, |
|
"logps/rejected": -238.45899963378906, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 27.157833222439553, |
|
"learning_rate": 6.25e-08, |
|
"logits/chosen": -0.5191683769226074, |
|
"logits/rejected": -0.6054410338401794, |
|
"logps/chosen": -233.09689331054688, |
|
"logps/rejected": -247.0145721435547, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4097222089767456, |
|
"rewards/chosen": -0.00034871429670602083, |
|
"rewards/margins": -0.0003790514019783586, |
|
"rewards/rejected": 3.03371598420199e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 22.695219914328067, |
|
"learning_rate": 9.979871469976195e-08, |
|
"logits/chosen": -0.5110132098197937, |
|
"logits/rejected": -0.5159324407577515, |
|
"logps/chosen": -226.8810272216797, |
|
"logps/rejected": -234.03579711914062, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.484375, |
|
"rewards/chosen": -0.012425726279616356, |
|
"rewards/margins": 0.0005857773358002305, |
|
"rewards/rejected": -0.013011504895985126, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 27.267303752253287, |
|
"learning_rate": 9.755282581475768e-08, |
|
"logits/chosen": -0.46132326126098633, |
|
"logits/rejected": -0.5038495063781738, |
|
"logps/chosen": -228.7631378173828, |
|
"logps/rejected": -243.3440399169922, |
|
"loss": 0.6895, |
|
"rewards/accuracies": 0.5718749761581421, |
|
"rewards/chosen": -0.05522267147898674, |
|
"rewards/margins": 0.011394877918064594, |
|
"rewards/rejected": -0.06661754846572876, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 25.80441755927495, |
|
"learning_rate": 9.29224396800933e-08, |
|
"logits/chosen": -0.5605762600898743, |
|
"logits/rejected": -0.4145810008049011, |
|
"logps/chosen": -235.9694366455078, |
|
"logps/rejected": -245.53140258789062, |
|
"loss": 0.6877, |
|
"rewards/accuracies": 0.5406249761581421, |
|
"rewards/chosen": -0.12484677881002426, |
|
"rewards/margins": 0.01826881244778633, |
|
"rewards/rejected": -0.1431155800819397, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 29.148756992059326, |
|
"learning_rate": 8.613974319136957e-08, |
|
"logits/chosen": -0.33988311886787415, |
|
"logits/rejected": -0.4071916937828064, |
|
"logps/chosen": -225.6988525390625, |
|
"logps/rejected": -248.31716918945312, |
|
"loss": 0.6858, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.11917146295309067, |
|
"rewards/margins": 0.03840441256761551, |
|
"rewards/rejected": -0.15757587552070618, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 31.691697738483036, |
|
"learning_rate": 7.754484907260513e-08, |
|
"logits/chosen": -0.32609015703201294, |
|
"logits/rejected": -0.26230502128601074, |
|
"logps/chosen": -227.6748504638672, |
|
"logps/rejected": -247.0984649658203, |
|
"loss": 0.6813, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.07656601071357727, |
|
"rewards/margins": 0.052247386425733566, |
|
"rewards/rejected": -0.12881340086460114, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 29.21753574540554, |
|
"learning_rate": 6.756874120406714e-08, |
|
"logits/chosen": -0.46613264083862305, |
|
"logits/rejected": -0.40840277075767517, |
|
"logps/chosen": -227.2427520751953, |
|
"logps/rejected": -237.88174438476562, |
|
"loss": 0.6836, |
|
"rewards/accuracies": 0.5406249761581421, |
|
"rewards/chosen": -0.011777873151004314, |
|
"rewards/margins": 0.03103628195822239, |
|
"rewards/rejected": -0.04281415045261383, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 27.295936995560456, |
|
"learning_rate": 5.6711663290882774e-08, |
|
"logits/chosen": -0.3539288640022278, |
|
"logits/rejected": -0.4821072518825531, |
|
"logps/chosen": -227.5625, |
|
"logps/rejected": -246.0811767578125, |
|
"loss": 0.6843, |
|
"rewards/accuracies": 0.5093749761581421, |
|
"rewards/chosen": -0.02285691350698471, |
|
"rewards/margins": 0.04252464324235916, |
|
"rewards/rejected": -0.06538156419992447, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 37.94611808985469, |
|
"learning_rate": 4.551803455482833e-08, |
|
"logits/chosen": -0.31488946080207825, |
|
"logits/rejected": -0.2586648762226105, |
|
"logps/chosen": -235.135009765625, |
|
"logps/rejected": -250.8152618408203, |
|
"loss": 0.6811, |
|
"rewards/accuracies": 0.559374988079071, |
|
"rewards/chosen": -0.05702267214655876, |
|
"rewards/margins": 0.03373400494456291, |
|
"rewards/rejected": -0.09075668454170227, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 31.89166999144619, |
|
"learning_rate": 3.4549150281252633e-08, |
|
"logits/chosen": -0.2342231571674347, |
|
"logits/rejected": -0.11592201143503189, |
|
"logps/chosen": -241.8380126953125, |
|
"logps/rejected": -248.9578094482422, |
|
"loss": 0.6851, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.10655052959918976, |
|
"rewards/margins": 0.02655043639242649, |
|
"rewards/rejected": -0.1331009566783905, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 33.31375799730263, |
|
"learning_rate": 2.43550361297047e-08, |
|
"logits/chosen": -0.18439307808876038, |
|
"logits/rejected": -0.2467001974582672, |
|
"logps/chosen": -231.55880737304688, |
|
"logps/rejected": -247.73422241210938, |
|
"loss": 0.6818, |
|
"rewards/accuracies": 0.503125011920929, |
|
"rewards/chosen": -0.1505533754825592, |
|
"rewards/margins": 0.03467424958944321, |
|
"rewards/rejected": -0.185227632522583, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 47.472733400866, |
|
"learning_rate": 1.5446867550656767e-08, |
|
"logits/chosen": -0.33943504095077515, |
|
"logits/rejected": -0.27855488657951355, |
|
"logps/chosen": -237.60281372070312, |
|
"logps/rejected": -256.1316833496094, |
|
"loss": 0.6833, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.1447935551404953, |
|
"rewards/margins": 0.04320163279771805, |
|
"rewards/rejected": -0.18799519538879395, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 33.56575623501931, |
|
"learning_rate": 8.271337313934867e-09, |
|
"logits/chosen": -0.20994436740875244, |
|
"logits/rejected": -0.30067163705825806, |
|
"logps/chosen": -242.28720092773438, |
|
"logps/rejected": -260.9054260253906, |
|
"loss": 0.6824, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.13135352730751038, |
|
"rewards/margins": 0.033290181308984756, |
|
"rewards/rejected": -0.16464371979236603, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 36.04452677999868, |
|
"learning_rate": 3.1882564680131396e-09, |
|
"logits/chosen": -0.2294171154499054, |
|
"logits/rejected": -0.14980368316173553, |
|
"logps/chosen": -226.87939453125, |
|
"logps/rejected": -243.36831665039062, |
|
"loss": 0.6884, |
|
"rewards/accuracies": 0.503125011920929, |
|
"rewards/chosen": -0.11503396183252335, |
|
"rewards/margins": 0.014261065050959587, |
|
"rewards/rejected": -0.12929502129554749, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 34.70896434984119, |
|
"learning_rate": 4.52511911603265e-10, |
|
"logits/chosen": -0.28860437870025635, |
|
"logits/rejected": -0.18086276948451996, |
|
"logps/chosen": -245.88034057617188, |
|
"logps/rejected": -255.14749145507812, |
|
"loss": 0.6783, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.12854784727096558, |
|
"rewards/margins": 0.03599141910672188, |
|
"rewards/rejected": -0.16453926265239716, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 156, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6857114946230863, |
|
"train_runtime": 19132.8262, |
|
"train_samples_per_second": 1.045, |
|
"train_steps_per_second": 0.008 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 156, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 200, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|