|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.996742671009772, |
|
"eval_steps": 100, |
|
"global_step": 153, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 20.72440115890601, |
|
"learning_rate": 3.125e-08, |
|
"logits/chosen": -2.8101868629455566, |
|
"logits/rejected": -2.7995030879974365, |
|
"logps/chosen": -574.7534790039062, |
|
"logps/rejected": -1069.037109375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 18.465682889641315, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.828524589538574, |
|
"logits/rejected": -2.7697534561157227, |
|
"logps/chosen": -477.94866943359375, |
|
"logps/rejected": -1011.936767578125, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.5277777910232544, |
|
"rewards/chosen": 0.0030513808596879244, |
|
"rewards/margins": 0.0022961744107306004, |
|
"rewards/rejected": 0.000755206448957324, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 20.63587915637583, |
|
"learning_rate": 4.989490450759331e-07, |
|
"logits/chosen": -2.914271593093872, |
|
"logits/rejected": -2.8110787868499756, |
|
"logps/chosen": -559.07470703125, |
|
"logps/rejected": -1107.041748046875, |
|
"loss": 0.6681, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.043809644877910614, |
|
"rewards/margins": 0.053940802812576294, |
|
"rewards/rejected": -0.010131158865988255, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 18.63790840633573, |
|
"learning_rate": 4.872270441827174e-07, |
|
"logits/chosen": -2.95158314704895, |
|
"logits/rejected": -2.8900146484375, |
|
"logps/chosen": -586.1901245117188, |
|
"logps/rejected": -1104.927978515625, |
|
"loss": 0.6191, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.07399635016918182, |
|
"rewards/margins": 0.1986941397190094, |
|
"rewards/rejected": -0.12469778954982758, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 27.298745582804646, |
|
"learning_rate": 4.6308512113530063e-07, |
|
"logits/chosen": -3.0119404792785645, |
|
"logits/rejected": -3.0303287506103516, |
|
"logps/chosen": -568.1453247070312, |
|
"logps/rejected": -1168.633544921875, |
|
"loss": 0.4834, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1426580548286438, |
|
"rewards/margins": 0.7169780731201172, |
|
"rewards/rejected": -0.8596361875534058, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 28.357781831320167, |
|
"learning_rate": 4.277872161641681e-07, |
|
"logits/chosen": -3.2949161529541016, |
|
"logits/rejected": -3.241774320602417, |
|
"logps/chosen": -628.916015625, |
|
"logps/rejected": -1339.3349609375, |
|
"loss": 0.2917, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.8088345527648926, |
|
"rewards/margins": 2.2371134757995605, |
|
"rewards/rejected": -3.0459485054016113, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 21.80399650152746, |
|
"learning_rate": 3.8318133624280046e-07, |
|
"logits/chosen": -3.2456772327423096, |
|
"logits/rejected": -3.213705539703369, |
|
"logps/chosen": -700.1171264648438, |
|
"logps/rejected": -1630.8173828125, |
|
"loss": 0.2407, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.35238516330719, |
|
"rewards/margins": 4.4795427322387695, |
|
"rewards/rejected": -5.831927299499512, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 18.258479162120135, |
|
"learning_rate": 3.316028034595861e-07, |
|
"logits/chosen": -3.1275057792663574, |
|
"logits/rejected": -3.216618061065674, |
|
"logps/chosen": -673.2752075195312, |
|
"logps/rejected": -1530.3189697265625, |
|
"loss": 0.2029, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.7634323835372925, |
|
"rewards/margins": 3.932297468185425, |
|
"rewards/rejected": -4.695730686187744, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 40.88867782206525, |
|
"learning_rate": 2.7575199021178855e-07, |
|
"logits/chosen": -3.0596213340759277, |
|
"logits/rejected": -3.1614341735839844, |
|
"logps/chosen": -607.4539184570312, |
|
"logps/rejected": -1750.110107421875, |
|
"loss": 0.1651, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.0371493101119995, |
|
"rewards/margins": 5.035472869873047, |
|
"rewards/rejected": -6.072621822357178, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 20.720968605328384, |
|
"learning_rate": 2.1855294234408068e-07, |
|
"logits/chosen": -3.212242841720581, |
|
"logits/rejected": -3.208061695098877, |
|
"logps/chosen": -692.4600830078125, |
|
"logps/rejected": -1919.945068359375, |
|
"loss": 0.1388, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.2357938289642334, |
|
"rewards/margins": 6.9475884437561035, |
|
"rewards/rejected": -8.183381080627441, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 21.014024444843912, |
|
"learning_rate": 1.6300029195778453e-07, |
|
"logits/chosen": -3.062844753265381, |
|
"logits/rejected": -3.173572063446045, |
|
"logps/chosen": -761.4752807617188, |
|
"logps/rejected": -1871.939208984375, |
|
"loss": 0.1362, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -1.5748692750930786, |
|
"rewards/margins": 6.684431552886963, |
|
"rewards/rejected": -8.259300231933594, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_logits/chosen": -3.279001474380493, |
|
"eval_logits/rejected": -3.112272024154663, |
|
"eval_logps/chosen": -721.5318603515625, |
|
"eval_logps/rejected": -1477.787353515625, |
|
"eval_loss": 0.32064372301101685, |
|
"eval_rewards/accuracies": 0.8707386255264282, |
|
"eval_rewards/chosen": -1.9160078763961792, |
|
"eval_rewards/margins": 3.549539089202881, |
|
"eval_rewards/rejected": -5.465547561645508, |
|
"eval_runtime": 295.1481, |
|
"eval_samples_per_second": 9.5, |
|
"eval_steps_per_second": 0.298, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 18.750300976193962, |
|
"learning_rate": 1.1200247470632392e-07, |
|
"logits/chosen": -3.0732262134552, |
|
"logits/rejected": -3.194598436355591, |
|
"logps/chosen": -676.952880859375, |
|
"logps/rejected": -1844.5322265625, |
|
"loss": 0.1452, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.3141238689422607, |
|
"rewards/margins": 6.629952907562256, |
|
"rewards/rejected": -7.944077491760254, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 26.345958267827655, |
|
"learning_rate": 6.822945986946385e-08, |
|
"logits/chosen": -3.1366074085235596, |
|
"logits/rejected": -3.1714870929718018, |
|
"logps/chosen": -760.309814453125, |
|
"logps/rejected": -1834.424560546875, |
|
"loss": 0.1493, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -1.501333475112915, |
|
"rewards/margins": 6.5416059494018555, |
|
"rewards/rejected": -8.042940139770508, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 28.267952965151345, |
|
"learning_rate": 3.397296523427806e-08, |
|
"logits/chosen": -3.0828897953033447, |
|
"logits/rejected": -3.2096328735351562, |
|
"logps/chosen": -751.352783203125, |
|
"logps/rejected": -1817.760986328125, |
|
"loss": 0.1283, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.5563374757766724, |
|
"rewards/margins": 6.55484676361084, |
|
"rewards/rejected": -8.111185073852539, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 33.23618703081401, |
|
"learning_rate": 1.1026475173977978e-08, |
|
"logits/chosen": -3.1680922508239746, |
|
"logits/rejected": -3.1776795387268066, |
|
"logps/chosen": -692.7706298828125, |
|
"logps/rejected": -1928.9267578125, |
|
"loss": 0.1068, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -1.3918243646621704, |
|
"rewards/margins": 6.6903181076049805, |
|
"rewards/rejected": -8.08214282989502, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 35.04978007474125, |
|
"learning_rate": 5.913435276374834e-10, |
|
"logits/chosen": -3.089778423309326, |
|
"logits/rejected": -3.155916452407837, |
|
"logps/chosen": -713.7938842773438, |
|
"logps/rejected": -1777.8834228515625, |
|
"loss": 0.1383, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.6856982707977295, |
|
"rewards/margins": 5.726717472076416, |
|
"rewards/rejected": -7.412415504455566, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 153, |
|
"total_flos": 0.0, |
|
"train_loss": 0.04516718044779659, |
|
"train_runtime": 721.6494, |
|
"train_samples_per_second": 13.591, |
|
"train_steps_per_second": 0.212 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 153, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|