|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"eval_steps": 1000, |
|
"global_step": 125, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.490324960115022, |
|
"learning_rate": 3.846153846153847e-07, |
|
"logits/chosen": -1.73323655128479, |
|
"logits/rejected": -1.963712453842163, |
|
"logps/chosen": -64.71795654296875, |
|
"logps/rejected": -92.56527709960938, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.946222324869374, |
|
"learning_rate": 3.846153846153847e-06, |
|
"logits/chosen": -1.728408694267273, |
|
"logits/rejected": -1.9020811319351196, |
|
"logps/chosen": -72.22079467773438, |
|
"logps/rejected": -116.01087951660156, |
|
"loss": 0.6242, |
|
"rewards/accuracies": 0.6041666865348816, |
|
"rewards/chosen": -0.08828946202993393, |
|
"rewards/margins": 0.1939040869474411, |
|
"rewards/rejected": -0.28219354152679443, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 3.522682391478169, |
|
"learning_rate": 4.9519632010080765e-06, |
|
"logits/chosen": -1.5478551387786865, |
|
"logits/rejected": -1.8321882486343384, |
|
"logps/chosen": -64.30168914794922, |
|
"logps/rejected": -259.0096435546875, |
|
"loss": 0.4142, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.03247198835015297, |
|
"rewards/margins": 1.648101806640625, |
|
"rewards/rejected": -1.6805737018585205, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 3.068578740455024, |
|
"learning_rate": 4.721114089947181e-06, |
|
"logits/chosen": -1.21674382686615, |
|
"logits/rejected": -1.6157382726669312, |
|
"logps/chosen": -70.33782958984375, |
|
"logps/rejected": -497.0357360839844, |
|
"loss": 0.189, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.08688753098249435, |
|
"rewards/margins": 3.9799323081970215, |
|
"rewards/rejected": -4.066819667816162, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.22868786429214566, |
|
"learning_rate": 4.316650805085068e-06, |
|
"logits/chosen": -1.0377681255340576, |
|
"logits/rejected": -1.4782741069793701, |
|
"logps/chosen": -146.58694458007812, |
|
"logps/rejected": -769.5899658203125, |
|
"loss": 0.0266, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.891402542591095, |
|
"rewards/margins": 5.863120079040527, |
|
"rewards/rejected": -6.754522800445557, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 4.80554513563544, |
|
"learning_rate": 3.770188363116324e-06, |
|
"logits/chosen": -1.2781574726104736, |
|
"logits/rejected": -1.560027837753296, |
|
"logps/chosen": -180.29161071777344, |
|
"logps/rejected": -1096.177001953125, |
|
"loss": 0.042, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -1.180869460105896, |
|
"rewards/margins": 8.900360107421875, |
|
"rewards/rejected": -10.081230163574219, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 0.27603672981693994, |
|
"learning_rate": 3.1244411954180677e-06, |
|
"logits/chosen": -0.9170042276382446, |
|
"logits/rejected": -1.313642978668213, |
|
"logps/chosen": -199.44625854492188, |
|
"logps/rejected": -1061.7119140625, |
|
"loss": 0.0072, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4110476970672607, |
|
"rewards/margins": 8.259212493896484, |
|
"rewards/rejected": -9.67026138305664, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 4.0781788867244595, |
|
"learning_rate": 2.429884359310328e-06, |
|
"logits/chosen": -1.0711753368377686, |
|
"logits/rejected": -1.4404046535491943, |
|
"logps/chosen": -216.3368377685547, |
|
"logps/rejected": -1323.527587890625, |
|
"loss": 0.003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5303722620010376, |
|
"rewards/margins": 10.808874130249023, |
|
"rewards/rejected": -12.33924674987793, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"grad_norm": 0.058123220019245296, |
|
"learning_rate": 1.7408081372259633e-06, |
|
"logits/chosen": -0.8975645899772644, |
|
"logits/rejected": -1.296263337135315, |
|
"logps/chosen": -145.1630859375, |
|
"logps/rejected": -1060.802734375, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8476600646972656, |
|
"rewards/margins": 8.854598045349121, |
|
"rewards/rejected": -9.702258110046387, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"grad_norm": 0.09682108714448862, |
|
"learning_rate": 1.1110744174509952e-06, |
|
"logits/chosen": -1.0131741762161255, |
|
"logits/rejected": -1.4304622411727905, |
|
"logps/chosen": -159.85574340820312, |
|
"logps/rejected": -1194.8968505859375, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.978173553943634, |
|
"rewards/margins": 10.080551147460938, |
|
"rewards/rejected": -11.058725357055664, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.004709132530498062, |
|
"learning_rate": 5.899065604459814e-07, |
|
"logits/chosen": -0.9850813150405884, |
|
"logits/rejected": -1.3841135501861572, |
|
"logps/chosen": -170.55239868164062, |
|
"logps/rejected": -1331.4459228515625, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.085587739944458, |
|
"rewards/margins": 11.312799453735352, |
|
"rewards/rejected": -12.398386001586914, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"grad_norm": 0.004246194962367942, |
|
"learning_rate": 2.1804183734670277e-07, |
|
"logits/chosen": -1.1493442058563232, |
|
"logits/rejected": -1.5497456789016724, |
|
"logps/chosen": -195.21815490722656, |
|
"logps/rejected": -1428.41064453125, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3281663656234741, |
|
"rewards/margins": 12.071850776672363, |
|
"rewards/rejected": -13.400018692016602, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"grad_norm": 0.010996089792622345, |
|
"learning_rate": 2.454718665888589e-08, |
|
"logits/chosen": -1.056302785873413, |
|
"logits/rejected": -1.4732356071472168, |
|
"logps/chosen": -179.13990783691406, |
|
"logps/rejected": -1402.2952880859375, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1820361614227295, |
|
"rewards/margins": 11.938187599182129, |
|
"rewards/rejected": -13.120223999023438, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 125, |
|
"total_flos": 0.0, |
|
"train_loss": 0.10525495952181518, |
|
"train_runtime": 12485.2526, |
|
"train_samples_per_second": 0.641, |
|
"train_steps_per_second": 0.01 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 125, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|