{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 1000, "global_step": 125, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "grad_norm": 5.490324960115022, "learning_rate": 3.846153846153847e-07, "logits/chosen": -1.73323655128479, "logits/rejected": -1.963712453842163, "logps/chosen": -64.71795654296875, "logps/rejected": -92.56527709960938, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.4, "grad_norm": 1.946222324869374, "learning_rate": 3.846153846153847e-06, "logits/chosen": -1.728408694267273, "logits/rejected": -1.9020811319351196, "logps/chosen": -72.22079467773438, "logps/rejected": -116.01087951660156, "loss": 0.6242, "rewards/accuracies": 0.6041666865348816, "rewards/chosen": -0.08828946202993393, "rewards/margins": 0.1939040869474411, "rewards/rejected": -0.28219354152679443, "step": 10 }, { "epoch": 0.8, "grad_norm": 3.522682391478169, "learning_rate": 4.9519632010080765e-06, "logits/chosen": -1.5478551387786865, "logits/rejected": -1.8321882486343384, "logps/chosen": -64.30168914794922, "logps/rejected": -259.0096435546875, "loss": 0.4142, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.03247198835015297, "rewards/margins": 1.648101806640625, "rewards/rejected": -1.6805737018585205, "step": 20 }, { "epoch": 1.2, "grad_norm": 3.068578740455024, "learning_rate": 4.721114089947181e-06, "logits/chosen": -1.21674382686615, "logits/rejected": -1.6157382726669312, "logps/chosen": -70.33782958984375, "logps/rejected": -497.0357360839844, "loss": 0.189, "rewards/accuracies": 0.9375, "rewards/chosen": -0.08688753098249435, "rewards/margins": 3.9799323081970215, "rewards/rejected": -4.066819667816162, "step": 30 }, { "epoch": 1.6, "grad_norm": 0.22868786429214566, "learning_rate": 4.316650805085068e-06, "logits/chosen": -1.0377681255340576, "logits/rejected": -1.4782741069793701, "logps/chosen": -146.58694458007812, "logps/rejected": -769.5899658203125, "loss": 0.0266, "rewards/accuracies": 1.0, "rewards/chosen": -0.891402542591095, "rewards/margins": 5.863120079040527, "rewards/rejected": -6.754522800445557, "step": 40 }, { "epoch": 2.0, "grad_norm": 4.80554513563544, "learning_rate": 3.770188363116324e-06, "logits/chosen": -1.2781574726104736, "logits/rejected": -1.560027837753296, "logps/chosen": -180.29161071777344, "logps/rejected": -1096.177001953125, "loss": 0.042, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -1.180869460105896, "rewards/margins": 8.900360107421875, "rewards/rejected": -10.081230163574219, "step": 50 }, { "epoch": 2.4, "grad_norm": 0.27603672981693994, "learning_rate": 3.1244411954180677e-06, "logits/chosen": -0.9170042276382446, "logits/rejected": -1.313642978668213, "logps/chosen": -199.44625854492188, "logps/rejected": -1061.7119140625, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -1.4110476970672607, "rewards/margins": 8.259212493896484, "rewards/rejected": -9.67026138305664, "step": 60 }, { "epoch": 2.8, "grad_norm": 4.0781788867244595, "learning_rate": 2.429884359310328e-06, "logits/chosen": -1.0711753368377686, "logits/rejected": -1.4404046535491943, "logps/chosen": -216.3368377685547, "logps/rejected": -1323.527587890625, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.5303722620010376, "rewards/margins": 10.808874130249023, "rewards/rejected": -12.33924674987793, "step": 70 }, { "epoch": 3.2, "grad_norm": 0.058123220019245296, "learning_rate": 1.7408081372259633e-06, "logits/chosen": -0.8975645899772644, "logits/rejected": -1.296263337135315, "logps/chosen": -145.1630859375, "logps/rejected": -1060.802734375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.8476600646972656, "rewards/margins": 8.854598045349121, "rewards/rejected": -9.702258110046387, "step": 80 }, { "epoch": 3.6, "grad_norm": 0.09682108714448862, "learning_rate": 1.1110744174509952e-06, "logits/chosen": -1.0131741762161255, "logits/rejected": -1.4304622411727905, "logps/chosen": -159.85574340820312, "logps/rejected": -1194.8968505859375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.978173553943634, "rewards/margins": 10.080551147460938, "rewards/rejected": -11.058725357055664, "step": 90 }, { "epoch": 4.0, "grad_norm": 0.004709132530498062, "learning_rate": 5.899065604459814e-07, "logits/chosen": -0.9850813150405884, "logits/rejected": -1.3841135501861572, "logps/chosen": -170.55239868164062, "logps/rejected": -1331.4459228515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.085587739944458, "rewards/margins": 11.312799453735352, "rewards/rejected": -12.398386001586914, "step": 100 }, { "epoch": 4.4, "grad_norm": 0.004246194962367942, "learning_rate": 2.1804183734670277e-07, "logits/chosen": -1.1493442058563232, "logits/rejected": -1.5497456789016724, "logps/chosen": -195.21815490722656, "logps/rejected": -1428.41064453125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.3281663656234741, "rewards/margins": 12.071850776672363, "rewards/rejected": -13.400018692016602, "step": 110 }, { "epoch": 4.8, "grad_norm": 0.010996089792622345, "learning_rate": 2.454718665888589e-08, "logits/chosen": -1.056302785873413, "logits/rejected": -1.4732356071472168, "logps/chosen": -179.13990783691406, "logps/rejected": -1402.2952880859375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.1820361614227295, "rewards/margins": 11.938187599182129, "rewards/rejected": -13.120223999023438, "step": 120 }, { "epoch": 5.0, "step": 125, "total_flos": 0.0, "train_loss": 0.10525495952181518, "train_runtime": 12485.2526, "train_samples_per_second": 0.641, "train_steps_per_second": 0.01 } ], "logging_steps": 10, "max_steps": 125, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }