|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 207, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.5714285714285715e-05, |
|
"logits/chosen": -1.823734998703003, |
|
"logits/rejected": -1.96222984790802, |
|
"logps/chosen": -984.5184936523438, |
|
"logps/rejected": -29.066242218017578, |
|
"loss": 0.7963, |
|
"rewards/accuracies": 0.6538461446762085, |
|
"rewards/chosen": 0.2930363118648529, |
|
"rewards/margins": 0.29948848485946655, |
|
"rewards/rejected": -0.00645211897790432, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.9516129032258067e-05, |
|
"logits/chosen": -1.7756704092025757, |
|
"logits/rejected": -1.890375018119812, |
|
"logps/chosen": -1008.671630859375, |
|
"logps/rejected": -30.05452537536621, |
|
"loss": 0.5675, |
|
"rewards/accuracies": 0.6538461446762085, |
|
"rewards/chosen": 0.6647549867630005, |
|
"rewards/margins": 0.7006121277809143, |
|
"rewards/rejected": -0.03585716709494591, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.7419354838709678e-05, |
|
"logits/chosen": -1.837444543838501, |
|
"logits/rejected": -1.8552197217941284, |
|
"logps/chosen": -1084.7537841796875, |
|
"logps/rejected": -30.599714279174805, |
|
"loss": 0.4647, |
|
"rewards/accuracies": 0.7692307829856873, |
|
"rewards/chosen": 0.9935499429702759, |
|
"rewards/margins": 1.160874366760254, |
|
"rewards/rejected": -0.16732460260391235, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.532258064516129e-05, |
|
"logits/chosen": -1.745394229888916, |
|
"logits/rejected": -1.8828259706497192, |
|
"logps/chosen": -1103.2149658203125, |
|
"logps/rejected": -32.83525085449219, |
|
"loss": 0.2193, |
|
"rewards/accuracies": 0.8846153616905212, |
|
"rewards/chosen": 2.401637077331543, |
|
"rewards/margins": 2.809011697769165, |
|
"rewards/rejected": -0.4073745608329773, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.3225806451612902e-05, |
|
"logits/chosen": -1.6953773498535156, |
|
"logits/rejected": -2.03174090385437, |
|
"logps/chosen": -1086.2177734375, |
|
"logps/rejected": -34.20427703857422, |
|
"loss": 0.1084, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.713472843170166, |
|
"rewards/margins": 3.305988073348999, |
|
"rewards/rejected": -0.5925151109695435, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.1129032258064516e-05, |
|
"logits/chosen": -1.7250920534133911, |
|
"logits/rejected": -1.886851191520691, |
|
"logps/chosen": -902.3397827148438, |
|
"logps/rejected": -36.79640579223633, |
|
"loss": 0.0972, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.591458797454834, |
|
"rewards/margins": 3.4001564979553223, |
|
"rewards/rejected": -0.8086973428726196, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.903225806451613e-05, |
|
"logits/chosen": -1.6640688180923462, |
|
"logits/rejected": -1.9599171876907349, |
|
"logps/chosen": -1034.3873291015625, |
|
"logps/rejected": -38.65880584716797, |
|
"loss": 0.0428, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.461390495300293, |
|
"rewards/margins": 4.427910327911377, |
|
"rewards/rejected": -0.9665195941925049, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.6935483870967744e-05, |
|
"logits/chosen": -1.6940295696258545, |
|
"logits/rejected": -1.9844238758087158, |
|
"logps/chosen": -1174.732666015625, |
|
"logps/rejected": -41.705257415771484, |
|
"loss": 0.0235, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.3263399600982666, |
|
"rewards/margins": 4.630356788635254, |
|
"rewards/rejected": -1.3040169477462769, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 1.4838709677419355e-05, |
|
"logits/chosen": -1.6856719255447388, |
|
"logits/rejected": -1.8793022632598877, |
|
"logps/chosen": -1046.614990234375, |
|
"logps/rejected": -43.94160842895508, |
|
"loss": 0.0238, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.614682912826538, |
|
"rewards/margins": 5.117927074432373, |
|
"rewards/rejected": -1.5032439231872559, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.274193548387097e-05, |
|
"logits/chosen": -1.7377840280532837, |
|
"logits/rejected": -1.8570376634597778, |
|
"logps/chosen": -1106.663330078125, |
|
"logps/rejected": -47.238887786865234, |
|
"loss": 0.0242, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.449557304382324, |
|
"rewards/margins": 5.24083137512207, |
|
"rewards/rejected": -1.7912741899490356, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.0645161290322582e-05, |
|
"logits/chosen": -1.7412984371185303, |
|
"logits/rejected": -1.9490795135498047, |
|
"logps/chosen": -1117.510009765625, |
|
"logps/rejected": -47.68777084350586, |
|
"loss": 0.0227, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.141476631164551, |
|
"rewards/margins": 5.044860363006592, |
|
"rewards/rejected": -1.903383731842041, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 8.548387096774194e-06, |
|
"logits/chosen": -1.6916511058807373, |
|
"logits/rejected": -1.9522241353988647, |
|
"logps/chosen": -1196.82861328125, |
|
"logps/rejected": -49.64944076538086, |
|
"loss": 0.015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.8758084774017334, |
|
"rewards/margins": 5.988270282745361, |
|
"rewards/rejected": -2.1124606132507324, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 6.451612903225806e-06, |
|
"logits/chosen": -1.760750651359558, |
|
"logits/rejected": -1.925395131111145, |
|
"logps/chosen": -701.3285522460938, |
|
"logps/rejected": -50.479835510253906, |
|
"loss": 0.0169, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.707566261291504, |
|
"rewards/margins": 4.906687259674072, |
|
"rewards/rejected": -2.1991212368011475, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 4.35483870967742e-06, |
|
"logits/chosen": -1.6971558332443237, |
|
"logits/rejected": -1.9400659799575806, |
|
"logps/chosen": -959.2064208984375, |
|
"logps/rejected": -51.388999938964844, |
|
"loss": 0.0085, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.8825502395629883, |
|
"rewards/margins": 6.139618873596191, |
|
"rewards/rejected": -2.2570688724517822, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.2580645161290324e-06, |
|
"logits/chosen": -1.7328979969024658, |
|
"logits/rejected": -2.0168874263763428, |
|
"logps/chosen": -926.796875, |
|
"logps/rejected": -52.40264129638672, |
|
"loss": 0.0097, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.548464059829712, |
|
"rewards/margins": 5.864409446716309, |
|
"rewards/rejected": -2.3159451484680176, |
|
"step": 195 |
|
} |
|
], |
|
"logging_steps": 13, |
|
"max_steps": 207, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|