|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9984, |
|
"eval_steps": 500, |
|
"global_step": 156, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 4.15625, |
|
"learning_rate": 4.423334927457198e-05, |
|
"logits/chosen": 0.6726851463317871, |
|
"logits/rejected": 0.5370064973831177, |
|
"logps/chosen": -353.01397705078125, |
|
"logps/rejected": -321.87738037109375, |
|
"loss": 0.6045, |
|
"rewards/accuracies": 0.6434375047683716, |
|
"rewards/chosen": 0.3095881938934326, |
|
"rewards/margins": 0.49872708320617676, |
|
"rewards/rejected": -0.18913888931274414, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 2.09375, |
|
"learning_rate": 1.815842524819793e-05, |
|
"logits/chosen": 0.6333445310592651, |
|
"logits/rejected": 0.49084267020225525, |
|
"logps/chosen": -334.806396484375, |
|
"logps/rejected": -316.8036804199219, |
|
"loss": 0.4899, |
|
"rewards/accuracies": 0.7724999785423279, |
|
"rewards/chosen": 0.6486607193946838, |
|
"rewards/margins": 1.2206343412399292, |
|
"rewards/rejected": -0.5719736814498901, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 3.546875, |
|
"learning_rate": 2.397392281198729e-07, |
|
"logits/chosen": 0.6054231524467468, |
|
"logits/rejected": 0.45057016611099243, |
|
"logps/chosen": -348.30419921875, |
|
"logps/rejected": -318.5997314453125, |
|
"loss": 0.47, |
|
"rewards/accuracies": 0.7762500047683716, |
|
"rewards/chosen": 0.8248878717422485, |
|
"rewards/margins": 1.4257100820541382, |
|
"rewards/rejected": -0.6008222103118896, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.9984, |
|
"step": 156, |
|
"total_flos": 4.792500052780122e+18, |
|
"train_loss": 0.5214443573584924, |
|
"train_runtime": 10667.9882, |
|
"train_samples_per_second": 0.937, |
|
"train_steps_per_second": 0.015 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 156, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 3906, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.792500052780122e+18, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|