{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.992, "eval_steps": 200, "global_step": 62, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.016, "grad_norm": 50.835563259148245, "learning_rate": 7.142857142857142e-08, "logits/generated": -1.7165532112121582, "logits/real": -1.5090866088867188, "logps/generated": -275.8633728027344, "logps/real": -241.62222290039062, "loss": 0.9385, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 1 }, { "epoch": 0.16, "grad_norm": 41.66364421784157, "learning_rate": 4.727272727272727e-07, "logits/generated": -1.8415862321853638, "logits/real": -1.5861574411392212, "logps/generated": -281.33892822265625, "logps/real": -260.4600830078125, "loss": 0.9056, "rewards/accuracies": 0.5138888955116272, "rewards/generated": 0.23393188416957855, "rewards/margins": 0.10307849943637848, "rewards/real": 0.33701038360595703, "step": 10 }, { "epoch": 0.32, "grad_norm": 35.24058113049003, "learning_rate": 3.818181818181818e-07, "logits/generated": -1.7791579961776733, "logits/real": -1.480543613433838, "logps/generated": -265.5545959472656, "logps/real": -228.2611083984375, "loss": 0.7334, "rewards/accuracies": 0.8374999761581421, "rewards/generated": 1.893307089805603, "rewards/margins": 0.8892760276794434, "rewards/real": 2.782582998275757, "step": 20 }, { "epoch": 0.48, "grad_norm": 34.94460500011804, "learning_rate": 2.909090909090909e-07, "logits/generated": -1.688865303993225, "logits/real": -1.3790943622589111, "logps/generated": -254.9552764892578, "logps/real": -221.18008422851562, "loss": 0.6877, "rewards/accuracies": 0.7250000238418579, "rewards/generated": 3.3104233741760254, "rewards/margins": 0.8946127891540527, "rewards/real": 4.205036163330078, "step": 30 }, { "epoch": 0.64, "grad_norm": 31.70597635761484, "learning_rate": 2e-07, "logits/generated": -1.5597318410873413, "logits/real": -1.2736680507659912, "logps/generated": -233.81716918945312, "logps/real": -204.30459594726562, "loss": 0.6532, "rewards/accuracies": 0.7749999761581421, "rewards/generated": 4.177728176116943, "rewards/margins": 1.0089408159255981, "rewards/real": 5.18666934967041, "step": 40 }, { "epoch": 0.8, "grad_norm": 33.862934848228804, "learning_rate": 1.0909090909090908e-07, "logits/generated": -1.547353982925415, "logits/real": -1.3472106456756592, "logps/generated": -235.5671844482422, "logps/real": -204.73304748535156, "loss": 0.6417, "rewards/accuracies": 0.75, "rewards/generated": 4.144225597381592, "rewards/margins": 1.0203888416290283, "rewards/real": 5.164614677429199, "step": 50 }, { "epoch": 0.96, "grad_norm": 35.459665070843094, "learning_rate": 1.818181818181818e-08, "logits/generated": -1.6352506875991821, "logits/real": -1.3348934650421143, "logps/generated": -237.15017700195312, "logps/real": -200.07449340820312, "loss": 0.6375, "rewards/accuracies": 0.800000011920929, "rewards/generated": 4.087512969970703, "rewards/margins": 1.2106025218963623, "rewards/real": 5.2981157302856445, "step": 60 }, { "epoch": 0.992, "step": 62, "total_flos": 0.0, "train_loss": 0.704919635288177, "train_runtime": 933.9268, "train_samples_per_second": 2.136, "train_steps_per_second": 0.066 } ], "logging_steps": 10, "max_steps": 62, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }