{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.012416190712689347, "eval_steps": 500, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0008277460475126231, "grad_norm": 0.6720314025878906, "learning_rate": 0.00019448275862068965, "loss": 2.806, "step": 10 }, { "epoch": 0.0016554920950252463, "grad_norm": 0.7282748818397522, "learning_rate": 0.00018068965517241382, "loss": 2.4015, "step": 20 }, { "epoch": 0.0024832381425378696, "grad_norm": 0.6224257349967957, "learning_rate": 0.00016689655172413793, "loss": 2.4062, "step": 30 }, { "epoch": 0.0033109841900504926, "grad_norm": 0.6465514898300171, "learning_rate": 0.00015310344827586207, "loss": 2.3612, "step": 40 }, { "epoch": 0.004138730237563116, "grad_norm": 0.5610107183456421, "learning_rate": 0.0001393103448275862, "loss": 2.1203, "step": 50 }, { "epoch": 0.004966476285075739, "grad_norm": 0.5969945192337036, "learning_rate": 0.00012551724137931035, "loss": 2.2384, "step": 60 }, { "epoch": 0.005794222332588362, "grad_norm": 0.6113339066505432, "learning_rate": 0.00011172413793103449, "loss": 2.3128, "step": 70 }, { "epoch": 0.006621968380100985, "grad_norm": 0.8051493167877197, "learning_rate": 9.793103448275862e-05, "loss": 2.2082, "step": 80 }, { "epoch": 0.007449714427613608, "grad_norm": 0.6741610169410706, "learning_rate": 8.413793103448277e-05, "loss": 2.3156, "step": 90 }, { "epoch": 0.008277460475126232, "grad_norm": 0.5629040598869324, "learning_rate": 7.03448275862069e-05, "loss": 2.1989, "step": 100 }, { "epoch": 0.009105206522638855, "grad_norm": 0.7672610282897949, "learning_rate": 5.6551724137931037e-05, "loss": 2.1749, "step": 110 }, { "epoch": 0.009932952570151478, "grad_norm": 0.5322269201278687, "learning_rate": 4.275862068965518e-05, "loss": 2.171, "step": 120 }, { "epoch": 0.010760698617664101, "grad_norm": 0.7353241443634033, "learning_rate": 2.8965517241379313e-05, "loss": 2.1584, "step": 130 }, { "epoch": 0.011588444665176724, "grad_norm": 0.6774106025695801, "learning_rate": 1.5172413793103448e-05, "loss": 2.2079, "step": 140 }, { "epoch": 0.012416190712689347, "grad_norm": 0.5964234471321106, "learning_rate": 1.3793103448275862e-06, "loss": 2.1165, "step": 150 } ], "logging_steps": 10, "max_steps": 150, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.120470461579264e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }