{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.6169031462060457, "eval_steps": 500, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.030845157310302282, "grad_norm": 2.8206074237823486, "learning_rate": 1.0277492291880782e-05, "loss": 1.8082, "step": 50 }, { "epoch": 0.061690314620604564, "grad_norm": 3.4183013439178467, "learning_rate": 2.0554984583761563e-05, "loss": 0.6538, "step": 100 }, { "epoch": 0.09253547193090685, "grad_norm": 2.170591354370117, "learning_rate": 3.083247687564235e-05, "loss": 0.4563, "step": 150 }, { "epoch": 0.12338062924120913, "grad_norm": 1.4687080383300781, "learning_rate": 4.110996916752313e-05, "loss": 0.4263, "step": 200 }, { "epoch": 0.15422578655151142, "grad_norm": 1.836676836013794, "learning_rate": 5.1387461459403907e-05, "loss": 0.3994, "step": 250 }, { "epoch": 0.1850709438618137, "grad_norm": 1.2718663215637207, "learning_rate": 6.16649537512847e-05, "loss": 0.3665, "step": 300 }, { "epoch": 0.215916101172116, "grad_norm": 1.6945191621780396, "learning_rate": 7.194244604316547e-05, "loss": 0.3577, "step": 350 }, { "epoch": 0.24676125848241826, "grad_norm": 1.2829898595809937, "learning_rate": 8.221993833504625e-05, "loss": 0.347, "step": 400 }, { "epoch": 0.27760641579272055, "grad_norm": 1.01521635055542, "learning_rate": 9.249743062692704e-05, "loss": 0.3288, "step": 450 }, { "epoch": 0.30845157310302285, "grad_norm": 1.522111415863037, "learning_rate": 0.00010277492291880781, "loss": 0.3267, "step": 500 }, { "epoch": 0.3392967304133251, "grad_norm": 0.9678927659988403, "learning_rate": 0.00011305241521068859, "loss": 0.3198, "step": 550 }, { "epoch": 0.3701418877236274, "grad_norm": 1.2144405841827393, "learning_rate": 0.0001233299075025694, "loss": 0.3099, "step": 600 }, { "epoch": 0.4009870450339297, "grad_norm": 1.3122639656066895, "learning_rate": 0.00013360739979445017, "loss": 0.2929, "step": 650 }, { "epoch": 0.431832202344232, "grad_norm": 1.0934101343154907, "learning_rate": 0.00014388489208633093, "loss": 0.3003, "step": 700 }, { "epoch": 0.4626773596545342, "grad_norm": 0.7938969731330872, "learning_rate": 0.00015416238437821172, "loss": 0.2956, "step": 750 }, { "epoch": 0.4935225169648365, "grad_norm": 0.6571168303489685, "learning_rate": 0.0001644398766700925, "loss": 0.2736, "step": 800 }, { "epoch": 0.5243676742751388, "grad_norm": 1.0073938369750977, "learning_rate": 0.0001747173689619733, "loss": 0.2892, "step": 850 }, { "epoch": 0.5552128315854411, "grad_norm": 0.9874083399772644, "learning_rate": 0.00018499486125385408, "loss": 0.2723, "step": 900 }, { "epoch": 0.5860579888957433, "grad_norm": 1.1770968437194824, "learning_rate": 0.00019527235354573487, "loss": 0.2855, "step": 950 }, { "epoch": 0.6169031462060457, "grad_norm": 1.00326669216156, "learning_rate": 0.00019997622717095418, "loss": 0.2587, "step": 1000 } ], "logging_steps": 50, "max_steps": 4863, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7124677004623872.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }