{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.2328830926874709, "eval_steps": 100, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02328830926874709, "eval_accuracy": 0.6251894229623197, "eval_loss": 1.7397805452346802, "eval_runtime": 28.5721, "eval_samples_per_second": 20.335, "eval_steps_per_second": 0.35, "step": 100 }, { "epoch": 0.04657661853749418, "eval_accuracy": 0.7068097781429745, "eval_loss": 1.3044862747192383, "eval_runtime": 28.3133, "eval_samples_per_second": 20.52, "eval_steps_per_second": 0.353, "step": 200 }, { "epoch": 0.06986492780624126, "eval_accuracy": 0.7264728450848325, "eval_loss": 1.1957931518554688, "eval_runtime": 28.4954, "eval_samples_per_second": 20.389, "eval_steps_per_second": 0.351, "step": 300 }, { "epoch": 0.09315323707498836, "eval_accuracy": 0.7365680716597873, "eval_loss": 1.1367889642715454, "eval_runtime": 28.5011, "eval_samples_per_second": 20.385, "eval_steps_per_second": 0.351, "step": 400 }, { "epoch": 0.11644154634373545, "grad_norm": 1.4327788352966309, "learning_rate": 4.8059307560937745e-05, "loss": 1.6774, "step": 500 }, { "epoch": 0.11644154634373545, "eval_accuracy": 0.7479708350529647, "eval_loss": 1.0806397199630737, "eval_runtime": 28.3151, "eval_samples_per_second": 20.519, "eval_steps_per_second": 0.353, "step": 500 }, { "epoch": 0.13972985561248252, "eval_accuracy": 0.7532630012849124, "eval_loss": 1.0570966005325317, "eval_runtime": 28.5769, "eval_samples_per_second": 20.331, "eval_steps_per_second": 0.35, "step": 600 }, { "epoch": 0.1630181648812296, "eval_accuracy": 0.7630408550249957, "eval_loss": 1.014039397239685, "eval_runtime": 28.2978, "eval_samples_per_second": 20.532, "eval_steps_per_second": 0.353, "step": 700 }, { "epoch": 0.18630647414997673, "eval_accuracy": 0.7612289910578419, "eval_loss": 1.0060029029846191, "eval_runtime": 28.4993, "eval_samples_per_second": 20.386, "eval_steps_per_second": 0.351, "step": 800 }, { "epoch": 0.2095947834187238, "eval_accuracy": 0.7636095789509747, "eval_loss": 1.0050908327102661, "eval_runtime": 28.3321, "eval_samples_per_second": 20.507, "eval_steps_per_second": 0.353, "step": 900 }, { "epoch": 0.2328830926874709, "grad_norm": 1.2356228828430176, "learning_rate": 4.611861512187549e-05, "loss": 1.0452, "step": 1000 }, { "epoch": 0.2328830926874709, "eval_accuracy": 0.767719824719944, "eval_loss": 0.9805149435997009, "eval_runtime": 28.5332, "eval_samples_per_second": 20.362, "eval_steps_per_second": 0.35, "step": 1000 }, { "epoch": 0.2328830926874709, "step": 1000, "total_flos": 1.3069163715939533e+18, "train_loss": 1.361302978515625, "train_runtime": 10712.6155, "train_samples_per_second": 76.947, "train_steps_per_second": 1.203 } ], "logging_steps": 500, "max_steps": 12882, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.3069163715939533e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }