{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 200, "global_step": 412, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04854368932038835, "grad_norm": 0.19167186319828033, "learning_rate": 4.970984274562741e-06, "loss": 0.5965, "step": 20 }, { "epoch": 0.0970873786407767, "grad_norm": 0.10218532383441925, "learning_rate": 4.884610628109082e-06, "loss": 0.5749, "step": 40 }, { "epoch": 0.14563106796116504, "grad_norm": 0.0955277681350708, "learning_rate": 4.742884015847436e-06, "loss": 0.5653, "step": 60 }, { "epoch": 0.1941747572815534, "grad_norm": 0.09652815014123917, "learning_rate": 4.549094278152631e-06, "loss": 0.5592, "step": 80 }, { "epoch": 0.24271844660194175, "grad_norm": 0.09276870638132095, "learning_rate": 4.307739774881878e-06, "loss": 0.5562, "step": 100 }, { "epoch": 0.2912621359223301, "grad_norm": 0.09189510345458984, "learning_rate": 4.024422966835137e-06, "loss": 0.5518, "step": 120 }, { "epoch": 0.33980582524271846, "grad_norm": 0.09179496020078659, "learning_rate": 3.7057203681836407e-06, "loss": 0.551, "step": 140 }, { "epoch": 0.3883495145631068, "grad_norm": 0.09196960926055908, "learning_rate": 3.3590298886062833e-06, "loss": 0.5482, "step": 160 }, { "epoch": 0.4368932038834951, "grad_norm": 0.09062644839286804, "learning_rate": 2.9923991087167657e-06, "loss": 0.5461, "step": 180 }, { "epoch": 0.4854368932038835, "grad_norm": 0.09341968595981598, "learning_rate": 2.614338474951987e-06, "loss": 0.5446, "step": 200 }, { "epoch": 0.4854368932038835, "eval_accuracy": 0.32638514992244894, "eval_loss": 0.5471854209899902, "eval_runtime": 32.0389, "eval_samples_per_second": 132.027, "eval_steps_per_second": 0.531, "step": 200 }, { "epoch": 0.5339805825242718, "grad_norm": 0.09235095232725143, "learning_rate": 2.2336237501503103e-06, "loss": 0.5411, "step": 220 }, { "epoch": 0.5825242718446602, "grad_norm": 0.09220809489488602, "learning_rate": 1.8590923054515504e-06, "loss": 0.543, "step": 240 }, { "epoch": 0.6310679611650486, "grad_norm": 0.0929255411028862, "learning_rate": 1.499437982109305e-06, "loss": 0.5411, "step": 260 }, { "epoch": 0.6796116504854369, "grad_norm": 0.09099574387073517, "learning_rate": 1.1630092850023148e-06, "loss": 0.5423, "step": 280 }, { "epoch": 0.7281553398058253, "grad_norm": 0.08963935077190399, "learning_rate": 8.576155922941548e-07, "loss": 0.5397, "step": 300 }, { "epoch": 0.7766990291262136, "grad_norm": 0.09014247357845306, "learning_rate": 5.903458796151382e-07, "loss": 0.5414, "step": 320 }, { "epoch": 0.8252427184466019, "grad_norm": 0.0917976126074791, "learning_rate": 3.6740416664589634e-07, "loss": 0.5401, "step": 340 }, { "epoch": 0.8737864077669902, "grad_norm": 0.09078697115182877, "learning_rate": 1.9396550581205208e-07, "loss": 0.537, "step": 360 }, { "epoch": 0.9223300970873787, "grad_norm": 0.09018886834383011, "learning_rate": 7.405585596397314e-08, "loss": 0.5391, "step": 380 }, { "epoch": 0.970873786407767, "grad_norm": 0.09031691402196884, "learning_rate": 1.0458629483476868e-08, "loss": 0.5418, "step": 400 }, { "epoch": 0.970873786407767, "eval_accuracy": 0.3268526506529188, "eval_loss": 0.5428585410118103, "eval_runtime": 26.2685, "eval_samples_per_second": 161.029, "eval_steps_per_second": 0.647, "step": 400 }, { "epoch": 1.0, "step": 412, "total_flos": 7.628990136885182e+18, "train_loss": 0.5495563189960221, "train_runtime": 3539.9459, "train_samples_per_second": 119.039, "train_steps_per_second": 0.116 } ], "logging_steps": 20, "max_steps": 412, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.628990136885182e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }