{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9048750141386721, "eval_steps": 100, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09048750141386722, "eval_accuracy": 0.16714730898859703, "eval_loss": 4.686699867248535, "eval_runtime": 125.2427, "eval_samples_per_second": 7.218, "eval_steps_per_second": 7.218, "step": 100 }, { "epoch": 0.18097500282773443, "eval_accuracy": 0.25826880194607743, "eval_loss": 3.9017727375030518, "eval_runtime": 125.5606, "eval_samples_per_second": 7.2, "eval_steps_per_second": 7.2, "step": 200 }, { "epoch": 0.2714625042416016, "eval_accuracy": 0.29766981254599767, "eval_loss": 3.5929646492004395, "eval_runtime": 125.5573, "eval_samples_per_second": 7.2, "eval_steps_per_second": 7.2, "step": 300 }, { "epoch": 0.36195000565546886, "eval_accuracy": 0.3236774609630093, "eval_loss": 3.4225211143493652, "eval_runtime": 125.7379, "eval_samples_per_second": 7.19, "eval_steps_per_second": 7.19, "step": 400 }, { "epoch": 0.45243750706933605, "grad_norm": 4.40625, "learning_rate": 4.7737556561085976e-05, "loss": 4.0632, "step": 500 }, { "epoch": 0.45243750706933605, "eval_accuracy": 0.3405748527595257, "eval_loss": 3.307744026184082, "eval_runtime": 125.5713, "eval_samples_per_second": 7.199, "eval_steps_per_second": 7.199, "step": 500 }, { "epoch": 0.5429250084832032, "eval_accuracy": 0.354706238577747, "eval_loss": 3.21340012550354, "eval_runtime": 125.6117, "eval_samples_per_second": 7.197, "eval_steps_per_second": 7.197, "step": 600 }, { "epoch": 0.6334125098970704, "eval_accuracy": 0.3676221206998826, "eval_loss": 3.127941608428955, "eval_runtime": 125.646, "eval_samples_per_second": 7.195, "eval_steps_per_second": 7.195, "step": 700 }, { "epoch": 0.7239000113109377, "eval_accuracy": 0.377770033996102, "eval_loss": 3.0699830055236816, "eval_runtime": 125.238, "eval_samples_per_second": 7.218, "eval_steps_per_second": 7.218, "step": 800 }, { "epoch": 0.8143875127248049, "eval_accuracy": 0.3878323087639568, "eval_loss": 2.992367744445801, "eval_runtime": 126.0865, "eval_samples_per_second": 7.17, "eval_steps_per_second": 7.17, "step": 900 }, { "epoch": 0.9048750141386721, "grad_norm": 5.15625, "learning_rate": 4.547511312217195e-05, "loss": 3.0582, "step": 1000 }, { "epoch": 0.9048750141386721, "eval_accuracy": 0.3950470582191688, "eval_loss": 2.9669389724731445, "eval_runtime": 125.9784, "eval_samples_per_second": 7.176, "eval_steps_per_second": 7.176, "step": 1000 } ], "logging_steps": 500, "max_steps": 11050, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 200, "total_flos": 7.41888088866816e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }