{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.981366459627329, "global_step": 120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.25, "learning_rate": 4.166666666666667e-05, "loss": 0.6218, "step": 10 }, { "epoch": 0.25, "eval_accuracy": 0.7417102966841187, "eval_loss": 0.573567271232605, "eval_runtime": 9.2285, "eval_samples_per_second": 62.09, "eval_steps_per_second": 1.95, "step": 10 }, { "epoch": 0.5, "learning_rate": 4.9326121764495596e-05, "loss": 0.6103, "step": 20 }, { "epoch": 0.5, "eval_accuracy": 0.7399650959860384, "eval_loss": 0.5730276703834534, "eval_runtime": 9.2303, "eval_samples_per_second": 62.078, "eval_steps_per_second": 1.95, "step": 20 }, { "epoch": 0.75, "learning_rate": 4.665063509461097e-05, "loss": 0.6105, "step": 30 }, { "epoch": 0.75, "eval_accuracy": 0.7277486910994765, "eval_loss": 0.5863298177719116, "eval_runtime": 9.2823, "eval_samples_per_second": 61.73, "eval_steps_per_second": 1.939, "step": 30 }, { "epoch": 0.99, "learning_rate": 4.215604094671835e-05, "loss": 0.6261, "step": 40 }, { "epoch": 0.99, "eval_accuracy": 0.7068062827225131, "eval_loss": 0.5964760184288025, "eval_runtime": 9.2516, "eval_samples_per_second": 61.935, "eval_steps_per_second": 1.946, "step": 40 }, { "epoch": 1.24, "learning_rate": 3.621997950501156e-05, "loss": 0.5684, "step": 50 }, { "epoch": 1.24, "eval_accuracy": 0.7521815008726004, "eval_loss": 0.567564845085144, "eval_runtime": 10.2074, "eval_samples_per_second": 56.136, "eval_steps_per_second": 1.763, "step": 50 }, { "epoch": 1.49, "learning_rate": 2.9341204441673266e-05, "loss": 0.5878, "step": 60 }, { "epoch": 1.49, "eval_accuracy": 0.6561954624781849, "eval_loss": 0.6583427786827087, "eval_runtime": 9.2811, "eval_samples_per_second": 61.739, "eval_steps_per_second": 1.939, "step": 60 }, { "epoch": 1.74, "learning_rate": 2.2097677146869242e-05, "loss": 0.5274, "step": 70 }, { "epoch": 1.74, "eval_accuracy": 0.7521815008726004, "eval_loss": 0.5735621452331543, "eval_runtime": 9.3011, "eval_samples_per_second": 61.605, "eval_steps_per_second": 1.935, "step": 70 }, { "epoch": 1.99, "learning_rate": 1.509800584902108e-05, "loss": 0.581, "step": 80 }, { "epoch": 1.99, "eval_accuracy": 0.7399650959860384, "eval_loss": 0.5575574636459351, "eval_runtime": 9.3052, "eval_samples_per_second": 61.579, "eval_steps_per_second": 1.934, "step": 80 }, { "epoch": 2.24, "learning_rate": 8.930309757836517e-06, "loss": 0.527, "step": 90 }, { "epoch": 2.24, "eval_accuracy": 0.7539267015706806, "eval_loss": 0.5575008988380432, "eval_runtime": 9.355, "eval_samples_per_second": 61.251, "eval_steps_per_second": 1.924, "step": 90 }, { "epoch": 2.48, "learning_rate": 4.112804714676594e-06, "loss": 0.5228, "step": 100 }, { "epoch": 2.48, "eval_accuracy": 0.7521815008726004, "eval_loss": 0.5663809776306152, "eval_runtime": 9.3904, "eval_samples_per_second": 61.02, "eval_steps_per_second": 1.917, "step": 100 }, { "epoch": 2.73, "learning_rate": 1.0502621921127776e-06, "loss": 0.4978, "step": 110 }, { "epoch": 2.73, "eval_accuracy": 0.7539267015706806, "eval_loss": 0.5707576274871826, "eval_runtime": 9.7621, "eval_samples_per_second": 58.697, "eval_steps_per_second": 1.844, "step": 110 }, { "epoch": 2.98, "learning_rate": 0.0, "loss": 0.5051, "step": 120 }, { "epoch": 2.98, "eval_accuracy": 0.7469458987783595, "eval_loss": 0.5715910792350769, "eval_runtime": 9.7356, "eval_samples_per_second": 58.856, "eval_steps_per_second": 1.849, "step": 120 }, { "epoch": 2.98, "step": 120, "total_flos": 3.5051816769866957e+18, "train_loss": 0.5655017614364624, "train_runtime": 483.5766, "train_samples_per_second": 31.962, "train_steps_per_second": 0.248 } ], "max_steps": 120, "num_train_epochs": 3, "total_flos": 3.5051816769866957e+18, "trial_name": null, "trial_params": null }