{ "best_metric": 0.9169825625473844, "best_model_checkpoint": "distilbert-base-uncased-finetuned-multiclass-classification/run-3/checkpoint-13190", "epoch": 5.0, "eval_steps": 500, "global_step": 13190, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.18953752843062927, "grad_norm": 9.870668411254883, "learning_rate": 1.490581283927063e-05, "loss": 5.8661, "step": 500 }, { "epoch": 0.37907505686125853, "grad_norm": 11.824650764465332, "learning_rate": 1.4318507368850198e-05, "loss": 4.7233, "step": 1000 }, { "epoch": 0.5686125852918877, "grad_norm": 11.662109375, "learning_rate": 1.3731201898429762e-05, "loss": 3.8044, "step": 1500 }, { "epoch": 0.7581501137225171, "grad_norm": 11.159646987915039, "learning_rate": 1.3143896428009328e-05, "loss": 3.0524, "step": 2000 }, { "epoch": 0.9476876421531463, "grad_norm": 13.040279388427734, "learning_rate": 1.2556590957588894e-05, "loss": 2.5575, "step": 2500 }, { "epoch": 1.0, "eval_accuracy": 0.7350265352539803, "eval_loss": 2.0373177528381348, "eval_runtime": 41.2753, "eval_samples_per_second": 63.912, "eval_steps_per_second": 3.998, "step": 2638 }, { "epoch": 1.1372251705837755, "grad_norm": 14.501738548278809, "learning_rate": 1.1969285487168458e-05, "loss": 2.0424, "step": 3000 }, { "epoch": 1.3267626990144048, "grad_norm": 8.37807846069336, "learning_rate": 1.1381980016748024e-05, "loss": 1.6845, "step": 3500 }, { "epoch": 1.5163002274450341, "grad_norm": 16.81246566772461, "learning_rate": 1.0794674546327588e-05, "loss": 1.4752, "step": 4000 }, { "epoch": 1.7058377558756632, "grad_norm": 9.910699844360352, "learning_rate": 1.0207369075907154e-05, "loss": 1.3347, "step": 4500 }, { "epoch": 1.8953752843062928, "grad_norm": 8.623641014099121, "learning_rate": 9.62006360548672e-06, "loss": 1.185, "step": 5000 }, { "epoch": 2.0, "eval_accuracy": 0.8442001516300227, "eval_loss": 1.0104457139968872, "eval_runtime": 41.2462, "eval_samples_per_second": 63.957, "eval_steps_per_second": 4.0, "step": 5276 }, { "epoch": 2.084912812736922, "grad_norm": 4.728893756866455, "learning_rate": 9.032758135066284e-06, "loss": 0.9927, "step": 5500 }, { "epoch": 2.274450341167551, "grad_norm": 2.276144027709961, "learning_rate": 8.445452664645852e-06, "loss": 0.8242, "step": 6000 }, { "epoch": 2.4639878695981805, "grad_norm": 2.264138698577881, "learning_rate": 7.858147194225416e-06, "loss": 0.8038, "step": 6500 }, { "epoch": 2.6535253980288096, "grad_norm": 3.794312000274658, "learning_rate": 7.270841723804981e-06, "loss": 0.8027, "step": 7000 }, { "epoch": 2.8430629264594387, "grad_norm": 8.422940254211426, "learning_rate": 6.683536253384547e-06, "loss": 0.6962, "step": 7500 }, { "epoch": 3.0, "eval_accuracy": 0.8847611827141774, "eval_loss": 0.7159162163734436, "eval_runtime": 45.9678, "eval_samples_per_second": 57.388, "eval_steps_per_second": 3.589, "step": 7914 }, { "epoch": 3.0326004548900682, "grad_norm": 2.8076231479644775, "learning_rate": 6.096230782964112e-06, "loss": 0.6665, "step": 8000 }, { "epoch": 3.2221379833206973, "grad_norm": 6.104276657104492, "learning_rate": 5.508925312543677e-06, "loss": 0.5994, "step": 8500 }, { "epoch": 3.411675511751327, "grad_norm": 2.379484176635742, "learning_rate": 4.921619842123243e-06, "loss": 0.5959, "step": 9000 }, { "epoch": 3.601213040181956, "grad_norm": 10.061728477478027, "learning_rate": 4.334314371702808e-06, "loss": 0.5272, "step": 9500 }, { "epoch": 3.7907505686125855, "grad_norm": 3.485311269760132, "learning_rate": 3.747008901282373e-06, "loss": 0.459, "step": 10000 }, { "epoch": 3.9802880970432146, "grad_norm": 2.9593420028686523, "learning_rate": 3.1597034308619386e-06, "loss": 0.507, "step": 10500 }, { "epoch": 4.0, "eval_accuracy": 0.9078847611827142, "eval_loss": 0.606600284576416, "eval_runtime": 40.7207, "eval_samples_per_second": 64.783, "eval_steps_per_second": 4.052, "step": 10552 }, { "epoch": 4.169825625473844, "grad_norm": 0.23257030546665192, "learning_rate": 2.5723979604415036e-06, "loss": 0.4614, "step": 11000 }, { "epoch": 4.359363153904473, "grad_norm": 14.897615432739258, "learning_rate": 1.9850924900210694e-06, "loss": 0.4487, "step": 11500 }, { "epoch": 4.548900682335102, "grad_norm": 0.1480739712715149, "learning_rate": 1.3977870196006345e-06, "loss": 0.4853, "step": 12000 }, { "epoch": 4.7384382107657315, "grad_norm": 0.2928927540779114, "learning_rate": 8.104815491801998e-07, "loss": 0.4254, "step": 12500 }, { "epoch": 4.927975739196361, "grad_norm": 0.9608014225959778, "learning_rate": 2.2317607875976519e-07, "loss": 0.4395, "step": 13000 }, { "epoch": 5.0, "eval_accuracy": 0.9169825625473844, "eval_loss": 0.5716663599014282, "eval_runtime": 40.5807, "eval_samples_per_second": 65.006, "eval_steps_per_second": 4.066, "step": 13190 } ], "logging_steps": 500, "max_steps": 13190, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 438076953329664.0, "train_batch_size": 4, "trial_name": null, "trial_params": { "learning_rate": 1.5493118309691066e-05, "num_train_epochs": 5, "per_device_train_batch_size": 4, "seed": 29 } }