{ "best_metric": 0.3820537328720093, "best_model_checkpoint": "/mnt/beegfs/farid/mlora/outputs/xnli/aya-101/es/rank4_lr5e-5/checkpoint-5500", "epoch": 0.24445893089960888, "eval_steps": 500, "global_step": 6000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.020371577574967405, "grad_norm": 2.2158336639404297, "learning_rate": 4.166666666666667e-05, "loss": 1.1454, "step": 500 }, { "epoch": 0.020371577574967405, "eval_accuracy": 0.5144578313253012, "eval_f1": 0.4936195264772718, "eval_loss": 1.038705587387085, "eval_runtime": 413.8422, "eval_samples_per_second": 6.017, "eval_steps_per_second": 0.377, "step": 500 }, { "epoch": 0.04074315514993481, "grad_norm": 8.116039276123047, "learning_rate": 4.62962962962963e-05, "loss": 0.75, "step": 1000 }, { "epoch": 0.04074315514993481, "eval_accuracy": 0.7746987951807229, "eval_f1": 0.7664760912434666, "eval_loss": 0.5814982652664185, "eval_runtime": 411.7423, "eval_samples_per_second": 6.047, "eval_steps_per_second": 0.379, "step": 1000 }, { "epoch": 0.06111473272490222, "grad_norm": 22.518217086791992, "learning_rate": 4.166666666666667e-05, "loss": 0.5587, "step": 1500 }, { "epoch": 0.06111473272490222, "eval_accuracy": 0.8240963855421687, "eval_f1": 0.8214797441431562, "eval_loss": 0.49682557582855225, "eval_runtime": 431.8588, "eval_samples_per_second": 5.766, "eval_steps_per_second": 0.361, "step": 1500 }, { "epoch": 0.08148631029986962, "grad_norm": 9.046836853027344, "learning_rate": 3.7037037037037037e-05, "loss": 0.517, "step": 2000 }, { "epoch": 0.08148631029986962, "eval_accuracy": 0.8429718875502008, "eval_f1": 0.8424773939330747, "eval_loss": 0.42901262640953064, "eval_runtime": 411.5643, "eval_samples_per_second": 6.05, "eval_steps_per_second": 0.379, "step": 2000 }, { "epoch": 0.10185788787483703, "grad_norm": 7.844255447387695, "learning_rate": 3.240740740740741e-05, "loss": 0.4916, "step": 2500 }, { "epoch": 0.10185788787483703, "eval_accuracy": 0.851004016064257, "eval_f1": 0.8489310910654924, "eval_loss": 0.44668635725975037, "eval_runtime": 411.3006, "eval_samples_per_second": 6.054, "eval_steps_per_second": 0.379, "step": 2500 }, { "epoch": 0.12222946544980444, "grad_norm": 8.98777961730957, "learning_rate": 2.777777777777778e-05, "loss": 0.4814, "step": 3000 }, { "epoch": 0.12222946544980444, "eval_accuracy": 0.8530120481927711, "eval_f1": 0.8514869296463266, "eval_loss": 0.4118644595146179, "eval_runtime": 411.2844, "eval_samples_per_second": 6.054, "eval_steps_per_second": 0.379, "step": 3000 }, { "epoch": 0.14260104302477183, "grad_norm": 9.63521957397461, "learning_rate": 2.314814814814815e-05, "loss": 0.4656, "step": 3500 }, { "epoch": 0.14260104302477183, "eval_accuracy": 0.8614457831325302, "eval_f1": 0.8604597422173991, "eval_loss": 0.39471763372421265, "eval_runtime": 431.5224, "eval_samples_per_second": 5.77, "eval_steps_per_second": 0.362, "step": 3500 }, { "epoch": 0.16297262059973924, "grad_norm": 4.712085723876953, "learning_rate": 1.8518518518518518e-05, "loss": 0.4736, "step": 4000 }, { "epoch": 0.16297262059973924, "eval_accuracy": 0.8646586345381526, "eval_f1": 0.8637591639135315, "eval_loss": 0.3822212219238281, "eval_runtime": 411.5091, "eval_samples_per_second": 6.051, "eval_steps_per_second": 0.379, "step": 4000 }, { "epoch": 0.18334419817470665, "grad_norm": 7.1373724937438965, "learning_rate": 1.388888888888889e-05, "loss": 0.4476, "step": 4500 }, { "epoch": 0.18334419817470665, "eval_accuracy": 0.8634538152610441, "eval_f1": 0.8622999838502284, "eval_loss": 0.39242908358573914, "eval_runtime": 411.5251, "eval_samples_per_second": 6.051, "eval_steps_per_second": 0.379, "step": 4500 }, { "epoch": 0.20371577574967406, "grad_norm": 5.437769412994385, "learning_rate": 9.259259259259259e-06, "loss": 0.4491, "step": 5000 }, { "epoch": 0.20371577574967406, "eval_accuracy": 0.8614457831325302, "eval_f1": 0.8605133652274247, "eval_loss": 0.3978983461856842, "eval_runtime": 431.6421, "eval_samples_per_second": 5.769, "eval_steps_per_second": 0.361, "step": 5000 }, { "epoch": 0.22408735332464147, "grad_norm": 6.67974328994751, "learning_rate": 4.6296296296296296e-06, "loss": 0.461, "step": 5500 }, { "epoch": 0.22408735332464147, "eval_accuracy": 0.8662650602409638, "eval_f1": 0.8655422260886931, "eval_loss": 0.3820537328720093, "eval_runtime": 411.7119, "eval_samples_per_second": 6.048, "eval_steps_per_second": 0.379, "step": 5500 }, { "epoch": 0.24445893089960888, "grad_norm": 7.511369705200195, "learning_rate": 0.0, "loss": 0.4517, "step": 6000 }, { "epoch": 0.24445893089960888, "eval_accuracy": 0.8646586345381526, "eval_f1": 0.8638975111231288, "eval_loss": 0.3871505558490753, "eval_runtime": 431.5222, "eval_samples_per_second": 5.77, "eval_steps_per_second": 0.362, "step": 6000 } ], "logging_steps": 500, "max_steps": 6000, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "total_flos": 8.03166870528e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }