{ "best_metric": 0.5128149390220642, "best_model_checkpoint": "/mnt/beegfs/farid/mlora/outputs/xnli/aya-101/hi/rank4_lr5e-5/checkpoint-6000", "epoch": 0.24445893089960888, "eval_steps": 500, "global_step": 6000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.020371577574967405, "grad_norm": 1.9568703174591064, "learning_rate": 4.166666666666667e-05, "loss": 1.1534, "step": 500 }, { "epoch": 0.020371577574967405, "eval_accuracy": 0.37991967871485943, "eval_f1": 0.3396581059481989, "eval_loss": 1.0917378664016724, "eval_runtime": 413.6608, "eval_samples_per_second": 6.019, "eval_steps_per_second": 0.377, "step": 500 }, { "epoch": 0.04074315514993481, "grad_norm": 9.426871299743652, "learning_rate": 4.62962962962963e-05, "loss": 1.0218, "step": 1000 }, { "epoch": 0.04074315514993481, "eval_accuracy": 0.6409638554216868, "eval_f1": 0.6436728838823977, "eval_loss": 0.8600370287895203, "eval_runtime": 412.0934, "eval_samples_per_second": 6.042, "eval_steps_per_second": 0.379, "step": 1000 }, { "epoch": 0.06111473272490222, "grad_norm": 20.413841247558594, "learning_rate": 4.166666666666667e-05, "loss": 0.8582, "step": 1500 }, { "epoch": 0.06111473272490222, "eval_accuracy": 0.7369477911646586, "eval_f1": 0.7370571656251429, "eval_loss": 0.6784067749977112, "eval_runtime": 431.3062, "eval_samples_per_second": 5.773, "eval_steps_per_second": 0.362, "step": 1500 }, { "epoch": 0.08148631029986962, "grad_norm": 8.49395751953125, "learning_rate": 3.7037037037037037e-05, "loss": 0.7793, "step": 2000 }, { "epoch": 0.08148631029986962, "eval_accuracy": 0.7682730923694779, "eval_f1": 0.7693110628872669, "eval_loss": 0.611162006855011, "eval_runtime": 431.4602, "eval_samples_per_second": 5.771, "eval_steps_per_second": 0.362, "step": 2000 }, { "epoch": 0.10185788787483703, "grad_norm": 7.8166375160217285, "learning_rate": 3.240740740740741e-05, "loss": 0.7563, "step": 2500 }, { "epoch": 0.10185788787483703, "eval_accuracy": 0.7871485943775101, "eval_f1": 0.7881188800855449, "eval_loss": 0.5777685046195984, "eval_runtime": 411.445, "eval_samples_per_second": 6.052, "eval_steps_per_second": 0.379, "step": 2500 }, { "epoch": 0.12222946544980444, "grad_norm": 11.658799171447754, "learning_rate": 2.777777777777778e-05, "loss": 0.7191, "step": 3000 }, { "epoch": 0.12222946544980444, "eval_accuracy": 0.7971887550200804, "eval_f1": 0.7971744387340783, "eval_loss": 0.5552772283554077, "eval_runtime": 411.1455, "eval_samples_per_second": 6.056, "eval_steps_per_second": 0.379, "step": 3000 }, { "epoch": 0.14260104302477183, "grad_norm": 8.530998229980469, "learning_rate": 2.314814814814815e-05, "loss": 0.7052, "step": 3500 }, { "epoch": 0.14260104302477183, "eval_accuracy": 0.8016064257028113, "eval_f1": 0.8021556716106938, "eval_loss": 0.5359864234924316, "eval_runtime": 411.8163, "eval_samples_per_second": 6.046, "eval_steps_per_second": 0.379, "step": 3500 }, { "epoch": 0.16297262059973924, "grad_norm": 6.73158597946167, "learning_rate": 1.8518518518518518e-05, "loss": 0.7079, "step": 4000 }, { "epoch": 0.16297262059973924, "eval_accuracy": 0.802008032128514, "eval_f1": 0.8023252480910511, "eval_loss": 0.5278254747390747, "eval_runtime": 411.3361, "eval_samples_per_second": 6.053, "eval_steps_per_second": 0.379, "step": 4000 }, { "epoch": 0.18334419817470665, "grad_norm": 9.069704055786133, "learning_rate": 1.388888888888889e-05, "loss": 0.6775, "step": 4500 }, { "epoch": 0.18334419817470665, "eval_accuracy": 0.8036144578313253, "eval_f1": 0.8043620274591458, "eval_loss": 0.5234741568565369, "eval_runtime": 431.5576, "eval_samples_per_second": 5.77, "eval_steps_per_second": 0.361, "step": 4500 }, { "epoch": 0.20371577574967406, "grad_norm": 9.322051048278809, "learning_rate": 9.259259259259259e-06, "loss": 0.6977, "step": 5000 }, { "epoch": 0.20371577574967406, "eval_accuracy": 0.8088353413654619, "eval_f1": 0.8089545270025741, "eval_loss": 0.5214890837669373, "eval_runtime": 412.4688, "eval_samples_per_second": 6.037, "eval_steps_per_second": 0.378, "step": 5000 }, { "epoch": 0.22408735332464147, "grad_norm": 5.2376508712768555, "learning_rate": 4.6296296296296296e-06, "loss": 0.6834, "step": 5500 }, { "epoch": 0.22408735332464147, "eval_accuracy": 0.8076305220883534, "eval_f1": 0.8082794716875278, "eval_loss": 0.5130343437194824, "eval_runtime": 411.1989, "eval_samples_per_second": 6.055, "eval_steps_per_second": 0.379, "step": 5500 }, { "epoch": 0.24445893089960888, "grad_norm": 9.288165092468262, "learning_rate": 0.0, "loss": 0.6639, "step": 6000 }, { "epoch": 0.24445893089960888, "eval_accuracy": 0.8080321285140563, "eval_f1": 0.8086136034722085, "eval_loss": 0.5128149390220642, "eval_runtime": 411.1581, "eval_samples_per_second": 6.056, "eval_steps_per_second": 0.379, "step": 6000 } ], "logging_steps": 500, "max_steps": 6000, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "total_flos": 8.03166870528e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }