{ "best_metric": 0.9220723199736294, "best_model_checkpoint": "./saved_models/llama_3b_prompt_sbdh_gpt4_v2_0/checkpoint-144", "epoch": 6.0, "eval_steps": 500, "global_step": 144, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 8.041247367858887, "learning_rate": 3.3333333333333335e-05, "loss": 3.9133, "step": 24 }, { "epoch": 1.0, "eval_acc_macro": 0.4694269521783949, "eval_acc_micro": 0.509025270758086, "eval_auc_macro": 0.9508939125710083, "eval_auc_micro": 0.9142539505353097, "eval_f1_at_5": 0.39182832882982277, "eval_f1_at_8": 0.2863186309798368, "eval_f1_macro": 0.6055711778382781, "eval_f1_micro": 0.6746411483252942, "eval_loss": 0.25163671374320984, "eval_prec_at_5": 0.2502283105022831, "eval_prec_at_8": 0.168236301369863, "eval_prec_macro": 0.8351317849349306, "eval_prec_micro": 0.7975113122171044, "eval_rec_at_5": 0.9025875190258752, "eval_rec_at_8": 0.9604261796042617, "eval_rec_macro": 0.5519678498265336, "eval_rec_micro": 0.5845771144278122, "eval_runtime": 36.2308, "eval_samples_per_second": 24.178, "eval_steps_per_second": 3.036, "eval_threshold": -0.625, "step": 24 }, { "epoch": 2.0, "grad_norm": 1.1691702604293823, "learning_rate": 6.666666666666667e-05, "loss": 0.1193, "step": 48 }, { "epoch": 2.0, "eval_acc_macro": 0.8113749934046343, "eval_acc_micro": 0.8189149560116702, "eval_auc_macro": 0.9940036594427881, "eval_auc_micro": 0.9939886141325751, "eval_f1_at_5": 0.42918048633589306, "eval_f1_at_8": 0.29274329072668936, "eval_f1_macro": 0.8921678080506279, "eval_f1_micro": 0.900443369608956, "eval_loss": 0.06727338582277298, "eval_prec_at_5": 0.27488584474885847, "eval_prec_at_8": 0.1720890410958904, "eval_prec_macro": 0.8696485876912569, "eval_prec_micro": 0.8760784313724803, "eval_rec_at_5": 0.978310502283105, "eval_rec_at_8": 0.9794520547945206, "eval_rec_macro": 0.9217289354703175, "eval_rec_micro": 0.9262023217246329, "eval_runtime": 121.244, "eval_samples_per_second": 7.225, "eval_steps_per_second": 0.907, "eval_threshold": -1.125, "step": 48 }, { "epoch": 3.0, "grad_norm": 0.8648784160614014, "learning_rate": 0.0001, "loss": 0.0587, "step": 72 }, { "epoch": 3.0, "eval_acc_macro": 0.8392595891870926, "eval_acc_micro": 0.849157733537454, "eval_auc_macro": 0.9951075369988027, "eval_auc_micro": 0.9959963596303174, "eval_f1_at_5": 0.4298468624753246, "eval_f1_at_8": 0.29274329072668936, "eval_f1_macro": 0.9088894089226979, "eval_f1_micro": 0.9184265010351206, "eval_loss": 0.05153823271393776, "eval_prec_at_5": 0.2753424657534247, "eval_prec_at_8": 0.1720890410958904, "eval_prec_macro": 0.9249501329228883, "eval_prec_micro": 0.9172870140611317, "eval_rec_at_5": 0.9794520547945206, "eval_rec_at_8": 0.9794520547945206, "eval_rec_macro": 0.9031612618645798, "eval_rec_micro": 0.9195688225538209, "eval_runtime": 118.514, "eval_samples_per_second": 7.392, "eval_steps_per_second": 0.928, "eval_threshold": 0.125, "step": 72 }, { "epoch": 4.0, "grad_norm": 0.2277711182832718, "learning_rate": 9.411764705882353e-05, "loss": 0.0465, "step": 96 }, { "epoch": 4.0, "eval_acc_macro": 0.8500885385546534, "eval_acc_micro": 0.8541033434649806, "eval_auc_macro": 0.9952461152918444, "eval_auc_micro": 0.9963732184410706, "eval_f1_at_5": 0.4295136788854873, "eval_f1_at_8": 0.29274329072668936, "eval_f1_macro": 0.9175170894131637, "eval_f1_micro": 0.9213114754097605, "eval_loss": 0.05052410811185837, "eval_prec_at_5": 0.2751141552511416, "eval_prec_at_8": 0.1720890410958904, "eval_prec_macro": 0.9087740459598467, "eval_prec_micro": 0.9108589951376895, "eval_rec_at_5": 0.9788812785388128, "eval_rec_at_8": 0.9794520547945206, "eval_rec_macro": 0.9275029071213349, "eval_rec_micro": 0.9320066334990935, "eval_runtime": 112.7846, "eval_samples_per_second": 7.767, "eval_steps_per_second": 0.975, "eval_threshold": 0.125, "step": 96 }, { "epoch": 5.0, "grad_norm": 0.3336959481239319, "learning_rate": 8.823529411764706e-05, "loss": 0.0368, "step": 120 }, { "epoch": 5.0, "eval_acc_macro": 0.8560051578711357, "eval_acc_micro": 0.8632148377124526, "eval_auc_macro": 0.9960314271621913, "eval_auc_micro": 0.9969526516710807, "eval_f1_at_5": 0.4298468624753246, "eval_f1_at_8": 0.29274329072668936, "eval_f1_macro": 0.920584293633742, "eval_f1_micro": 0.9265864786394916, "eval_loss": 0.047832097858190536, "eval_prec_at_5": 0.2753424657534247, "eval_prec_at_8": 0.1720890410958904, "eval_prec_macro": 0.9209783471417811, "eval_prec_micro": 0.9269709543567695, "eval_rec_at_5": 0.9794520547945206, "eval_rec_at_8": 0.9794520547945206, "eval_rec_macro": 0.9204814122555302, "eval_rec_micro": 0.9262023217246329, "eval_runtime": 116.9753, "eval_samples_per_second": 7.489, "eval_steps_per_second": 0.94, "eval_threshold": 0.125, "step": 120 }, { "epoch": 6.0, "grad_norm": 0.15532971918582916, "learning_rate": 8.23529411764706e-05, "loss": 0.0307, "step": 144 }, { "epoch": 6.0, "eval_acc_macro": 0.8581393259022515, "eval_acc_micro": 0.8658346333852679, "eval_auc_macro": 0.9960477018781981, "eval_auc_micro": 0.9967622842874246, "eval_f1_at_5": 0.4298468624753246, "eval_f1_at_8": 0.29274329072668936, "eval_f1_macro": 0.9220723199736294, "eval_f1_micro": 0.9280936454848722, "eval_loss": 0.05041336640715599, "eval_prec_at_5": 0.2753424657534247, "eval_prec_at_8": 0.1720890410958904, "eval_prec_macro": 0.9315146645321158, "eval_prec_micro": 0.9359190556491622, "eval_rec_at_5": 0.9794520547945206, "eval_rec_at_8": 0.9794520547945206, "eval_rec_macro": 0.9137683032294842, "eval_rec_micro": 0.9203980099501724, "eval_runtime": 118.5157, "eval_samples_per_second": 7.391, "eval_steps_per_second": 0.928, "eval_threshold": 1.125, "step": 144 } ], "logging_steps": 500, "max_steps": 480, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.653734367119278e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }