|
{ |
|
"best_metric": 0.5894856058137782, |
|
"best_model_checkpoint": "./runtime-text-classification/deberta-v3-xsmall-CoLA/checkpoint-134", |
|
"epoch": 3.0, |
|
"global_step": 201, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.5714285714285714e-05, |
|
"loss": 0.6778, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 5.142857142857143e-05, |
|
"loss": 0.6609, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 5.998426707136545e-05, |
|
"loss": 0.6124, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 5.990171431423709e-05, |
|
"loss": 0.5988, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 5.9748603036792754e-05, |
|
"loss": 0.5704, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 5.952529453287223e-05, |
|
"loss": 0.5851, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 5.923231573940778e-05, |
|
"loss": 0.5969, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 5.887035799302091e-05, |
|
"loss": 0.5408, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5.844027539868812e-05, |
|
"loss": 0.5148, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5.794308281432527e-05, |
|
"loss": 0.4907, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.7379953456046206e-05, |
|
"loss": 0.4503, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 5.675221612974643e-05, |
|
"loss": 0.4701, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 5.606135209554454e-05, |
|
"loss": 0.449, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 5.530899157248014e-05, |
|
"loss": 0.4227, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 5.449690989171633e-05, |
|
"loss": 0.3914, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 5.3627023307323634e-05, |
|
"loss": 0.4338, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 5.2701384474530855e-05, |
|
"loss": 0.3944, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.172217760611265e-05, |
|
"loss": 0.4302, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5.0691713318343134e-05, |
|
"loss": 0.4251, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.961242317867758e-05, |
|
"loss": 0.3713, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.848685396802782e-05, |
|
"loss": 0.4146, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.731766167117059e-05, |
|
"loss": 0.3945, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.43225371837615967, |
|
"eval_matthews_correlation": 0.5778184033685675, |
|
"eval_runtime": 5.0494, |
|
"eval_samples_per_second": 206.558, |
|
"eval_steps_per_second": 51.689, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.610760520946952e-05, |
|
"loss": 0.3873, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 4.4859539930699636e-05, |
|
"loss": 0.33, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 4.357641087133617e-05, |
|
"loss": 0.3265, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 4.226124580720669e-05, |
|
"loss": 0.3306, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 4.091714810890463e-05, |
|
"loss": 0.3695, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.9547289418823325e-05, |
|
"loss": 0.3862, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.815490216709029e-05, |
|
"loss": 0.3557, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.6743271944061884e-05, |
|
"loss": 0.2995, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 3.5315729747376686e-05, |
|
"loss": 0.2953, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 3.3875644121862195e-05, |
|
"loss": 0.3356, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.242641321084213e-05, |
|
"loss": 0.3182, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 3.097145673760068e-05, |
|
"loss": 0.358, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.9514207935924957e-05, |
|
"loss": 0.3351, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.805810544876693e-05, |
|
"loss": 0.3057, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.6606585214141475e-05, |
|
"loss": 0.3138, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.5163072357407205e-05, |
|
"loss": 0.335, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.373097310906175e-05, |
|
"loss": 0.3209, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.231366676712283e-05, |
|
"loss": 0.3483, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.0914497723061486e-05, |
|
"loss": 0.2824, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.953676757010353e-05, |
|
"loss": 0.2709, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.8183727312521392e-05, |
|
"loss": 0.3404, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.685856969429953e-05, |
|
"loss": 0.3214, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.423650860786438, |
|
"eval_matthews_correlation": 0.5894856058137782, |
|
"eval_runtime": 5.1798, |
|
"eval_samples_per_second": 201.361, |
|
"eval_steps_per_second": 50.388, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.556442166527585e-05, |
|
"loss": 0.2703, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.4304337002536097e-05, |
|
"loss": 0.2356, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.3081289104472803e-05, |
|
"loss": 0.2812, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.189816397451226e-05, |
|
"loss": 0.2759, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.075775341106581e-05, |
|
"loss": 0.25, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 9.662748419774903e-06, |
|
"loss": 0.3233, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 8.61573286359513e-06, |
|
"loss": 0.2499, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 7.619177365702619e-06, |
|
"loss": 0.2734, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 6.6754334796105274e-06, |
|
"loss": 0.2456, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 5.786728140251719e-06, |
|
"loss": 0.2782, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 4.95515840912187e-06, |
|
"loss": 0.248, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 4.182686525882242e-06, |
|
"loss": 0.2766, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 3.4711352780992968e-06, |
|
"loss": 0.2192, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.822183700046593e-06, |
|
"loss": 0.1969, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 2.237363110718876e-06, |
|
"loss": 0.2931, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 1.718053500406972e-06, |
|
"loss": 0.2551, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 1.2654802743603888e-06, |
|
"loss": 0.246, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 8.807113612212248e-07, |
|
"loss": 0.2474, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 5.646546930528284e-07, |
|
"loss": 0.2691, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 3.1805606290931987e-07, |
|
"loss": 0.3154, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.414973650015816e-07, |
|
"loss": 0.2823, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 3.5395221612279924e-08, |
|
"loss": 0.2711, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.3059, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.4636174738407135, |
|
"eval_matthews_correlation": 0.5794528111058918, |
|
"eval_runtime": 5.0259, |
|
"eval_samples_per_second": 207.524, |
|
"eval_steps_per_second": 51.931, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 201, |
|
"total_flos": 422473413165056.0, |
|
"train_loss": 0.3622648042825917, |
|
"train_runtime": 157.4171, |
|
"train_samples_per_second": 162.962, |
|
"train_steps_per_second": 1.277 |
|
} |
|
], |
|
"max_steps": 201, |
|
"num_train_epochs": 3, |
|
"total_flos": 422473413165056.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|