|
{ |
|
"best_metric": 0.71900826446281, |
|
"best_model_checkpoint": "models/single_label/deberta-v3-large/exp5/checkpoint-1500", |
|
"epoch": 0.07913166189678593, |
|
"global_step": 1500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.1645569620253166e-08, |
|
"loss": 1.4101, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.329113924050633e-08, |
|
"loss": 1.4037, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_osu_accuracy": 0.5289256198347108, |
|
"eval_osu_loss": 1.3296300172805786, |
|
"eval_osu_matthews_correlation": 0.08430984949814109, |
|
"eval_osu_runtime": 4.6863, |
|
"eval_osu_samples_per_second": 51.64, |
|
"eval_osu_steps_per_second": 3.414, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_deepset_1_accuracy": 0.42016806722689076, |
|
"eval_deepset_1_loss": 1.3601003885269165, |
|
"eval_deepset_1_matthews_correlation": 0.007340018855649166, |
|
"eval_deepset_1_runtime": 1.0598, |
|
"eval_deepset_1_samples_per_second": 112.29, |
|
"eval_deepset_1_steps_per_second": 7.549, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_deepset_2_accuracy": 0.5128205128205128, |
|
"eval_deepset_2_loss": 1.341286540031433, |
|
"eval_deepset_2_matthews_correlation": 0.09702062677085851, |
|
"eval_deepset_2_runtime": 1.5595, |
|
"eval_deepset_2_samples_per_second": 100.032, |
|
"eval_deepset_2_steps_per_second": 6.412, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_deepset_3_accuracy": 0.5260115606936416, |
|
"eval_deepset_3_loss": 1.3386189937591553, |
|
"eval_deepset_3_matthews_correlation": 0.13147932110355937, |
|
"eval_deepset_3_runtime": 3.0465, |
|
"eval_deepset_3_samples_per_second": 113.572, |
|
"eval_deepset_3_steps_per_second": 7.221, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_deepset_4_accuracy": 0.46601941747572817, |
|
"eval_deepset_4_loss": 1.354163646697998, |
|
"eval_deepset_4_matthews_correlation": 0.04602315745107765, |
|
"eval_deepset_4_runtime": 1.7129, |
|
"eval_deepset_4_samples_per_second": 120.262, |
|
"eval_deepset_4_steps_per_second": 7.589, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_anli_accuracy": 0.314, |
|
"eval_anli_loss": 1.3443490266799927, |
|
"eval_anli_matthews_correlation": -0.03669996966921473, |
|
"eval_anli_runtime": 5.1544, |
|
"eval_anli_samples_per_second": 194.01, |
|
"eval_anli_steps_per_second": 12.223, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.462025316455696e-08, |
|
"loss": 1.381, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.2626582278481014e-07, |
|
"loss": 1.3016, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5791139240506326e-07, |
|
"loss": 1.1103, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_osu_accuracy": 0.3677685950413223, |
|
"eval_osu_loss": 1.2251540422439575, |
|
"eval_osu_matthews_correlation": 0.009389673632694094, |
|
"eval_osu_runtime": 4.6743, |
|
"eval_osu_samples_per_second": 51.773, |
|
"eval_osu_steps_per_second": 3.423, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_deepset_1_accuracy": 0.2605042016806723, |
|
"eval_deepset_1_loss": 1.498146414756775, |
|
"eval_deepset_1_matthews_correlation": -0.12070085589088507, |
|
"eval_deepset_1_runtime": 1.0684, |
|
"eval_deepset_1_samples_per_second": 111.378, |
|
"eval_deepset_1_steps_per_second": 7.488, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_deepset_2_accuracy": 0.3076923076923077, |
|
"eval_deepset_2_loss": 1.3761013746261597, |
|
"eval_deepset_2_matthews_correlation": -0.08397491442025497, |
|
"eval_deepset_2_runtime": 1.5607, |
|
"eval_deepset_2_samples_per_second": 99.953, |
|
"eval_deepset_2_steps_per_second": 6.407, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_deepset_3_accuracy": 0.31213872832369943, |
|
"eval_deepset_3_loss": 1.4205697774887085, |
|
"eval_deepset_3_matthews_correlation": -0.06528708661392457, |
|
"eval_deepset_3_runtime": 3.0447, |
|
"eval_deepset_3_samples_per_second": 113.639, |
|
"eval_deepset_3_steps_per_second": 7.226, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_deepset_4_accuracy": 0.25728155339805825, |
|
"eval_deepset_4_loss": 1.5763508081436157, |
|
"eval_deepset_4_matthews_correlation": -0.11908261162663966, |
|
"eval_deepset_4_runtime": 1.7183, |
|
"eval_deepset_4_samples_per_second": 119.883, |
|
"eval_deepset_4_steps_per_second": 7.565, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_anli_accuracy": 0.33, |
|
"eval_anli_loss": 1.1667168140411377, |
|
"eval_anli_matthews_correlation": 0.0012307081400291311, |
|
"eval_anli_runtime": 5.1504, |
|
"eval_anli_samples_per_second": 194.161, |
|
"eval_anli_steps_per_second": 12.232, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.8955696202531644e-07, |
|
"loss": 0.8892, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.2120253164556962e-07, |
|
"loss": 0.7838, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_osu_accuracy": 0.6942148760330579, |
|
"eval_osu_loss": 0.9712508916854858, |
|
"eval_osu_matthews_correlation": 0.4897642238477718, |
|
"eval_osu_runtime": 4.6823, |
|
"eval_osu_samples_per_second": 51.684, |
|
"eval_osu_steps_per_second": 3.417, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_deepset_1_accuracy": 0.6134453781512605, |
|
"eval_deepset_1_loss": 1.6580817699432373, |
|
"eval_deepset_1_matthews_correlation": 0.41985911107813456, |
|
"eval_deepset_1_runtime": 1.0652, |
|
"eval_deepset_1_samples_per_second": 111.721, |
|
"eval_deepset_1_steps_per_second": 7.511, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_deepset_2_accuracy": 0.7051282051282052, |
|
"eval_deepset_2_loss": 1.3619518280029297, |
|
"eval_deepset_2_matthews_correlation": 0.5328568142577198, |
|
"eval_deepset_2_runtime": 1.5605, |
|
"eval_deepset_2_samples_per_second": 99.966, |
|
"eval_deepset_2_steps_per_second": 6.408, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_deepset_3_accuracy": 0.5578034682080925, |
|
"eval_deepset_3_loss": 1.4653511047363281, |
|
"eval_deepset_3_matthews_correlation": 0.2932617859503659, |
|
"eval_deepset_3_runtime": 3.0504, |
|
"eval_deepset_3_samples_per_second": 113.429, |
|
"eval_deepset_3_steps_per_second": 7.212, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_deepset_4_accuracy": 0.529126213592233, |
|
"eval_deepset_4_loss": 1.7414307594299316, |
|
"eval_deepset_4_matthews_correlation": 0.3054166536717615, |
|
"eval_deepset_4_runtime": 1.7235, |
|
"eval_deepset_4_samples_per_second": 119.522, |
|
"eval_deepset_4_steps_per_second": 7.543, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_anli_accuracy": 0.351, |
|
"eval_anli_loss": 1.1338268518447876, |
|
"eval_anli_matthews_correlation": 0.030305510765310642, |
|
"eval_anli_runtime": 5.1432, |
|
"eval_anli_samples_per_second": 194.432, |
|
"eval_anli_steps_per_second": 12.249, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.5284810126582275e-07, |
|
"loss": 0.6807, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.844936708860759e-07, |
|
"loss": 0.502, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.991503304270561e-07, |
|
"loss": 0.4006, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_osu_accuracy": 0.7024793388429752, |
|
"eval_osu_loss": 0.7255080938339233, |
|
"eval_osu_matthews_correlation": 0.4761839486992106, |
|
"eval_osu_runtime": 4.6909, |
|
"eval_osu_samples_per_second": 51.589, |
|
"eval_osu_steps_per_second": 3.411, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_deepset_1_accuracy": 0.6134453781512605, |
|
"eval_deepset_1_loss": 1.6959413290023804, |
|
"eval_deepset_1_matthews_correlation": 0.42105052198193255, |
|
"eval_deepset_1_runtime": 1.0672, |
|
"eval_deepset_1_samples_per_second": 111.505, |
|
"eval_deepset_1_steps_per_second": 7.496, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_deepset_2_accuracy": 0.7243589743589743, |
|
"eval_deepset_2_loss": 1.2153555154800415, |
|
"eval_deepset_2_matthews_correlation": 0.5466918371684817, |
|
"eval_deepset_2_runtime": 1.556, |
|
"eval_deepset_2_samples_per_second": 100.254, |
|
"eval_deepset_2_steps_per_second": 6.427, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_deepset_3_accuracy": 0.6473988439306358, |
|
"eval_deepset_3_loss": 1.4247812032699585, |
|
"eval_deepset_3_matthews_correlation": 0.4017768477834457, |
|
"eval_deepset_3_runtime": 3.045, |
|
"eval_deepset_3_samples_per_second": 113.628, |
|
"eval_deepset_3_steps_per_second": 7.225, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_deepset_4_accuracy": 0.6504854368932039, |
|
"eval_deepset_4_loss": 1.7334672212600708, |
|
"eval_deepset_4_matthews_correlation": 0.4508615553005039, |
|
"eval_deepset_4_runtime": 1.7091, |
|
"eval_deepset_4_samples_per_second": 120.534, |
|
"eval_deepset_4_steps_per_second": 7.606, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_anli_accuracy": 0.477, |
|
"eval_anli_loss": 1.3187198638916016, |
|
"eval_anli_matthews_correlation": 0.218035345395415, |
|
"eval_anli_runtime": 5.1567, |
|
"eval_anli_samples_per_second": 193.922, |
|
"eval_anli_steps_per_second": 12.217, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9748431165657797e-07, |
|
"loss": 0.3456, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9581829288609986e-07, |
|
"loss": 0.3098, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_osu_accuracy": 0.6900826446280992, |
|
"eval_osu_loss": 0.794772207736969, |
|
"eval_osu_matthews_correlation": 0.4632054751499522, |
|
"eval_osu_runtime": 4.6759, |
|
"eval_osu_samples_per_second": 51.755, |
|
"eval_osu_steps_per_second": 3.422, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_deepset_1_accuracy": 0.5882352941176471, |
|
"eval_deepset_1_loss": 1.9199906587600708, |
|
"eval_deepset_1_matthews_correlation": 0.3787987593817238, |
|
"eval_deepset_1_runtime": 1.0581, |
|
"eval_deepset_1_samples_per_second": 112.466, |
|
"eval_deepset_1_steps_per_second": 7.561, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_deepset_2_accuracy": 0.6923076923076923, |
|
"eval_deepset_2_loss": 1.3611226081848145, |
|
"eval_deepset_2_matthews_correlation": 0.5007307553834116, |
|
"eval_deepset_2_runtime": 1.5603, |
|
"eval_deepset_2_samples_per_second": 99.978, |
|
"eval_deepset_2_steps_per_second": 6.409, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_deepset_3_accuracy": 0.630057803468208, |
|
"eval_deepset_3_loss": 1.5691180229187012, |
|
"eval_deepset_3_matthews_correlation": 0.38258183358832687, |
|
"eval_deepset_3_runtime": 3.052, |
|
"eval_deepset_3_samples_per_second": 113.37, |
|
"eval_deepset_3_steps_per_second": 7.208, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_deepset_4_accuracy": 0.5922330097087378, |
|
"eval_deepset_4_loss": 1.946365237236023, |
|
"eval_deepset_4_matthews_correlation": 0.36618144302865463, |
|
"eval_deepset_4_runtime": 1.7085, |
|
"eval_deepset_4_samples_per_second": 120.574, |
|
"eval_deepset_4_steps_per_second": 7.609, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_anli_accuracy": 0.577, |
|
"eval_anli_loss": 1.1495600938796997, |
|
"eval_anli_matthews_correlation": 0.36758926218287485, |
|
"eval_anli_runtime": 5.1325, |
|
"eval_anli_samples_per_second": 194.837, |
|
"eval_anli_steps_per_second": 12.275, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.941522741156217e-07, |
|
"loss": 0.2936, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.9248625534514355e-07, |
|
"loss": 0.275, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.9083689676237014e-07, |
|
"loss": 0.2711, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_osu_accuracy": 0.71900826446281, |
|
"eval_osu_loss": 0.8360257744789124, |
|
"eval_osu_matthews_correlation": 0.5055263088346037, |
|
"eval_osu_runtime": 4.6825, |
|
"eval_osu_samples_per_second": 51.682, |
|
"eval_osu_steps_per_second": 3.417, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_deepset_1_accuracy": 0.6302521008403361, |
|
"eval_deepset_1_loss": 2.0135045051574707, |
|
"eval_deepset_1_matthews_correlation": 0.43404618683091417, |
|
"eval_deepset_1_runtime": 1.0652, |
|
"eval_deepset_1_samples_per_second": 111.718, |
|
"eval_deepset_1_steps_per_second": 7.51, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_deepset_2_accuracy": 0.7115384615384616, |
|
"eval_deepset_2_loss": 1.3997838497161865, |
|
"eval_deepset_2_matthews_correlation": 0.5105532533477752, |
|
"eval_deepset_2_runtime": 1.5611, |
|
"eval_deepset_2_samples_per_second": 99.928, |
|
"eval_deepset_2_steps_per_second": 6.406, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_deepset_3_accuracy": 0.6676300578034682, |
|
"eval_deepset_3_loss": 1.5432208776474, |
|
"eval_deepset_3_matthews_correlation": 0.42876096695199906, |
|
"eval_deepset_3_runtime": 3.0533, |
|
"eval_deepset_3_samples_per_second": 113.322, |
|
"eval_deepset_3_steps_per_second": 7.205, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_deepset_4_accuracy": 0.6359223300970874, |
|
"eval_deepset_4_loss": 1.9390015602111816, |
|
"eval_deepset_4_matthews_correlation": 0.4108775577859797, |
|
"eval_deepset_4_runtime": 1.7173, |
|
"eval_deepset_4_samples_per_second": 119.953, |
|
"eval_deepset_4_steps_per_second": 7.57, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_anli_accuracy": 0.609, |
|
"eval_anli_loss": 1.042230486869812, |
|
"eval_anli_matthews_correlation": 0.42100205280404024, |
|
"eval_anli_runtime": 5.1501, |
|
"eval_anli_samples_per_second": 194.17, |
|
"eval_anli_steps_per_second": 12.233, |
|
"step": 1500 |
|
} |
|
], |
|
"max_steps": 18955, |
|
"num_train_epochs": 1, |
|
"total_flos": 2.042867740823347e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|