{
  "best_metric": 0.81132042094234,
  "best_model_checkpoint": "result/dfm-sentence-encoder-medium-v4",
  "epoch": 1.0,
  "global_step": 49345,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 9.89867261120681e-06,
      "loss": 0.551,
      "step": 500
    },
    {
      "epoch": 0.01,
      "eval_avg_sts": 0.7288716035836427,
      "eval_sickr_spearman": 0.6917684187734319,
      "eval_stsb_spearman": 0.7659747883938535,
      "step": 500
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.797345222413618e-06,
      "loss": 0.4721,
      "step": 1000
    },
    {
      "epoch": 0.02,
      "eval_avg_sts": 0.7351199653659765,
      "eval_sickr_spearman": 0.6964072145192234,
      "eval_stsb_spearman": 0.7738327162127295,
      "step": 1000
    },
    {
      "epoch": 0.03,
      "learning_rate": 9.696017833620428e-06,
      "loss": 0.4607,
      "step": 1500
    },
    {
      "epoch": 0.03,
      "eval_avg_sts": 0.7423278666926523,
      "eval_sickr_spearman": 0.6978035266691308,
      "eval_stsb_spearman": 0.7868522067161736,
      "step": 1500
    },
    {
      "epoch": 0.04,
      "learning_rate": 9.594690444827237e-06,
      "loss": 0.4586,
      "step": 2000
    },
    {
      "epoch": 0.04,
      "eval_avg_sts": 0.7441749546870178,
      "eval_sickr_spearman": 0.6987989232153342,
      "eval_stsb_spearman": 0.7895509861587012,
      "step": 2000
    },
    {
      "epoch": 0.05,
      "learning_rate": 9.493363056034047e-06,
      "loss": 0.4627,
      "step": 2500
    },
    {
      "epoch": 0.05,
      "eval_avg_sts": 0.7445984460802536,
      "eval_sickr_spearman": 0.69917351777545,
      "eval_stsb_spearman": 0.7900233743850572,
      "step": 2500
    },
    {
      "epoch": 0.06,
      "learning_rate": 9.392035667240856e-06,
      "loss": 0.4686,
      "step": 3000
    },
    {
      "epoch": 0.06,
      "eval_avg_sts": 0.7510002108817091,
      "eval_sickr_spearman": 0.7075781920294968,
      "eval_stsb_spearman": 0.7944222297339215,
      "step": 3000
    },
    {
      "epoch": 0.07,
      "learning_rate": 9.290708278447666e-06,
      "loss": 0.4686,
      "step": 3500
    },
    {
      "epoch": 0.07,
      "eval_avg_sts": 0.7477270819237568,
      "eval_sickr_spearman": 0.7019111464641181,
      "eval_stsb_spearman": 0.7935430173833955,
      "step": 3500
    },
    {
      "epoch": 0.08,
      "learning_rate": 9.189380889654475e-06,
      "loss": 0.454,
      "step": 4000
    },
    {
      "epoch": 0.08,
      "eval_avg_sts": 0.7419612954851657,
      "eval_sickr_spearman": 0.6941440850816625,
      "eval_stsb_spearman": 0.7897785058886688,
      "step": 4000
    },
    {
      "epoch": 0.09,
      "learning_rate": 9.088053500861283e-06,
      "loss": 0.4649,
      "step": 4500
    },
    {
      "epoch": 0.09,
      "eval_avg_sts": 0.7418233576604656,
      "eval_sickr_spearman": 0.6867260657133575,
      "eval_stsb_spearman": 0.7969206496075738,
      "step": 4500
    },
    {
      "epoch": 0.1,
      "learning_rate": 8.986726112068093e-06,
      "loss": 0.467,
      "step": 5000
    },
    {
      "epoch": 0.1,
      "eval_avg_sts": 0.7446135457895653,
      "eval_sickr_spearman": 0.6906753269669034,
      "eval_stsb_spearman": 0.7985517646122272,
      "step": 5000
    },
    {
      "epoch": 0.11,
      "learning_rate": 8.885398723274902e-06,
      "loss": 0.468,
      "step": 5500
    },
    {
      "epoch": 0.11,
      "eval_avg_sts": 0.7410856251082112,
      "eval_sickr_spearman": 0.6825309332515498,
      "eval_stsb_spearman": 0.7996403169648725,
      "step": 5500
    },
    {
      "epoch": 0.12,
      "learning_rate": 8.78407133448171e-06,
      "loss": 0.4614,
      "step": 6000
    },
    {
      "epoch": 0.12,
      "eval_avg_sts": 0.7509672954480082,
      "eval_sickr_spearman": 0.6987787501527303,
      "eval_stsb_spearman": 0.8031558407432862,
      "step": 6000
    },
    {
      "epoch": 0.13,
      "learning_rate": 8.68274394568852e-06,
      "loss": 0.4596,
      "step": 6500
    },
    {
      "epoch": 0.13,
      "eval_avg_sts": 0.7500520514398668,
      "eval_sickr_spearman": 0.6982613591280389,
      "eval_stsb_spearman": 0.8018427437516946,
      "step": 6500
    },
    {
      "epoch": 0.14,
      "learning_rate": 8.58141655689533e-06,
      "loss": 0.4494,
      "step": 7000
    },
    {
      "epoch": 0.14,
      "eval_avg_sts": 0.741497727059482,
      "eval_sickr_spearman": 0.6884413524079153,
      "eval_stsb_spearman": 0.7945541017110487,
      "step": 7000
    },
    {
      "epoch": 0.15,
      "learning_rate": 8.480089168102139e-06,
      "loss": 0.4535,
      "step": 7500
    },
    {
      "epoch": 0.15,
      "eval_avg_sts": 0.7464036949270956,
      "eval_sickr_spearman": 0.696571288761736,
      "eval_stsb_spearman": 0.796236101092455,
      "step": 7500
    },
    {
      "epoch": 0.16,
      "learning_rate": 8.378761779308948e-06,
      "loss": 0.452,
      "step": 8000
    },
    {
      "epoch": 0.16,
      "eval_avg_sts": 0.7481634305513074,
      "eval_sickr_spearman": 0.6957231555725417,
      "eval_stsb_spearman": 0.8006037055300731,
      "step": 8000
    },
    {
      "epoch": 0.17,
      "learning_rate": 8.277434390515758e-06,
      "loss": 0.4627,
      "step": 8500
    },
    {
      "epoch": 0.17,
      "eval_avg_sts": 0.7469738693319797,
      "eval_sickr_spearman": 0.6922042529878814,
      "eval_stsb_spearman": 0.8017434856760781,
      "step": 8500
    },
    {
      "epoch": 0.18,
      "learning_rate": 8.176107001722566e-06,
      "loss": 0.4579,
      "step": 9000
    },
    {
      "epoch": 0.18,
      "eval_avg_sts": 0.7445587366747802,
      "eval_sickr_spearman": 0.689870373737902,
      "eval_stsb_spearman": 0.7992470996116584,
      "step": 9000
    },
    {
      "epoch": 0.19,
      "learning_rate": 8.074779612929375e-06,
      "loss": 0.4631,
      "step": 9500
    },
    {
      "epoch": 0.19,
      "eval_avg_sts": 0.7415088027050005,
      "eval_sickr_spearman": 0.6845037627120188,
      "eval_stsb_spearman": 0.7985138426979821,
      "step": 9500
    },
    {
      "epoch": 0.2,
      "learning_rate": 7.973452224136185e-06,
      "loss": 0.4584,
      "step": 10000
    },
    {
      "epoch": 0.2,
      "eval_avg_sts": 0.7471105530073405,
      "eval_sickr_spearman": 0.6933327917472712,
      "eval_stsb_spearman": 0.8008883142674098,
      "step": 10000
    },
    {
      "epoch": 0.21,
      "learning_rate": 7.872124835342994e-06,
      "loss": 0.4589,
      "step": 10500
    },
    {
      "epoch": 0.21,
      "eval_avg_sts": 0.745595984233087,
      "eval_sickr_spearman": 0.6923495470697317,
      "eval_stsb_spearman": 0.7988424213964425,
      "step": 10500
    },
    {
      "epoch": 0.22,
      "learning_rate": 7.770797446549802e-06,
      "loss": 0.4587,
      "step": 11000
    },
    {
      "epoch": 0.22,
      "eval_avg_sts": 0.7466024940300805,
      "eval_sickr_spearman": 0.692575533401998,
      "eval_stsb_spearman": 0.800629454658163,
      "step": 11000
    },
    {
      "epoch": 0.23,
      "learning_rate": 7.669470057756611e-06,
      "loss": 0.4661,
      "step": 11500
    },
    {
      "epoch": 0.23,
      "eval_avg_sts": 0.7517425175405497,
      "eval_sickr_spearman": 0.6975225927568193,
      "eval_stsb_spearman": 0.8059624423242799,
      "step": 11500
    },
    {
      "epoch": 0.24,
      "learning_rate": 7.568142668963422e-06,
      "loss": 0.4621,
      "step": 12000
    },
    {
      "epoch": 0.24,
      "eval_avg_sts": 0.7477990028501873,
      "eval_sickr_spearman": 0.6881468256938967,
      "eval_stsb_spearman": 0.8074511800064779,
      "step": 12000
    },
    {
      "epoch": 0.25,
      "learning_rate": 7.466815280170231e-06,
      "loss": 0.4572,
      "step": 12500
    },
    {
      "epoch": 0.25,
      "eval_avg_sts": 0.7490645066698164,
      "eval_sickr_spearman": 0.690311203186901,
      "eval_stsb_spearman": 0.8078178101527318,
      "step": 12500
    },
    {
      "epoch": 0.26,
      "learning_rate": 7.36548789137704e-06,
      "loss": 0.4505,
      "step": 13000
    },
    {
      "epoch": 0.26,
      "eval_avg_sts": 0.7488632831459506,
      "eval_sickr_spearman": 0.6918468535620804,
      "eval_stsb_spearman": 0.8058797127298208,
      "step": 13000
    },
    {
      "epoch": 0.27,
      "learning_rate": 7.264160502583849e-06,
      "loss": 0.4587,
      "step": 13500
    },
    {
      "epoch": 0.27,
      "eval_avg_sts": 0.7492004417544827,
      "eval_sickr_spearman": 0.6910344555123561,
      "eval_stsb_spearman": 0.8073664279966094,
      "step": 13500
    },
    {
      "epoch": 0.28,
      "learning_rate": 7.162833113790658e-06,
      "loss": 0.4567,
      "step": 14000
    },
    {
      "epoch": 0.28,
      "eval_avg_sts": 0.7515448270625236,
      "eval_sickr_spearman": 0.6936582024595145,
      "eval_stsb_spearman": 0.8094314516655328,
      "step": 14000
    },
    {
      "epoch": 0.29,
      "learning_rate": 7.061505724997467e-06,
      "loss": 0.4543,
      "step": 14500
    },
    {
      "epoch": 0.29,
      "eval_avg_sts": 0.7513387605617963,
      "eval_sickr_spearman": 0.6940377922541799,
      "eval_stsb_spearman": 0.8086397288694127,
      "step": 14500
    },
    {
      "epoch": 0.3,
      "learning_rate": 6.9601783362042765e-06,
      "loss": 0.4523,
      "step": 15000
    },
    {
      "epoch": 0.3,
      "eval_avg_sts": 0.7500897857354842,
      "eval_sickr_spearman": 0.6939183389049033,
      "eval_stsb_spearman": 0.8062612325660653,
      "step": 15000
    },
    {
      "epoch": 0.31,
      "learning_rate": 6.858850947411085e-06,
      "loss": 0.4564,
      "step": 15500
    },
    {
      "epoch": 0.31,
      "eval_avg_sts": 0.7472002180219787,
      "eval_sickr_spearman": 0.6905755663692164,
      "eval_stsb_spearman": 0.803824869674741,
      "step": 15500
    },
    {
      "epoch": 0.32,
      "learning_rate": 6.757523558617895e-06,
      "loss": 0.4646,
      "step": 16000
    },
    {
      "epoch": 0.32,
      "eval_avg_sts": 0.7507948330729827,
      "eval_sickr_spearman": 0.6941705502185548,
      "eval_stsb_spearman": 0.8074191159274106,
      "step": 16000
    },
    {
      "epoch": 0.33,
      "learning_rate": 6.656196169824705e-06,
      "loss": 0.4492,
      "step": 16500
    },
    {
      "epoch": 0.33,
      "eval_avg_sts": 0.7529879565790001,
      "eval_sickr_spearman": 0.6971369030122712,
      "eval_stsb_spearman": 0.8088390101457289,
      "step": 16500
    },
    {
      "epoch": 0.34,
      "learning_rate": 6.554868781031514e-06,
      "loss": 0.4575,
      "step": 17000
    },
    {
      "epoch": 0.34,
      "eval_avg_sts": 0.7518569042679815,
      "eval_sickr_spearman": 0.6972905545057717,
      "eval_stsb_spearman": 0.8064232540301913,
      "step": 17000
    },
    {
      "epoch": 0.35,
      "learning_rate": 6.453541392238322e-06,
      "loss": 0.4637,
      "step": 17500
    },
    {
      "epoch": 0.35,
      "eval_avg_sts": 0.7498519912345287,
      "eval_sickr_spearman": 0.6938069547806683,
      "eval_stsb_spearman": 0.8058970276883892,
      "step": 17500
    },
    {
      "epoch": 0.36,
      "learning_rate": 6.352214003445132e-06,
      "loss": 0.4583,
      "step": 18000
    },
    {
      "epoch": 0.36,
      "eval_avg_sts": 0.7516061405002821,
      "eval_sickr_spearman": 0.6938626708583365,
      "eval_stsb_spearman": 0.8093496101422275,
      "step": 18000
    },
    {
      "epoch": 0.37,
      "learning_rate": 6.250886614651941e-06,
      "loss": 0.4645,
      "step": 18500
    },
    {
      "epoch": 0.37,
      "eval_avg_sts": 0.7534092039150044,
      "eval_sickr_spearman": 0.6986008429530034,
      "eval_stsb_spearman": 0.8082175648770055,
      "step": 18500
    },
    {
      "epoch": 0.39,
      "learning_rate": 6.14955922585875e-06,
      "loss": 0.4525,
      "step": 19000
    },
    {
      "epoch": 0.39,
      "eval_avg_sts": 0.7495273023794997,
      "eval_sickr_spearman": 0.6942551329881874,
      "eval_stsb_spearman": 0.8047994717708119,
      "step": 19000
    },
    {
      "epoch": 0.4,
      "learning_rate": 6.048231837065559e-06,
      "loss": 0.4616,
      "step": 19500
    },
    {
      "epoch": 0.4,
      "eval_avg_sts": 0.7498676062871128,
      "eval_sickr_spearman": 0.6946699295802069,
      "eval_stsb_spearman": 0.8050652829940186,
      "step": 19500
    },
    {
      "epoch": 0.41,
      "learning_rate": 5.946904448272368e-06,
      "loss": 0.4607,
      "step": 20000
    },
    {
      "epoch": 0.41,
      "eval_avg_sts": 0.7500055698075132,
      "eval_sickr_spearman": 0.6939670904728629,
      "eval_stsb_spearman": 0.8060440491421634,
      "step": 20000
    },
    {
      "epoch": 0.42,
      "learning_rate": 5.845577059479177e-06,
      "loss": 0.4548,
      "step": 20500
    },
    {
      "epoch": 0.42,
      "eval_avg_sts": 0.7494506896871379,
      "eval_sickr_spearman": 0.6917624148857522,
      "eval_stsb_spearman": 0.8071389644885236,
      "step": 20500
    },
    {
      "epoch": 0.43,
      "learning_rate": 5.744249670685987e-06,
      "loss": 0.4596,
      "step": 21000
    },
    {
      "epoch": 0.43,
      "eval_avg_sts": 0.7491829900995786,
      "eval_sickr_spearman": 0.6914101067567036,
      "eval_stsb_spearman": 0.8069558734424537,
      "step": 21000
    },
    {
      "epoch": 0.44,
      "learning_rate": 5.642922281892796e-06,
      "loss": 0.4484,
      "step": 21500
    },
    {
      "epoch": 0.44,
      "eval_avg_sts": 0.7496264233935017,
      "eval_sickr_spearman": 0.692096231040747,
      "eval_stsb_spearman": 0.8071566157462565,
      "step": 21500
    },
    {
      "epoch": 0.45,
      "learning_rate": 5.541594893099606e-06,
      "loss": 0.4604,
      "step": 22000
    },
    {
      "epoch": 0.45,
      "eval_avg_sts": 0.7475317779125041,
      "eval_sickr_spearman": 0.6897084608949542,
      "eval_stsb_spearman": 0.8053550949300541,
      "step": 22000
    },
    {
      "epoch": 0.46,
      "learning_rate": 5.440267504306414e-06,
      "loss": 0.4661,
      "step": 22500
    },
    {
      "epoch": 0.46,
      "eval_avg_sts": 0.749782311685921,
      "eval_sickr_spearman": 0.6907351256881937,
      "eval_stsb_spearman": 0.8088294976836483,
      "step": 22500
    },
    {
      "epoch": 0.47,
      "learning_rate": 5.338940115513224e-06,
      "loss": 0.4629,
      "step": 23000
    },
    {
      "epoch": 0.47,
      "eval_avg_sts": 0.7493434059552446,
      "eval_sickr_spearman": 0.6895577873297428,
      "eval_stsb_spearman": 0.8091290245807464,
      "step": 23000
    },
    {
      "epoch": 0.48,
      "learning_rate": 5.2376127267200325e-06,
      "loss": 0.46,
      "step": 23500
    },
    {
      "epoch": 0.48,
      "eval_avg_sts": 0.7494797845962304,
      "eval_sickr_spearman": 0.6888344389420848,
      "eval_stsb_spearman": 0.810125130250376,
      "step": 23500
    },
    {
      "epoch": 0.49,
      "learning_rate": 5.136285337926842e-06,
      "loss": 0.457,
      "step": 24000
    },
    {
      "epoch": 0.49,
      "eval_avg_sts": 0.7481539489426248,
      "eval_sickr_spearman": 0.6876096938865144,
      "eval_stsb_spearman": 0.808698203998735,
      "step": 24000
    },
    {
      "epoch": 0.5,
      "learning_rate": 5.034957949133651e-06,
      "loss": 0.4591,
      "step": 24500
    },
    {
      "epoch": 0.5,
      "eval_avg_sts": 0.7487851537577938,
      "eval_sickr_spearman": 0.6880744428240295,
      "eval_stsb_spearman": 0.809495864691558,
      "step": 24500
    },
    {
      "epoch": 0.51,
      "learning_rate": 4.93363056034046e-06,
      "loss": 0.4595,
      "step": 25000
    },
    {
      "epoch": 0.51,
      "eval_avg_sts": 0.749808924875282,
      "eval_sickr_spearman": 0.6898596147711799,
      "eval_stsb_spearman": 0.8097582349793842,
      "step": 25000
    },
    {
      "epoch": 0.52,
      "learning_rate": 4.83230317154727e-06,
      "loss": 0.4623,
      "step": 25500
    },
    {
      "epoch": 0.52,
      "eval_avg_sts": 0.7498742734395706,
      "eval_sickr_spearman": 0.6910954550111825,
      "eval_stsb_spearman": 0.8086530918679586,
      "step": 25500
    },
    {
      "epoch": 0.53,
      "learning_rate": 4.730975782754078e-06,
      "loss": 0.461,
      "step": 26000
    },
    {
      "epoch": 0.53,
      "eval_avg_sts": 0.74840720780684,
      "eval_sickr_spearman": 0.6880080158107407,
      "eval_stsb_spearman": 0.8088063998029394,
      "step": 26000
    },
    {
      "epoch": 0.54,
      "learning_rate": 4.629648393960888e-06,
      "loss": 0.4602,
      "step": 26500
    },
    {
      "epoch": 0.54,
      "eval_avg_sts": 0.7495470437599494,
      "eval_sickr_spearman": 0.6893540874285438,
      "eval_stsb_spearman": 0.8097400000913552,
      "step": 26500
    },
    {
      "epoch": 0.55,
      "learning_rate": 4.5283210051676975e-06,
      "loss": 0.4494,
      "step": 27000
    },
    {
      "epoch": 0.55,
      "eval_avg_sts": 0.7498239634592918,
      "eval_sickr_spearman": 0.6894677290145463,
      "eval_stsb_spearman": 0.8101801979040372,
      "step": 27000
    },
    {
      "epoch": 0.56,
      "learning_rate": 4.426993616374506e-06,
      "loss": 0.4621,
      "step": 27500
    },
    {
      "epoch": 0.56,
      "eval_avg_sts": 0.751903069869123,
      "eval_sickr_spearman": 0.692736149405207,
      "eval_stsb_spearman": 0.8110699903330391,
      "step": 27500
    },
    {
      "epoch": 0.57,
      "learning_rate": 4.325666227581316e-06,
      "loss": 0.4589,
      "step": 28000
    },
    {
      "epoch": 0.57,
      "eval_avg_sts": 0.7525324676455694,
      "eval_sickr_spearman": 0.6937445143487988,
      "eval_stsb_spearman": 0.81132042094234,
      "step": 28000
    },
    {
      "epoch": 0.58,
      "learning_rate": 4.224338838788124e-06,
      "loss": 0.4627,
      "step": 28500
    },
    {
      "epoch": 0.58,
      "eval_avg_sts": 0.7538511183199736,
      "eval_sickr_spearman": 0.696754047102708,
      "eval_stsb_spearman": 0.8109481895372391,
      "step": 28500
    },
    {
      "epoch": 0.59,
      "learning_rate": 4.123011449994934e-06,
      "loss": 0.4564,
      "step": 29000
    },
    {
      "epoch": 0.59,
      "eval_avg_sts": 0.7515831116726858,
      "eval_sickr_spearman": 0.6939315954889002,
      "eval_stsb_spearman": 0.8092346278564714,
      "step": 29000
    },
    {
      "epoch": 0.6,
      "learning_rate": 4.021684061201743e-06,
      "loss": 0.455,
      "step": 29500
    },
    {
      "epoch": 0.6,
      "eval_avg_sts": 0.7533852057891757,
      "eval_sickr_spearman": 0.6955959692159336,
      "eval_stsb_spearman": 0.8111744423624179,
      "step": 29500
    },
    {
      "epoch": 0.61,
      "learning_rate": 3.920356672408552e-06,
      "loss": 0.4632,
      "step": 30000
    },
    {
      "epoch": 0.61,
      "eval_avg_sts": 0.7518159970200462,
      "eval_sickr_spearman": 0.6946737720683219,
      "eval_stsb_spearman": 0.8089582219717706,
      "step": 30000
    },
    {
      "epoch": 0.62,
      "learning_rate": 3.819029283615362e-06,
      "loss": 0.4611,
      "step": 30500
    },
    {
      "epoch": 0.62,
      "eval_avg_sts": 0.7533598766129962,
      "eval_sickr_spearman": 0.6974833513469443,
      "eval_stsb_spearman": 0.8092364018790481,
      "step": 30500
    },
    {
      "epoch": 0.63,
      "learning_rate": 3.7177018948221703e-06,
      "loss": 0.4534,
      "step": 31000
    },
    {
      "epoch": 0.63,
      "eval_avg_sts": 0.7526633199575018,
      "eval_sickr_spearman": 0.6965692714554756,
      "eval_stsb_spearman": 0.808757368459528,
      "step": 31000
    },
    {
      "epoch": 0.64,
      "learning_rate": 3.6163745060289802e-06,
      "loss": 0.449,
      "step": 31500
    },
    {
      "epoch": 0.64,
      "eval_avg_sts": 0.7523877050131327,
      "eval_sickr_spearman": 0.6958016383922917,
      "eval_stsb_spearman": 0.8089737716339735,
      "step": 31500
    },
    {
      "epoch": 0.65,
      "learning_rate": 3.5150471172357893e-06,
      "loss": 0.4634,
      "step": 32000
    },
    {
      "epoch": 0.65,
      "eval_avg_sts": 0.7514477132225694,
      "eval_sickr_spearman": 0.6954868425534663,
      "eval_stsb_spearman": 0.8074085838916726,
      "step": 32000
    },
    {
      "epoch": 0.66,
      "learning_rate": 3.4137197284425984e-06,
      "loss": 0.4598,
      "step": 32500
    },
    {
      "epoch": 0.66,
      "eval_avg_sts": 0.7515196489317053,
      "eval_sickr_spearman": 0.697347375298773,
      "eval_stsb_spearman": 0.8056919225646377,
      "step": 32500
    },
    {
      "epoch": 0.67,
      "learning_rate": 3.312392339649407e-06,
      "loss": 0.4499,
      "step": 33000
    },
    {
      "epoch": 0.67,
      "eval_avg_sts": 0.7531240484791469,
      "eval_sickr_spearman": 0.7002055620520509,
      "eval_stsb_spearman": 0.806042534906243,
      "step": 33000
    },
    {
      "epoch": 0.68,
      "learning_rate": 3.211064950856217e-06,
      "loss": 0.4618,
      "step": 33500
    },
    {
      "epoch": 0.68,
      "eval_avg_sts": 0.7543101399622849,
      "eval_sickr_spearman": 0.701310901789446,
      "eval_stsb_spearman": 0.8073093781351238,
      "step": 33500
    },
    {
      "epoch": 0.69,
      "learning_rate": 3.109737562063026e-06,
      "loss": 0.4622,
      "step": 34000
    },
    {
      "epoch": 0.69,
      "eval_avg_sts": 0.7535012273240942,
      "eval_sickr_spearman": 0.6987900374615681,
      "eval_stsb_spearman": 0.8082124171866202,
      "step": 34000
    },
    {
      "epoch": 0.7,
      "learning_rate": 3.0084101732698353e-06,
      "loss": 0.4654,
      "step": 34500
    },
    {
      "epoch": 0.7,
      "eval_avg_sts": 0.7528923913364809,
      "eval_sickr_spearman": 0.698313232717592,
      "eval_stsb_spearman": 0.8074715499553696,
      "step": 34500
    },
    {
      "epoch": 0.71,
      "learning_rate": 2.907082784476644e-06,
      "loss": 0.4577,
      "step": 35000
    },
    {
      "epoch": 0.71,
      "eval_avg_sts": 0.7508251788488477,
      "eval_sickr_spearman": 0.6952676286065028,
      "eval_stsb_spearman": 0.8063827290911926,
      "step": 35000
    },
    {
      "epoch": 0.72,
      "learning_rate": 2.805755395683453e-06,
      "loss": 0.4506,
      "step": 35500
    },
    {
      "epoch": 0.72,
      "eval_avg_sts": 0.7509892106103371,
      "eval_sickr_spearman": 0.6953001456621763,
      "eval_stsb_spearman": 0.8066782755584977,
      "step": 35500
    },
    {
      "epoch": 0.73,
      "learning_rate": 2.704428006890263e-06,
      "loss": 0.4613,
      "step": 36000
    },
    {
      "epoch": 0.73,
      "eval_avg_sts": 0.7503441697836055,
      "eval_sickr_spearman": 0.6956849228157971,
      "eval_stsb_spearman": 0.8050034167514141,
      "step": 36000
    },
    {
      "epoch": 0.74,
      "learning_rate": 2.603100618097072e-06,
      "loss": 0.4639,
      "step": 36500
    },
    {
      "epoch": 0.74,
      "eval_avg_sts": 0.7496733652176951,
      "eval_sickr_spearman": 0.6941632014600347,
      "eval_stsb_spearman": 0.8051835289753555,
      "step": 36500
    },
    {
      "epoch": 0.75,
      "learning_rate": 2.501773229303881e-06,
      "loss": 0.4637,
      "step": 37000
    },
    {
      "epoch": 0.75,
      "eval_avg_sts": 0.7492569767417572,
      "eval_sickr_spearman": 0.6940407221513675,
      "eval_stsb_spearman": 0.8044732313321468,
      "step": 37000
    },
    {
      "epoch": 0.76,
      "learning_rate": 2.4004458405106903e-06,
      "loss": 0.4523,
      "step": 37500
    },
    {
      "epoch": 0.76,
      "eval_avg_sts": 0.7480946141530285,
      "eval_sickr_spearman": 0.6926867254018273,
      "eval_stsb_spearman": 0.8035025029042295,
      "step": 37500
    },
    {
      "epoch": 0.77,
      "learning_rate": 2.2991184517174994e-06,
      "loss": 0.4547,
      "step": 38000
    },
    {
      "epoch": 0.77,
      "eval_avg_sts": 0.7484090650166574,
      "eval_sickr_spearman": 0.6932337035850045,
      "eval_stsb_spearman": 0.8035844264483103,
      "step": 38000
    },
    {
      "epoch": 0.78,
      "learning_rate": 2.1977910629243085e-06,
      "loss": 0.4598,
      "step": 38500
    },
    {
      "epoch": 0.78,
      "eval_avg_sts": 0.7474650473627502,
      "eval_sickr_spearman": 0.6918610707681061,
      "eval_stsb_spearman": 0.8030690239573942,
      "step": 38500
    },
    {
      "epoch": 0.79,
      "learning_rate": 2.0964636741311176e-06,
      "loss": 0.4601,
      "step": 39000
    },
    {
      "epoch": 0.79,
      "eval_avg_sts": 0.7474876355051218,
      "eval_sickr_spearman": 0.6911482411916631,
      "eval_stsb_spearman": 0.8038270298185805,
      "step": 39000
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.9951362853379267e-06,
      "loss": 0.4582,
      "step": 39500
    },
    {
      "epoch": 0.8,
      "eval_avg_sts": 0.749122521812451,
      "eval_sickr_spearman": 0.6930461587670631,
      "eval_stsb_spearman": 0.8051988848578389,
      "step": 39500
    },
    {
      "epoch": 0.81,
      "learning_rate": 1.8938088965447363e-06,
      "loss": 0.4589,
      "step": 40000
    },
    {
      "epoch": 0.81,
      "eval_avg_sts": 0.747485970375817,
      "eval_sickr_spearman": 0.6916881107718275,
      "eval_stsb_spearman": 0.8032838299798064,
      "step": 40000
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.7924815077515454e-06,
      "loss": 0.4494,
      "step": 40500
    },
    {
      "epoch": 0.82,
      "eval_avg_sts": 0.7484522252119146,
      "eval_sickr_spearman": 0.6921244733283927,
      "eval_stsb_spearman": 0.8047799770954366,
      "step": 40500
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.6911541189583547e-06,
      "loss": 0.4692,
      "step": 41000
    },
    {
      "epoch": 0.83,
      "eval_avg_sts": 0.7489686533203876,
      "eval_sickr_spearman": 0.6921774036021774,
      "eval_stsb_spearman": 0.8057599030385979,
      "step": 41000
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.5898267301651638e-06,
      "loss": 0.4512,
      "step": 41500
    },
    {
      "epoch": 0.84,
      "eval_avg_sts": 0.7486704913862867,
      "eval_sickr_spearman": 0.6929249596649603,
      "eval_stsb_spearman": 0.8044160231076133,
      "step": 41500
    },
    {
      "epoch": 0.85,
      "learning_rate": 1.4884993413719729e-06,
      "loss": 0.4569,
      "step": 42000
    },
    {
      "epoch": 0.85,
      "eval_avg_sts": 0.7481669304082241,
      "eval_sickr_spearman": 0.691963905356285,
      "eval_stsb_spearman": 0.8043699554601632,
      "step": 42000
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.3871719525787822e-06,
      "loss": 0.4543,
      "step": 42500
    },
    {
      "epoch": 0.86,
      "eval_avg_sts": 0.7481576956303677,
      "eval_sickr_spearman": 0.6925726515359116,
      "eval_stsb_spearman": 0.8037427397248238,
      "step": 42500
    },
    {
      "epoch": 0.87,
      "learning_rate": 1.2858445637855913e-06,
      "loss": 0.4556,
      "step": 43000
    },
    {
      "epoch": 0.87,
      "eval_avg_sts": 0.7478866338195715,
      "eval_sickr_spearman": 0.6912525647439866,
      "eval_stsb_spearman": 0.8045207028951564,
      "step": 43000
    },
    {
      "epoch": 0.88,
      "learning_rate": 1.1845171749924006e-06,
      "loss": 0.4593,
      "step": 43500
    },
    {
      "epoch": 0.88,
      "eval_avg_sts": 0.7482308793293835,
      "eval_sickr_spearman": 0.6917689951466491,
      "eval_stsb_spearman": 0.8046927635121179,
      "step": 43500
    },
    {
      "epoch": 0.89,
      "learning_rate": 1.0831897861992097e-06,
      "loss": 0.4578,
      "step": 44000
    },
    {
      "epoch": 0.89,
      "eval_avg_sts": 0.7487411634362362,
      "eval_sickr_spearman": 0.6923790381660146,
      "eval_stsb_spearman": 0.8051032887064578,
      "step": 44000
    },
    {
      "epoch": 0.9,
      "learning_rate": 9.818623974060188e-07,
      "loss": 0.4617,
      "step": 44500
    },
    {
      "epoch": 0.9,
      "eval_avg_sts": 0.7495089794416554,
      "eval_sickr_spearman": 0.6935791912976488,
      "eval_stsb_spearman": 0.8054387675856621,
      "step": 44500
    },
    {
      "epoch": 0.91,
      "learning_rate": 8.80535008612828e-07,
      "loss": 0.4534,
      "step": 45000
    },
    {
      "epoch": 0.91,
      "eval_avg_sts": 0.7495111256181921,
      "eval_sickr_spearman": 0.6928939315734312,
      "eval_stsb_spearman": 0.806128319662953,
      "step": 45000
    },
    {
      "epoch": 0.92,
      "learning_rate": 7.792076198196374e-07,
      "loss": 0.4546,
      "step": 45500
    },
    {
      "epoch": 0.92,
      "eval_avg_sts": 0.748989011722548,
      "eval_sickr_spearman": 0.6924577611412717,
      "eval_stsb_spearman": 0.8055202623038241,
      "step": 45500
    },
    {
      "epoch": 0.93,
      "learning_rate": 6.778802310264466e-07,
      "loss": 0.4613,
      "step": 46000
    },
    {
      "epoch": 0.93,
      "eval_avg_sts": 0.7489659458032893,
      "eval_sickr_spearman": 0.6927594444894045,
      "eval_stsb_spearman": 0.8051724471171742,
      "step": 46000
    },
    {
      "epoch": 0.94,
      "learning_rate": 5.765528422332558e-07,
      "loss": 0.4594,
      "step": 46500
    },
    {
      "epoch": 0.94,
      "eval_avg_sts": 0.7486440766299414,
      "eval_sickr_spearman": 0.6924414305667828,
      "eval_stsb_spearman": 0.8048467226930999,
      "step": 46500
    },
    {
      "epoch": 0.95,
      "learning_rate": 4.752254534400649e-07,
      "loss": 0.4584,
      "step": 47000
    },
    {
      "epoch": 0.95,
      "eval_avg_sts": 0.7490758878100736,
      "eval_sickr_spearman": 0.6929523854238814,
      "eval_stsb_spearman": 0.805199390196266,
      "step": 47000
    },
    {
      "epoch": 0.96,
      "learning_rate": 3.738980646468741e-07,
      "loss": 0.4496,
      "step": 47500
    },
    {
      "epoch": 0.96,
      "eval_avg_sts": 0.7489021602575776,
      "eval_sickr_spearman": 0.6924429195309273,
      "eval_stsb_spearman": 0.8053614009842279,
      "step": 47500
    },
    {
      "epoch": 0.97,
      "learning_rate": 2.725706758536833e-07,
      "loss": 0.4521,
      "step": 48000
    },
    {
      "epoch": 0.97,
      "eval_avg_sts": 0.7486292878627302,
      "eval_sickr_spearman": 0.6919387850902329,
      "eval_stsb_spearman": 0.8053197906352275,
      "step": 48000
    },
    {
      "epoch": 0.98,
      "learning_rate": 1.7124328706049245e-07,
      "loss": 0.4618,
      "step": 48500
    },
    {
      "epoch": 0.98,
      "eval_avg_sts": 0.7487302574375756,
      "eval_sickr_spearman": 0.6921317740558113,
      "eval_stsb_spearman": 0.80532874081934,
      "step": 48500
    },
    {
      "epoch": 0.99,
      "learning_rate": 6.991589826730166e-08,
      "loss": 0.4501,
      "step": 49000
    },
    {
      "epoch": 0.99,
      "eval_avg_sts": 0.748655762950658,
      "eval_sickr_spearman": 0.6920557408222348,
      "eval_stsb_spearman": 0.8052557850790812,
      "step": 49000
    },
    {
      "epoch": 1.0,
      "step": 49345,
      "train_runtime": 47384.4879,
      "train_samples_per_second": 1.041
    }
  ],
  "max_steps": 49345,
  "num_train_epochs": 1,
  "total_flos": 1976154204533723136,
  "trial_name": null,
  "trial_params": null
}