Training in progress, step 405, checkpoint
Browse files- last-checkpoint/2_Dense/model.safetensors +1 -1
- last-checkpoint/README.md +33 -20
- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +278 -2
last-checkpoint/2_Dense/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3149984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f642c9d042a42b6fb5e4d89ed562ea76517d1cc374d93aa530166a44d526d80d
|
3 |
size 3149984
|
last-checkpoint/README.md
CHANGED
@@ -58,34 +58,34 @@ model-index:
|
|
58 |
type: sts_dev
|
59 |
metrics:
|
60 |
- type: pearson_cosine
|
61 |
-
value: 0.
|
62 |
name: Pearson Cosine
|
63 |
- type: spearman_cosine
|
64 |
-
value: 0.
|
65 |
name: Spearman Cosine
|
66 |
- type: pearson_euclidean
|
67 |
-
value: 0.
|
68 |
name: Pearson Euclidean
|
69 |
- type: spearman_euclidean
|
70 |
-
value: 0.
|
71 |
name: Spearman Euclidean
|
72 |
- type: pearson_manhattan
|
73 |
-
value: 0.
|
74 |
name: Pearson Manhattan
|
75 |
- type: spearman_manhattan
|
76 |
-
value: 0.
|
77 |
name: Spearman Manhattan
|
78 |
- type: pearson_dot
|
79 |
-
value: 0.
|
80 |
name: Pearson Dot
|
81 |
- type: spearman_dot
|
82 |
-
value: 0.
|
83 |
name: Spearman Dot
|
84 |
- type: pearson_max
|
85 |
-
value: 0.
|
86 |
name: Pearson Max
|
87 |
- type: spearman_max
|
88 |
-
value: 0.
|
89 |
name: Spearman Max
|
90 |
---
|
91 |
|
@@ -188,16 +188,16 @@ You can finetune this model on your own dataset.
|
|
188 |
|
189 |
| Metric | Value |
|
190 |
|:-------------------|:-----------|
|
191 |
-
| pearson_cosine | 0.
|
192 |
-
| spearman_cosine | 0.
|
193 |
-
| pearson_euclidean | 0.
|
194 |
-
| spearman_euclidean | 0.
|
195 |
-
| pearson_manhattan | 0.
|
196 |
-
| spearman_manhattan | 0.
|
197 |
-
| pearson_dot | 0.
|
198 |
-
| spearman_dot | 0.
|
199 |
-
| pearson_max | 0.
|
200 |
-
| **spearman_max** | **0.
|
201 |
|
202 |
<!--
|
203 |
## Bias, Risks and Limitations
|
@@ -469,6 +469,19 @@ You can finetune this model on your own dataset.
|
|
469 |
| 7.4903 | 330 | 0.4221 | 0.0312 | 0.7848 |
|
470 |
| 7.6017 | 335 | - | 0.0311 | 0.7854 |
|
471 |
| 7.7131 | 340 | 0.4268 | 0.0310 | 0.7857 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
472 |
|
473 |
|
474 |
### Framework Versions
|
|
|
58 |
type: sts_dev
|
59 |
metrics:
|
60 |
- type: pearson_cosine
|
61 |
+
value: 0.787184477170156
|
62 |
name: Pearson Cosine
|
63 |
- type: spearman_cosine
|
64 |
+
value: 0.78809909542145
|
65 |
name: Spearman Cosine
|
66 |
- type: pearson_euclidean
|
67 |
+
value: 0.7195444208609296
|
68 |
name: Pearson Euclidean
|
69 |
- type: spearman_euclidean
|
70 |
+
value: 0.7115938480269084
|
71 |
name: Spearman Euclidean
|
72 |
- type: pearson_manhattan
|
73 |
+
value: 0.7213936268781151
|
74 |
name: Pearson Manhattan
|
75 |
- type: spearman_manhattan
|
76 |
+
value: 0.7143300985487689
|
77 |
name: Spearman Manhattan
|
78 |
- type: pearson_dot
|
79 |
+
value: 0.7102603851217889
|
80 |
name: Pearson Dot
|
81 |
- type: spearman_dot
|
82 |
+
value: 0.7036724949513745
|
83 |
name: Spearman Dot
|
84 |
- type: pearson_max
|
85 |
+
value: 0.787184477170156
|
86 |
name: Pearson Max
|
87 |
- type: spearman_max
|
88 |
+
value: 0.78809909542145
|
89 |
name: Spearman Max
|
90 |
---
|
91 |
|
|
|
188 |
|
189 |
| Metric | Value |
|
190 |
|:-------------------|:-----------|
|
191 |
+
| pearson_cosine | 0.7872 |
|
192 |
+
| spearman_cosine | 0.7881 |
|
193 |
+
| pearson_euclidean | 0.7195 |
|
194 |
+
| spearman_euclidean | 0.7116 |
|
195 |
+
| pearson_manhattan | 0.7214 |
|
196 |
+
| spearman_manhattan | 0.7143 |
|
197 |
+
| pearson_dot | 0.7103 |
|
198 |
+
| spearman_dot | 0.7037 |
|
199 |
+
| pearson_max | 0.7872 |
|
200 |
+
| **spearman_max** | **0.7881** |
|
201 |
|
202 |
<!--
|
203 |
## Bias, Risks and Limitations
|
|
|
469 |
| 7.4903 | 330 | 0.4221 | 0.0312 | 0.7848 |
|
470 |
| 7.6017 | 335 | - | 0.0311 | 0.7854 |
|
471 |
| 7.7131 | 340 | 0.4268 | 0.0310 | 0.7857 |
|
472 |
+
| 7.8245 | 345 | - | 0.0309 | 0.7861 |
|
473 |
+
| 7.9359 | 350 | 0.4316 | 0.0309 | 0.7866 |
|
474 |
+
| 8.0669 | 355 | - | 0.0309 | 0.7872 |
|
475 |
+
| 8.1783 | 360 | 0.4277 | 0.0309 | 0.7873 |
|
476 |
+
| 8.2897 | 365 | - | 0.0308 | 0.7870 |
|
477 |
+
| 8.4011 | 370 | 0.3925 | 0.0308 | 0.7868 |
|
478 |
+
| 8.5125 | 375 | - | 0.0308 | 0.7866 |
|
479 |
+
| 8.6240 | 380 | 0.4049 | 0.0308 | 0.7869 |
|
480 |
+
| 8.7354 | 385 | - | 0.0308 | 0.7875 |
|
481 |
+
| 8.8468 | 390 | 0.3742 | 0.0308 | 0.7883 |
|
482 |
+
| 8.9582 | 395 | - | 0.0307 | 0.7885 |
|
483 |
+
| 9.0891 | 400 | 0.3498 | 0.0307 | 0.7886 |
|
484 |
+
| 9.2006 | 405 | - | 0.0307 | 0.7881 |
|
485 |
|
486 |
|
487 |
### Framework Versions
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 735216376
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9f26b706c07e140e2edd57fafcc709e1a43ae165be88a326e339c41e3237937a
|
3 |
size 735216376
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1476823354
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3785556f501aad29fcbea152f12a1290505e2d5120ff9ef892fea6a83710c125
|
3 |
size 1476823354
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0aed9b6bd6e6e1beacbd05484944e482a60b207824bbac7beedc13b893e316f6
|
3 |
size 15920
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:79119d62c189f0ce3be5549f3e7ca09f006763bebbe6fc4271cba9cc53baba97
|
3 |
size 15920
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c92c32fe5af972bd0601b45d325cd53a4b35f93bd22d6abc2cedf42357de0876
|
3 |
size 15920
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e7ac6dc7d789f396271db6e6f76a74abad025b3dd94a3a591312d8d14d22816
|
3 |
size 15920
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:14e06115d50f393b55f097c540247ea67d69dff792018e4dc25fcd26b97cd6a4
|
3 |
size 15920
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:97535fbca74da1d5f07193406191f57fcd05860183293eb61167310a17ef8004
|
3 |
size 15920
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:08ea52dce94ae2223b6b7b57c45f4cc100fd214b2dabf3e2391a9c5cfc032d9a
|
3 |
size 15920
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:08b0a80781bd7349dfd5c50e57ab7242d5086f72f741cf97b90a29e5a945d966
|
3 |
size 15920
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1000
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a0eac241248b44a91e36b8370560e3c209ccb1e7f6634c18a7bc72a219fac2ee
|
3 |
size 1000
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 5,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1469,6 +1469,282 @@
|
|
1469 |
"eval_sts_dev_spearman_manhattan": 0.7101303213368534,
|
1470 |
"eval_sts_dev_spearman_max": 0.7857175803487115,
|
1471 |
"step": 340
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1472 |
}
|
1473 |
],
|
1474 |
"logging_steps": 10,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 9.200557103064067,
|
5 |
"eval_steps": 5,
|
6 |
+
"global_step": 405,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1469 |
"eval_sts_dev_spearman_manhattan": 0.7101303213368534,
|
1470 |
"eval_sts_dev_spearman_max": 0.7857175803487115,
|
1471 |
"step": 340
|
1472 |
+
},
|
1473 |
+
{
|
1474 |
+
"epoch": 7.8245125348189415,
|
1475 |
+
"eval_loss": 0.030945729464292526,
|
1476 |
+
"eval_runtime": 6.9722,
|
1477 |
+
"eval_samples_per_second": 215.14,
|
1478 |
+
"eval_steps_per_second": 26.964,
|
1479 |
+
"eval_sts_dev_pearson_cosine": 0.7852280992749574,
|
1480 |
+
"eval_sts_dev_pearson_dot": 0.7063015365766652,
|
1481 |
+
"eval_sts_dev_pearson_euclidean": 0.71618048050416,
|
1482 |
+
"eval_sts_dev_pearson_manhattan": 0.7181959951306995,
|
1483 |
+
"eval_sts_dev_pearson_max": 0.7852280992749574,
|
1484 |
+
"eval_sts_dev_spearman_cosine": 0.7861447827888495,
|
1485 |
+
"eval_sts_dev_spearman_dot": 0.7007253260607372,
|
1486 |
+
"eval_sts_dev_spearman_euclidean": 0.7080307843557273,
|
1487 |
+
"eval_sts_dev_spearman_manhattan": 0.710707788624518,
|
1488 |
+
"eval_sts_dev_spearman_max": 0.7861447827888495,
|
1489 |
+
"step": 345
|
1490 |
+
},
|
1491 |
+
{
|
1492 |
+
"epoch": 7.935933147632312,
|
1493 |
+
"grad_norm": 4.521576881408691,
|
1494 |
+
"learning_rate": 2.435206122803966e-06,
|
1495 |
+
"loss": 0.4316,
|
1496 |
+
"step": 350
|
1497 |
+
},
|
1498 |
+
{
|
1499 |
+
"epoch": 7.935933147632312,
|
1500 |
+
"eval_loss": 0.030903467908501625,
|
1501 |
+
"eval_runtime": 6.8754,
|
1502 |
+
"eval_samples_per_second": 218.169,
|
1503 |
+
"eval_steps_per_second": 27.344,
|
1504 |
+
"eval_sts_dev_pearson_cosine": 0.7857408106817081,
|
1505 |
+
"eval_sts_dev_pearson_dot": 0.7063227803586387,
|
1506 |
+
"eval_sts_dev_pearson_euclidean": 0.7171064497768416,
|
1507 |
+
"eval_sts_dev_pearson_manhattan": 0.7190977579026478,
|
1508 |
+
"eval_sts_dev_pearson_max": 0.7857408106817081,
|
1509 |
+
"eval_sts_dev_spearman_cosine": 0.786647063435545,
|
1510 |
+
"eval_sts_dev_spearman_dot": 0.7004210617791904,
|
1511 |
+
"eval_sts_dev_spearman_euclidean": 0.7090060931384192,
|
1512 |
+
"eval_sts_dev_spearman_manhattan": 0.7117304388117395,
|
1513 |
+
"eval_sts_dev_spearman_max": 0.786647063435545,
|
1514 |
+
"step": 350
|
1515 |
+
},
|
1516 |
+
{
|
1517 |
+
"epoch": 8.066852367688023,
|
1518 |
+
"eval_loss": 0.03090326115489006,
|
1519 |
+
"eval_runtime": 6.7967,
|
1520 |
+
"eval_samples_per_second": 220.696,
|
1521 |
+
"eval_steps_per_second": 27.661,
|
1522 |
+
"eval_sts_dev_pearson_cosine": 0.7860914327083659,
|
1523 |
+
"eval_sts_dev_pearson_dot": 0.7067109311815922,
|
1524 |
+
"eval_sts_dev_pearson_euclidean": 0.7179978723314155,
|
1525 |
+
"eval_sts_dev_pearson_manhattan": 0.7199506434198831,
|
1526 |
+
"eval_sts_dev_pearson_max": 0.7860914327083659,
|
1527 |
+
"eval_sts_dev_spearman_cosine": 0.7871799411716375,
|
1528 |
+
"eval_sts_dev_spearman_dot": 0.7005966817709771,
|
1529 |
+
"eval_sts_dev_spearman_euclidean": 0.7099849983444726,
|
1530 |
+
"eval_sts_dev_spearman_manhattan": 0.7126081974741519,
|
1531 |
+
"eval_sts_dev_spearman_max": 0.7871799411716375,
|
1532 |
+
"step": 355
|
1533 |
+
},
|
1534 |
+
{
|
1535 |
+
"epoch": 8.178272980501394,
|
1536 |
+
"grad_norm": 3.464381217956543,
|
1537 |
+
"learning_rate": 2.504783440598365e-06,
|
1538 |
+
"loss": 0.4277,
|
1539 |
+
"step": 360
|
1540 |
+
},
|
1541 |
+
{
|
1542 |
+
"epoch": 8.178272980501394,
|
1543 |
+
"eval_loss": 0.030861668288707733,
|
1544 |
+
"eval_runtime": 6.8952,
|
1545 |
+
"eval_samples_per_second": 217.544,
|
1546 |
+
"eval_steps_per_second": 27.265,
|
1547 |
+
"eval_sts_dev_pearson_cosine": 0.7862113365203784,
|
1548 |
+
"eval_sts_dev_pearson_dot": 0.7070142268847368,
|
1549 |
+
"eval_sts_dev_pearson_euclidean": 0.7181137478219999,
|
1550 |
+
"eval_sts_dev_pearson_manhattan": 0.7200573508948256,
|
1551 |
+
"eval_sts_dev_pearson_max": 0.7862113365203784,
|
1552 |
+
"eval_sts_dev_spearman_cosine": 0.7873051906331155,
|
1553 |
+
"eval_sts_dev_spearman_dot": 0.700851803333668,
|
1554 |
+
"eval_sts_dev_spearman_euclidean": 0.7101326235059475,
|
1555 |
+
"eval_sts_dev_spearman_manhattan": 0.7126791959108771,
|
1556 |
+
"eval_sts_dev_spearman_max": 0.7873051906331155,
|
1557 |
+
"step": 360
|
1558 |
+
},
|
1559 |
+
{
|
1560 |
+
"epoch": 8.289693593314762,
|
1561 |
+
"eval_loss": 0.03079277276992798,
|
1562 |
+
"eval_runtime": 7.0041,
|
1563 |
+
"eval_samples_per_second": 214.159,
|
1564 |
+
"eval_steps_per_second": 26.841,
|
1565 |
+
"eval_sts_dev_pearson_cosine": 0.7861051555153227,
|
1566 |
+
"eval_sts_dev_pearson_dot": 0.7077462081618229,
|
1567 |
+
"eval_sts_dev_pearson_euclidean": 0.7175047036545574,
|
1568 |
+
"eval_sts_dev_pearson_manhattan": 0.7194616943503004,
|
1569 |
+
"eval_sts_dev_pearson_max": 0.7861051555153227,
|
1570 |
+
"eval_sts_dev_spearman_cosine": 0.7869754283660466,
|
1571 |
+
"eval_sts_dev_spearman_dot": 0.7018953525077267,
|
1572 |
+
"eval_sts_dev_spearman_euclidean": 0.7093618435488815,
|
1573 |
+
"eval_sts_dev_spearman_manhattan": 0.7120432245619701,
|
1574 |
+
"eval_sts_dev_spearman_max": 0.7869754283660466,
|
1575 |
+
"step": 365
|
1576 |
+
},
|
1577 |
+
{
|
1578 |
+
"epoch": 8.401114206128133,
|
1579 |
+
"grad_norm": 3.629032850265503,
|
1580 |
+
"learning_rate": 2.5743607583927645e-06,
|
1581 |
+
"loss": 0.3925,
|
1582 |
+
"step": 370
|
1583 |
+
},
|
1584 |
+
{
|
1585 |
+
"epoch": 8.401114206128133,
|
1586 |
+
"eval_loss": 0.03077574074268341,
|
1587 |
+
"eval_runtime": 6.9569,
|
1588 |
+
"eval_samples_per_second": 215.613,
|
1589 |
+
"eval_steps_per_second": 27.024,
|
1590 |
+
"eval_sts_dev_pearson_cosine": 0.7860927703016911,
|
1591 |
+
"eval_sts_dev_pearson_dot": 0.7084805810982604,
|
1592 |
+
"eval_sts_dev_pearson_euclidean": 0.7171292733763057,
|
1593 |
+
"eval_sts_dev_pearson_manhattan": 0.7191008391698412,
|
1594 |
+
"eval_sts_dev_pearson_max": 0.7860927703016911,
|
1595 |
+
"eval_sts_dev_spearman_cosine": 0.7868465023058949,
|
1596 |
+
"eval_sts_dev_spearman_dot": 0.7026257860756843,
|
1597 |
+
"eval_sts_dev_spearman_euclidean": 0.7087433915922463,
|
1598 |
+
"eval_sts_dev_spearman_manhattan": 0.7115662090675204,
|
1599 |
+
"eval_sts_dev_spearman_max": 0.7868465023058949,
|
1600 |
+
"step": 370
|
1601 |
+
},
|
1602 |
+
{
|
1603 |
+
"epoch": 8.512534818941504,
|
1604 |
+
"eval_loss": 0.03077036887407303,
|
1605 |
+
"eval_runtime": 6.8481,
|
1606 |
+
"eval_samples_per_second": 219.038,
|
1607 |
+
"eval_steps_per_second": 27.453,
|
1608 |
+
"eval_sts_dev_pearson_cosine": 0.7860543259557101,
|
1609 |
+
"eval_sts_dev_pearson_dot": 0.7090029747286515,
|
1610 |
+
"eval_sts_dev_pearson_euclidean": 0.7168001987123229,
|
1611 |
+
"eval_sts_dev_pearson_manhattan": 0.7187912798445806,
|
1612 |
+
"eval_sts_dev_pearson_max": 0.7860543259557101,
|
1613 |
+
"eval_sts_dev_spearman_cosine": 0.786577121013552,
|
1614 |
+
"eval_sts_dev_spearman_dot": 0.7032207123703509,
|
1615 |
+
"eval_sts_dev_spearman_euclidean": 0.7083026579268292,
|
1616 |
+
"eval_sts_dev_spearman_manhattan": 0.7111138102646555,
|
1617 |
+
"eval_sts_dev_spearman_max": 0.786577121013552,
|
1618 |
+
"step": 375
|
1619 |
+
},
|
1620 |
+
{
|
1621 |
+
"epoch": 8.623955431754874,
|
1622 |
+
"grad_norm": 4.5424346923828125,
|
1623 |
+
"learning_rate": 2.643938076187163e-06,
|
1624 |
+
"loss": 0.4049,
|
1625 |
+
"step": 380
|
1626 |
+
},
|
1627 |
+
{
|
1628 |
+
"epoch": 8.623955431754874,
|
1629 |
+
"eval_loss": 0.030785972252488136,
|
1630 |
+
"eval_runtime": 6.9052,
|
1631 |
+
"eval_samples_per_second": 217.228,
|
1632 |
+
"eval_steps_per_second": 27.226,
|
1633 |
+
"eval_sts_dev_pearson_cosine": 0.786338341456081,
|
1634 |
+
"eval_sts_dev_pearson_dot": 0.7090251722360976,
|
1635 |
+
"eval_sts_dev_pearson_euclidean": 0.7176375494602096,
|
1636 |
+
"eval_sts_dev_pearson_manhattan": 0.7195903686388057,
|
1637 |
+
"eval_sts_dev_pearson_max": 0.786338341456081,
|
1638 |
+
"eval_sts_dev_spearman_cosine": 0.7869461186588641,
|
1639 |
+
"eval_sts_dev_spearman_dot": 0.7030353980707192,
|
1640 |
+
"eval_sts_dev_spearman_euclidean": 0.7093240329985625,
|
1641 |
+
"eval_sts_dev_spearman_manhattan": 0.7120013731894795,
|
1642 |
+
"eval_sts_dev_spearman_max": 0.7869461186588641,
|
1643 |
+
"step": 380
|
1644 |
+
},
|
1645 |
+
{
|
1646 |
+
"epoch": 8.735376044568245,
|
1647 |
+
"eval_loss": 0.03077947534620762,
|
1648 |
+
"eval_runtime": 6.94,
|
1649 |
+
"eval_samples_per_second": 216.137,
|
1650 |
+
"eval_steps_per_second": 27.089,
|
1651 |
+
"eval_sts_dev_pearson_cosine": 0.7867836664964302,
|
1652 |
+
"eval_sts_dev_pearson_dot": 0.7089649699768177,
|
1653 |
+
"eval_sts_dev_pearson_euclidean": 0.7185998785212442,
|
1654 |
+
"eval_sts_dev_pearson_manhattan": 0.7205256023581162,
|
1655 |
+
"eval_sts_dev_pearson_max": 0.7867836664964302,
|
1656 |
+
"eval_sts_dev_spearman_cosine": 0.7875195626790124,
|
1657 |
+
"eval_sts_dev_spearman_dot": 0.7028351666319841,
|
1658 |
+
"eval_sts_dev_spearman_euclidean": 0.7105482738364566,
|
1659 |
+
"eval_sts_dev_spearman_manhattan": 0.7132642042369475,
|
1660 |
+
"eval_sts_dev_spearman_max": 0.7875195626790124,
|
1661 |
+
"step": 385
|
1662 |
+
},
|
1663 |
+
{
|
1664 |
+
"epoch": 8.846796657381615,
|
1665 |
+
"grad_norm": 3.7269480228424072,
|
1666 |
+
"learning_rate": 2.7135153939815623e-06,
|
1667 |
+
"loss": 0.3742,
|
1668 |
+
"step": 390
|
1669 |
+
},
|
1670 |
+
{
|
1671 |
+
"epoch": 8.846796657381615,
|
1672 |
+
"eval_loss": 0.030757909640669823,
|
1673 |
+
"eval_runtime": 6.912,
|
1674 |
+
"eval_samples_per_second": 217.015,
|
1675 |
+
"eval_steps_per_second": 27.199,
|
1676 |
+
"eval_sts_dev_pearson_cosine": 0.7873307957198338,
|
1677 |
+
"eval_sts_dev_pearson_dot": 0.7087450117938812,
|
1678 |
+
"eval_sts_dev_pearson_euclidean": 0.7199394166229915,
|
1679 |
+
"eval_sts_dev_pearson_manhattan": 0.7218118008402783,
|
1680 |
+
"eval_sts_dev_pearson_max": 0.7873307957198338,
|
1681 |
+
"eval_sts_dev_spearman_cosine": 0.7883481466120934,
|
1682 |
+
"eval_sts_dev_spearman_dot": 0.702431533404311,
|
1683 |
+
"eval_sts_dev_spearman_euclidean": 0.7122286167501692,
|
1684 |
+
"eval_sts_dev_spearman_manhattan": 0.7149544811678771,
|
1685 |
+
"eval_sts_dev_spearman_max": 0.7883481466120934,
|
1686 |
+
"step": 390
|
1687 |
+
},
|
1688 |
+
{
|
1689 |
+
"epoch": 8.958217270194986,
|
1690 |
+
"eval_loss": 0.03074067085981369,
|
1691 |
+
"eval_runtime": 7.0786,
|
1692 |
+
"eval_samples_per_second": 211.905,
|
1693 |
+
"eval_steps_per_second": 26.559,
|
1694 |
+
"eval_sts_dev_pearson_cosine": 0.7875281932009626,
|
1695 |
+
"eval_sts_dev_pearson_dot": 0.7091183187974348,
|
1696 |
+
"eval_sts_dev_pearson_euclidean": 0.720306579358833,
|
1697 |
+
"eval_sts_dev_pearson_manhattan": 0.7221545912209083,
|
1698 |
+
"eval_sts_dev_pearson_max": 0.7875281932009626,
|
1699 |
+
"eval_sts_dev_spearman_cosine": 0.7884911216315376,
|
1700 |
+
"eval_sts_dev_spearman_dot": 0.7026504547905195,
|
1701 |
+
"eval_sts_dev_spearman_euclidean": 0.7125846397557779,
|
1702 |
+
"eval_sts_dev_spearman_manhattan": 0.7153917764693033,
|
1703 |
+
"eval_sts_dev_spearman_max": 0.7884911216315376,
|
1704 |
+
"step": 395
|
1705 |
+
},
|
1706 |
+
{
|
1707 |
+
"epoch": 9.089136490250697,
|
1708 |
+
"grad_norm": 3.8048255443573,
|
1709 |
+
"learning_rate": 2.7830927117759614e-06,
|
1710 |
+
"loss": 0.3498,
|
1711 |
+
"step": 400
|
1712 |
+
},
|
1713 |
+
{
|
1714 |
+
"epoch": 9.089136490250697,
|
1715 |
+
"eval_loss": 0.03073756769299507,
|
1716 |
+
"eval_runtime": 7.1819,
|
1717 |
+
"eval_samples_per_second": 208.858,
|
1718 |
+
"eval_steps_per_second": 26.177,
|
1719 |
+
"eval_sts_dev_pearson_cosine": 0.7875285006609543,
|
1720 |
+
"eval_sts_dev_pearson_dot": 0.709718276464936,
|
1721 |
+
"eval_sts_dev_pearson_euclidean": 0.7202436438310591,
|
1722 |
+
"eval_sts_dev_pearson_manhattan": 0.7220766094080024,
|
1723 |
+
"eval_sts_dev_pearson_max": 0.7875285006609543,
|
1724 |
+
"eval_sts_dev_spearman_cosine": 0.7885939335328866,
|
1725 |
+
"eval_sts_dev_spearman_dot": 0.7032536436958657,
|
1726 |
+
"eval_sts_dev_spearman_euclidean": 0.7124855846354039,
|
1727 |
+
"eval_sts_dev_spearman_manhattan": 0.7153797502128406,
|
1728 |
+
"eval_sts_dev_spearman_max": 0.7885939335328866,
|
1729 |
+
"step": 400
|
1730 |
+
},
|
1731 |
+
{
|
1732 |
+
"epoch": 9.200557103064067,
|
1733 |
+
"eval_loss": 0.03071259893476963,
|
1734 |
+
"eval_runtime": 6.8201,
|
1735 |
+
"eval_samples_per_second": 219.938,
|
1736 |
+
"eval_steps_per_second": 27.566,
|
1737 |
+
"eval_sts_dev_pearson_cosine": 0.787184477170156,
|
1738 |
+
"eval_sts_dev_pearson_dot": 0.7102603851217889,
|
1739 |
+
"eval_sts_dev_pearson_euclidean": 0.7195444208609296,
|
1740 |
+
"eval_sts_dev_pearson_manhattan": 0.7213936268781151,
|
1741 |
+
"eval_sts_dev_pearson_max": 0.787184477170156,
|
1742 |
+
"eval_sts_dev_spearman_cosine": 0.78809909542145,
|
1743 |
+
"eval_sts_dev_spearman_dot": 0.7036724949513745,
|
1744 |
+
"eval_sts_dev_spearman_euclidean": 0.7115938480269084,
|
1745 |
+
"eval_sts_dev_spearman_manhattan": 0.7143300985487689,
|
1746 |
+
"eval_sts_dev_spearman_max": 0.78809909542145,
|
1747 |
+
"step": 405
|
1748 |
}
|
1749 |
],
|
1750 |
"logging_steps": 10,
|