CocoRoF commited on
Commit
bb182c7
·
verified ·
1 Parent(s): 2dec91b

Training in progress, step 405, checkpoint

Browse files
last-checkpoint/2_Dense/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b870f7daac0f4a980d7a920673d51b73e853f58b3a99e04a4bfed0ab4836418
3
  size 3149984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f642c9d042a42b6fb5e4d89ed562ea76517d1cc374d93aa530166a44d526d80d
3
  size 3149984
last-checkpoint/README.md CHANGED
@@ -58,34 +58,34 @@ model-index:
58
  type: sts_dev
59
  metrics:
60
  - type: pearson_cosine
61
- value: 0.7848078944075182
62
  name: Pearson Cosine
63
  - type: spearman_cosine
64
- value: 0.7857175803487115
65
  name: Spearman Cosine
66
  - type: pearson_euclidean
67
- value: 0.7156669541008578
68
  name: Pearson Euclidean
69
  - type: spearman_euclidean
70
- value: 0.7074396606352066
71
  name: Spearman Euclidean
72
  - type: pearson_manhattan
73
- value: 0.7176849379592309
74
  name: Pearson Manhattan
75
  - type: spearman_manhattan
76
- value: 0.7101303213368534
77
  name: Spearman Manhattan
78
  - type: pearson_dot
79
- value: 0.7062611613987171
80
  name: Pearson Dot
81
  - type: spearman_dot
82
- value: 0.7006071388870717
83
  name: Spearman Dot
84
  - type: pearson_max
85
- value: 0.7848078944075182
86
  name: Pearson Max
87
  - type: spearman_max
88
- value: 0.7857175803487115
89
  name: Spearman Max
90
  ---
91
 
@@ -188,16 +188,16 @@ You can finetune this model on your own dataset.
188
 
189
  | Metric | Value |
190
  |:-------------------|:-----------|
191
- | pearson_cosine | 0.7848 |
192
- | spearman_cosine | 0.7857 |
193
- | pearson_euclidean | 0.7157 |
194
- | spearman_euclidean | 0.7074 |
195
- | pearson_manhattan | 0.7177 |
196
- | spearman_manhattan | 0.7101 |
197
- | pearson_dot | 0.7063 |
198
- | spearman_dot | 0.7006 |
199
- | pearson_max | 0.7848 |
200
- | **spearman_max** | **0.7857** |
201
 
202
  <!--
203
  ## Bias, Risks and Limitations
@@ -469,6 +469,19 @@ You can finetune this model on your own dataset.
469
  | 7.4903 | 330 | 0.4221 | 0.0312 | 0.7848 |
470
  | 7.6017 | 335 | - | 0.0311 | 0.7854 |
471
  | 7.7131 | 340 | 0.4268 | 0.0310 | 0.7857 |
 
 
 
 
 
 
 
 
 
 
 
 
 
472
 
473
 
474
  ### Framework Versions
 
58
  type: sts_dev
59
  metrics:
60
  - type: pearson_cosine
61
+ value: 0.787184477170156
62
  name: Pearson Cosine
63
  - type: spearman_cosine
64
+ value: 0.78809909542145
65
  name: Spearman Cosine
66
  - type: pearson_euclidean
67
+ value: 0.7195444208609296
68
  name: Pearson Euclidean
69
  - type: spearman_euclidean
70
+ value: 0.7115938480269084
71
  name: Spearman Euclidean
72
  - type: pearson_manhattan
73
+ value: 0.7213936268781151
74
  name: Pearson Manhattan
75
  - type: spearman_manhattan
76
+ value: 0.7143300985487689
77
  name: Spearman Manhattan
78
  - type: pearson_dot
79
+ value: 0.7102603851217889
80
  name: Pearson Dot
81
  - type: spearman_dot
82
+ value: 0.7036724949513745
83
  name: Spearman Dot
84
  - type: pearson_max
85
+ value: 0.787184477170156
86
  name: Pearson Max
87
  - type: spearman_max
88
+ value: 0.78809909542145
89
  name: Spearman Max
90
  ---
91
 
 
188
 
189
  | Metric | Value |
190
  |:-------------------|:-----------|
191
+ | pearson_cosine | 0.7872 |
192
+ | spearman_cosine | 0.7881 |
193
+ | pearson_euclidean | 0.7195 |
194
+ | spearman_euclidean | 0.7116 |
195
+ | pearson_manhattan | 0.7214 |
196
+ | spearman_manhattan | 0.7143 |
197
+ | pearson_dot | 0.7103 |
198
+ | spearman_dot | 0.7037 |
199
+ | pearson_max | 0.7872 |
200
+ | **spearman_max** | **0.7881** |
201
 
202
  <!--
203
  ## Bias, Risks and Limitations
 
469
  | 7.4903 | 330 | 0.4221 | 0.0312 | 0.7848 |
470
  | 7.6017 | 335 | - | 0.0311 | 0.7854 |
471
  | 7.7131 | 340 | 0.4268 | 0.0310 | 0.7857 |
472
+ | 7.8245 | 345 | - | 0.0309 | 0.7861 |
473
+ | 7.9359 | 350 | 0.4316 | 0.0309 | 0.7866 |
474
+ | 8.0669 | 355 | - | 0.0309 | 0.7872 |
475
+ | 8.1783 | 360 | 0.4277 | 0.0309 | 0.7873 |
476
+ | 8.2897 | 365 | - | 0.0308 | 0.7870 |
477
+ | 8.4011 | 370 | 0.3925 | 0.0308 | 0.7868 |
478
+ | 8.5125 | 375 | - | 0.0308 | 0.7866 |
479
+ | 8.6240 | 380 | 0.4049 | 0.0308 | 0.7869 |
480
+ | 8.7354 | 385 | - | 0.0308 | 0.7875 |
481
+ | 8.8468 | 390 | 0.3742 | 0.0308 | 0.7883 |
482
+ | 8.9582 | 395 | - | 0.0307 | 0.7885 |
483
+ | 9.0891 | 400 | 0.3498 | 0.0307 | 0.7886 |
484
+ | 9.2006 | 405 | - | 0.0307 | 0.7881 |
485
 
486
 
487
  ### Framework Versions
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d04fa800717f58ef5e6f0860853987a0cb36d6871103a505ec7f18f0c8d1e24
3
  size 735216376
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f26b706c07e140e2edd57fafcc709e1a43ae165be88a326e339c41e3237937a
3
  size 735216376
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc467df84c2a0e88611b2aab8643e37351c039e2c7f1103496ef14ec92c0c4ad
3
  size 1476823354
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3785556f501aad29fcbea152f12a1290505e2d5120ff9ef892fea6a83710c125
3
  size 1476823354
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3144a48358cd6bf04cc853f08e410043fb407f87bd6a0c68ca70ad834c31efe
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0aed9b6bd6e6e1beacbd05484944e482a60b207824bbac7beedc13b893e316f6
3
  size 15920
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55077727665855a5b89991696b2904041ae70f3107b9b237a46fbba9a02e4ee3
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79119d62c189f0ce3be5549f3e7ca09f006763bebbe6fc4271cba9cc53baba97
3
  size 15920
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b76a5815810a65e67b40cd681f2067cb6b3b6c464e81e4400284c03451340b87
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c92c32fe5af972bd0601b45d325cd53a4b35f93bd22d6abc2cedf42357de0876
3
  size 15920
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8666828916f11f1cabe404ec73ef559b94d85c4b551426cb9e985c001d916437
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e7ac6dc7d789f396271db6e6f76a74abad025b3dd94a3a591312d8d14d22816
3
  size 15920
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d521e6afe5d08a185f41247427404b38548d3e87b68fe3d300f8b3dcc4a15fe
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14e06115d50f393b55f097c540247ea67d69dff792018e4dc25fcd26b97cd6a4
3
  size 15920
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:626147651aa870bb47ab8b580c498252f78757aa80cc5932d6b3e11e2aef7d99
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97535fbca74da1d5f07193406191f57fcd05860183293eb61167310a17ef8004
3
  size 15920
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:52b253b36f823ae06fbd7e190036c923117ff265d050024b030f5175e9b97961
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08ea52dce94ae2223b6b7b57c45f4cc100fd214b2dabf3e2391a9c5cfc032d9a
3
  size 15920
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:132709e360606a72b2d2be0fe29824d3dfdafc54ed713803a6b6d2b2f7fff3a4
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08b0a80781bd7349dfd5c50e57ab7242d5086f72f741cf97b90a29e5a945d966
3
  size 15920
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d5d490efe88b2712da4404d6da858c3fdbb2ab6c9bd797ff7799d7870cc26d9
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0eac241248b44a91e36b8370560e3c209ccb1e7f6634c18a7bc72a219fac2ee
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 7.713091922005571,
5
  "eval_steps": 5,
6
- "global_step": 340,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1469,6 +1469,282 @@
1469
  "eval_sts_dev_spearman_manhattan": 0.7101303213368534,
1470
  "eval_sts_dev_spearman_max": 0.7857175803487115,
1471
  "step": 340
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1472
  }
1473
  ],
1474
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.200557103064067,
5
  "eval_steps": 5,
6
+ "global_step": 405,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1469
  "eval_sts_dev_spearman_manhattan": 0.7101303213368534,
1470
  "eval_sts_dev_spearman_max": 0.7857175803487115,
1471
  "step": 340
1472
+ },
1473
+ {
1474
+ "epoch": 7.8245125348189415,
1475
+ "eval_loss": 0.030945729464292526,
1476
+ "eval_runtime": 6.9722,
1477
+ "eval_samples_per_second": 215.14,
1478
+ "eval_steps_per_second": 26.964,
1479
+ "eval_sts_dev_pearson_cosine": 0.7852280992749574,
1480
+ "eval_sts_dev_pearson_dot": 0.7063015365766652,
1481
+ "eval_sts_dev_pearson_euclidean": 0.71618048050416,
1482
+ "eval_sts_dev_pearson_manhattan": 0.7181959951306995,
1483
+ "eval_sts_dev_pearson_max": 0.7852280992749574,
1484
+ "eval_sts_dev_spearman_cosine": 0.7861447827888495,
1485
+ "eval_sts_dev_spearman_dot": 0.7007253260607372,
1486
+ "eval_sts_dev_spearman_euclidean": 0.7080307843557273,
1487
+ "eval_sts_dev_spearman_manhattan": 0.710707788624518,
1488
+ "eval_sts_dev_spearman_max": 0.7861447827888495,
1489
+ "step": 345
1490
+ },
1491
+ {
1492
+ "epoch": 7.935933147632312,
1493
+ "grad_norm": 4.521576881408691,
1494
+ "learning_rate": 2.435206122803966e-06,
1495
+ "loss": 0.4316,
1496
+ "step": 350
1497
+ },
1498
+ {
1499
+ "epoch": 7.935933147632312,
1500
+ "eval_loss": 0.030903467908501625,
1501
+ "eval_runtime": 6.8754,
1502
+ "eval_samples_per_second": 218.169,
1503
+ "eval_steps_per_second": 27.344,
1504
+ "eval_sts_dev_pearson_cosine": 0.7857408106817081,
1505
+ "eval_sts_dev_pearson_dot": 0.7063227803586387,
1506
+ "eval_sts_dev_pearson_euclidean": 0.7171064497768416,
1507
+ "eval_sts_dev_pearson_manhattan": 0.7190977579026478,
1508
+ "eval_sts_dev_pearson_max": 0.7857408106817081,
1509
+ "eval_sts_dev_spearman_cosine": 0.786647063435545,
1510
+ "eval_sts_dev_spearman_dot": 0.7004210617791904,
1511
+ "eval_sts_dev_spearman_euclidean": 0.7090060931384192,
1512
+ "eval_sts_dev_spearman_manhattan": 0.7117304388117395,
1513
+ "eval_sts_dev_spearman_max": 0.786647063435545,
1514
+ "step": 350
1515
+ },
1516
+ {
1517
+ "epoch": 8.066852367688023,
1518
+ "eval_loss": 0.03090326115489006,
1519
+ "eval_runtime": 6.7967,
1520
+ "eval_samples_per_second": 220.696,
1521
+ "eval_steps_per_second": 27.661,
1522
+ "eval_sts_dev_pearson_cosine": 0.7860914327083659,
1523
+ "eval_sts_dev_pearson_dot": 0.7067109311815922,
1524
+ "eval_sts_dev_pearson_euclidean": 0.7179978723314155,
1525
+ "eval_sts_dev_pearson_manhattan": 0.7199506434198831,
1526
+ "eval_sts_dev_pearson_max": 0.7860914327083659,
1527
+ "eval_sts_dev_spearman_cosine": 0.7871799411716375,
1528
+ "eval_sts_dev_spearman_dot": 0.7005966817709771,
1529
+ "eval_sts_dev_spearman_euclidean": 0.7099849983444726,
1530
+ "eval_sts_dev_spearman_manhattan": 0.7126081974741519,
1531
+ "eval_sts_dev_spearman_max": 0.7871799411716375,
1532
+ "step": 355
1533
+ },
1534
+ {
1535
+ "epoch": 8.178272980501394,
1536
+ "grad_norm": 3.464381217956543,
1537
+ "learning_rate": 2.504783440598365e-06,
1538
+ "loss": 0.4277,
1539
+ "step": 360
1540
+ },
1541
+ {
1542
+ "epoch": 8.178272980501394,
1543
+ "eval_loss": 0.030861668288707733,
1544
+ "eval_runtime": 6.8952,
1545
+ "eval_samples_per_second": 217.544,
1546
+ "eval_steps_per_second": 27.265,
1547
+ "eval_sts_dev_pearson_cosine": 0.7862113365203784,
1548
+ "eval_sts_dev_pearson_dot": 0.7070142268847368,
1549
+ "eval_sts_dev_pearson_euclidean": 0.7181137478219999,
1550
+ "eval_sts_dev_pearson_manhattan": 0.7200573508948256,
1551
+ "eval_sts_dev_pearson_max": 0.7862113365203784,
1552
+ "eval_sts_dev_spearman_cosine": 0.7873051906331155,
1553
+ "eval_sts_dev_spearman_dot": 0.700851803333668,
1554
+ "eval_sts_dev_spearman_euclidean": 0.7101326235059475,
1555
+ "eval_sts_dev_spearman_manhattan": 0.7126791959108771,
1556
+ "eval_sts_dev_spearman_max": 0.7873051906331155,
1557
+ "step": 360
1558
+ },
1559
+ {
1560
+ "epoch": 8.289693593314762,
1561
+ "eval_loss": 0.03079277276992798,
1562
+ "eval_runtime": 7.0041,
1563
+ "eval_samples_per_second": 214.159,
1564
+ "eval_steps_per_second": 26.841,
1565
+ "eval_sts_dev_pearson_cosine": 0.7861051555153227,
1566
+ "eval_sts_dev_pearson_dot": 0.7077462081618229,
1567
+ "eval_sts_dev_pearson_euclidean": 0.7175047036545574,
1568
+ "eval_sts_dev_pearson_manhattan": 0.7194616943503004,
1569
+ "eval_sts_dev_pearson_max": 0.7861051555153227,
1570
+ "eval_sts_dev_spearman_cosine": 0.7869754283660466,
1571
+ "eval_sts_dev_spearman_dot": 0.7018953525077267,
1572
+ "eval_sts_dev_spearman_euclidean": 0.7093618435488815,
1573
+ "eval_sts_dev_spearman_manhattan": 0.7120432245619701,
1574
+ "eval_sts_dev_spearman_max": 0.7869754283660466,
1575
+ "step": 365
1576
+ },
1577
+ {
1578
+ "epoch": 8.401114206128133,
1579
+ "grad_norm": 3.629032850265503,
1580
+ "learning_rate": 2.5743607583927645e-06,
1581
+ "loss": 0.3925,
1582
+ "step": 370
1583
+ },
1584
+ {
1585
+ "epoch": 8.401114206128133,
1586
+ "eval_loss": 0.03077574074268341,
1587
+ "eval_runtime": 6.9569,
1588
+ "eval_samples_per_second": 215.613,
1589
+ "eval_steps_per_second": 27.024,
1590
+ "eval_sts_dev_pearson_cosine": 0.7860927703016911,
1591
+ "eval_sts_dev_pearson_dot": 0.7084805810982604,
1592
+ "eval_sts_dev_pearson_euclidean": 0.7171292733763057,
1593
+ "eval_sts_dev_pearson_manhattan": 0.7191008391698412,
1594
+ "eval_sts_dev_pearson_max": 0.7860927703016911,
1595
+ "eval_sts_dev_spearman_cosine": 0.7868465023058949,
1596
+ "eval_sts_dev_spearman_dot": 0.7026257860756843,
1597
+ "eval_sts_dev_spearman_euclidean": 0.7087433915922463,
1598
+ "eval_sts_dev_spearman_manhattan": 0.7115662090675204,
1599
+ "eval_sts_dev_spearman_max": 0.7868465023058949,
1600
+ "step": 370
1601
+ },
1602
+ {
1603
+ "epoch": 8.512534818941504,
1604
+ "eval_loss": 0.03077036887407303,
1605
+ "eval_runtime": 6.8481,
1606
+ "eval_samples_per_second": 219.038,
1607
+ "eval_steps_per_second": 27.453,
1608
+ "eval_sts_dev_pearson_cosine": 0.7860543259557101,
1609
+ "eval_sts_dev_pearson_dot": 0.7090029747286515,
1610
+ "eval_sts_dev_pearson_euclidean": 0.7168001987123229,
1611
+ "eval_sts_dev_pearson_manhattan": 0.7187912798445806,
1612
+ "eval_sts_dev_pearson_max": 0.7860543259557101,
1613
+ "eval_sts_dev_spearman_cosine": 0.786577121013552,
1614
+ "eval_sts_dev_spearman_dot": 0.7032207123703509,
1615
+ "eval_sts_dev_spearman_euclidean": 0.7083026579268292,
1616
+ "eval_sts_dev_spearman_manhattan": 0.7111138102646555,
1617
+ "eval_sts_dev_spearman_max": 0.786577121013552,
1618
+ "step": 375
1619
+ },
1620
+ {
1621
+ "epoch": 8.623955431754874,
1622
+ "grad_norm": 4.5424346923828125,
1623
+ "learning_rate": 2.643938076187163e-06,
1624
+ "loss": 0.4049,
1625
+ "step": 380
1626
+ },
1627
+ {
1628
+ "epoch": 8.623955431754874,
1629
+ "eval_loss": 0.030785972252488136,
1630
+ "eval_runtime": 6.9052,
1631
+ "eval_samples_per_second": 217.228,
1632
+ "eval_steps_per_second": 27.226,
1633
+ "eval_sts_dev_pearson_cosine": 0.786338341456081,
1634
+ "eval_sts_dev_pearson_dot": 0.7090251722360976,
1635
+ "eval_sts_dev_pearson_euclidean": 0.7176375494602096,
1636
+ "eval_sts_dev_pearson_manhattan": 0.7195903686388057,
1637
+ "eval_sts_dev_pearson_max": 0.786338341456081,
1638
+ "eval_sts_dev_spearman_cosine": 0.7869461186588641,
1639
+ "eval_sts_dev_spearman_dot": 0.7030353980707192,
1640
+ "eval_sts_dev_spearman_euclidean": 0.7093240329985625,
1641
+ "eval_sts_dev_spearman_manhattan": 0.7120013731894795,
1642
+ "eval_sts_dev_spearman_max": 0.7869461186588641,
1643
+ "step": 380
1644
+ },
1645
+ {
1646
+ "epoch": 8.735376044568245,
1647
+ "eval_loss": 0.03077947534620762,
1648
+ "eval_runtime": 6.94,
1649
+ "eval_samples_per_second": 216.137,
1650
+ "eval_steps_per_second": 27.089,
1651
+ "eval_sts_dev_pearson_cosine": 0.7867836664964302,
1652
+ "eval_sts_dev_pearson_dot": 0.7089649699768177,
1653
+ "eval_sts_dev_pearson_euclidean": 0.7185998785212442,
1654
+ "eval_sts_dev_pearson_manhattan": 0.7205256023581162,
1655
+ "eval_sts_dev_pearson_max": 0.7867836664964302,
1656
+ "eval_sts_dev_spearman_cosine": 0.7875195626790124,
1657
+ "eval_sts_dev_spearman_dot": 0.7028351666319841,
1658
+ "eval_sts_dev_spearman_euclidean": 0.7105482738364566,
1659
+ "eval_sts_dev_spearman_manhattan": 0.7132642042369475,
1660
+ "eval_sts_dev_spearman_max": 0.7875195626790124,
1661
+ "step": 385
1662
+ },
1663
+ {
1664
+ "epoch": 8.846796657381615,
1665
+ "grad_norm": 3.7269480228424072,
1666
+ "learning_rate": 2.7135153939815623e-06,
1667
+ "loss": 0.3742,
1668
+ "step": 390
1669
+ },
1670
+ {
1671
+ "epoch": 8.846796657381615,
1672
+ "eval_loss": 0.030757909640669823,
1673
+ "eval_runtime": 6.912,
1674
+ "eval_samples_per_second": 217.015,
1675
+ "eval_steps_per_second": 27.199,
1676
+ "eval_sts_dev_pearson_cosine": 0.7873307957198338,
1677
+ "eval_sts_dev_pearson_dot": 0.7087450117938812,
1678
+ "eval_sts_dev_pearson_euclidean": 0.7199394166229915,
1679
+ "eval_sts_dev_pearson_manhattan": 0.7218118008402783,
1680
+ "eval_sts_dev_pearson_max": 0.7873307957198338,
1681
+ "eval_sts_dev_spearman_cosine": 0.7883481466120934,
1682
+ "eval_sts_dev_spearman_dot": 0.702431533404311,
1683
+ "eval_sts_dev_spearman_euclidean": 0.7122286167501692,
1684
+ "eval_sts_dev_spearman_manhattan": 0.7149544811678771,
1685
+ "eval_sts_dev_spearman_max": 0.7883481466120934,
1686
+ "step": 390
1687
+ },
1688
+ {
1689
+ "epoch": 8.958217270194986,
1690
+ "eval_loss": 0.03074067085981369,
1691
+ "eval_runtime": 7.0786,
1692
+ "eval_samples_per_second": 211.905,
1693
+ "eval_steps_per_second": 26.559,
1694
+ "eval_sts_dev_pearson_cosine": 0.7875281932009626,
1695
+ "eval_sts_dev_pearson_dot": 0.7091183187974348,
1696
+ "eval_sts_dev_pearson_euclidean": 0.720306579358833,
1697
+ "eval_sts_dev_pearson_manhattan": 0.7221545912209083,
1698
+ "eval_sts_dev_pearson_max": 0.7875281932009626,
1699
+ "eval_sts_dev_spearman_cosine": 0.7884911216315376,
1700
+ "eval_sts_dev_spearman_dot": 0.7026504547905195,
1701
+ "eval_sts_dev_spearman_euclidean": 0.7125846397557779,
1702
+ "eval_sts_dev_spearman_manhattan": 0.7153917764693033,
1703
+ "eval_sts_dev_spearman_max": 0.7884911216315376,
1704
+ "step": 395
1705
+ },
1706
+ {
1707
+ "epoch": 9.089136490250697,
1708
+ "grad_norm": 3.8048255443573,
1709
+ "learning_rate": 2.7830927117759614e-06,
1710
+ "loss": 0.3498,
1711
+ "step": 400
1712
+ },
1713
+ {
1714
+ "epoch": 9.089136490250697,
1715
+ "eval_loss": 0.03073756769299507,
1716
+ "eval_runtime": 7.1819,
1717
+ "eval_samples_per_second": 208.858,
1718
+ "eval_steps_per_second": 26.177,
1719
+ "eval_sts_dev_pearson_cosine": 0.7875285006609543,
1720
+ "eval_sts_dev_pearson_dot": 0.709718276464936,
1721
+ "eval_sts_dev_pearson_euclidean": 0.7202436438310591,
1722
+ "eval_sts_dev_pearson_manhattan": 0.7220766094080024,
1723
+ "eval_sts_dev_pearson_max": 0.7875285006609543,
1724
+ "eval_sts_dev_spearman_cosine": 0.7885939335328866,
1725
+ "eval_sts_dev_spearman_dot": 0.7032536436958657,
1726
+ "eval_sts_dev_spearman_euclidean": 0.7124855846354039,
1727
+ "eval_sts_dev_spearman_manhattan": 0.7153797502128406,
1728
+ "eval_sts_dev_spearman_max": 0.7885939335328866,
1729
+ "step": 400
1730
+ },
1731
+ {
1732
+ "epoch": 9.200557103064067,
1733
+ "eval_loss": 0.03071259893476963,
1734
+ "eval_runtime": 6.8201,
1735
+ "eval_samples_per_second": 219.938,
1736
+ "eval_steps_per_second": 27.566,
1737
+ "eval_sts_dev_pearson_cosine": 0.787184477170156,
1738
+ "eval_sts_dev_pearson_dot": 0.7102603851217889,
1739
+ "eval_sts_dev_pearson_euclidean": 0.7195444208609296,
1740
+ "eval_sts_dev_pearson_manhattan": 0.7213936268781151,
1741
+ "eval_sts_dev_pearson_max": 0.787184477170156,
1742
+ "eval_sts_dev_spearman_cosine": 0.78809909542145,
1743
+ "eval_sts_dev_spearman_dot": 0.7036724949513745,
1744
+ "eval_sts_dev_spearman_euclidean": 0.7115938480269084,
1745
+ "eval_sts_dev_spearman_manhattan": 0.7143300985487689,
1746
+ "eval_sts_dev_spearman_max": 0.78809909542145,
1747
+ "step": 405
1748
  }
1749
  ],
1750
  "logging_steps": 10,