hotchpotch
committed on
Commit
•
43226e0
1
Parent(s):
9bd0c38
Upload folder using huggingface_hub
Browse files- .gitattributes +1 -0
- 1_Pooling/config.json +10 -0
- README.md +133 -0
- config.json +25 -0
- config_sentence_transformers.json +10 -0
- jmteb_config/jmteb.jsonnet +22 -0
- jmteb_config/tasks/amazon_counterfactual_classification.jsonnet +32 -0
- jmteb_config/tasks/amazon_review_classification.jsonnet +32 -0
- jmteb_config/tasks/esci.jsonnet +33 -0
- jmteb_config/tasks/jagovfaqs_22k.jsonnet +33 -0
- jmteb_config/tasks/jaqket.jsonnet +33 -0
- jmteb_config/tasks/jsick.jsonnet +25 -0
- jmteb_config/tasks/jsts.jsonnet +25 -0
- jmteb_config/tasks/livedoor_news.jsonnet +24 -0
- jmteb_config/tasks/massive_intent_classification.jsonnet +32 -0
- jmteb_config/tasks/massive_scenario_classification.jsonnet +32 -0
- jmteb_config/tasks/mewsc16.jsonnet +24 -0
- jmteb_config/tasks/mrtydi.jsonnet +33 -0
- jmteb_config/tasks/nlp_journal_abs_intro.jsonnet +33 -0
- jmteb_config/tasks/nlp_journal_title_abs.jsonnet +33 -0
- jmteb_config/tasks/nlp_journal_title_intro.jsonnet +33 -0
- jmteb_config/tasks/paws_x_ja.jsonnet +25 -0
- model.safetensors +3 -0
- modules.json +14 -0
- result/Classification/scores_amazon_counterfactual_classification.json +23 -0
- result/Classification/scores_amazon_review_classification.json +23 -0
- result/Classification/scores_massive_intent_classification.json +23 -0
- result/Classification/scores_massive_scenario_classification.json +23 -0
- result/Clustering/scores_livedoor_news.json +36 -0
- result/Clustering/scores_mewsc16.json +36 -0
- result/PairClassification/scores_paws_x_ja.json +41 -0
- result/Reranking/scores_esci.json +31 -0
- result/Retrieval/scores_jagovfaqs_22k.json +43 -0
- result/Retrieval/scores_jaqket.json +43 -0
- result/Retrieval/scores_mrtydi.json +43 -0
- result/Retrieval/scores_nlp_journal_abs_intro.json +43 -0
- result/Retrieval/scores_nlp_journal_title_abs.json +43 -0
- result/Retrieval/scores_nlp_journal_title_intro.json +43 -0
- result/STS/scores_jsick.json +31 -0
- result/STS/scores_jsts.json +31 -0
- result/summary.json +62 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +3 -0
- tokenizer_config.json +63 -0
- vocab.txt +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 768,
|
3 |
+
"pooling_mode_cls_token": false,
|
4 |
+
"pooling_mode_mean_tokens": true,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": false,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
README.md
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
language:
|
3 |
+
- ja
|
4 |
+
base_model: cl-nagoya/ruri-pt-base
|
5 |
+
tags:
|
6 |
+
- sentence-similarity
|
7 |
+
- feature-extraction
|
8 |
+
license: apache-2.0
|
9 |
+
datasets:
|
10 |
+
- cl-nagoya/ruri-dataset-ft
|
11 |
+
pipeline_tag: sentence-similarity
|
12 |
+
---
|
13 |
+
|
14 |
+
# Ruri: Japanese General Text Embeddings
|
15 |
+
|
16 |
+
|
17 |
+
## Usage
|
18 |
+
|
19 |
+
### Direct Usage (Sentence Transformers)
|
20 |
+
|
21 |
+
First install the Sentence Transformers library:
|
22 |
+
|
23 |
+
```bash
|
24 |
+
pip install -U sentence-transformers fugashi sentencepiece unidic-lite
|
25 |
+
```
|
26 |
+
|
27 |
+
Then you can load this model and run inference.
|
28 |
+
```python
|
29 |
+
import torch.nn.functional as F
|
30 |
+
from sentence_transformers import SentenceTransformer
|
31 |
+
|
32 |
+
# Download from the 🤗 Hub
|
33 |
+
model = SentenceTransformer("cl-nagoya/ruri-base")
|
34 |
+
|
35 |
+
# Don't forget to add the prefix "クエリ: " for query-side or "文章: " for passage-side texts.
|
36 |
+
sentences = [
|
37 |
+
"クエリ: 瑠璃色はどんな色?",
|
38 |
+
"文章: 瑠璃色(るりいろ)は、紫みを帯びた濃い青。名は、半貴石の瑠璃(ラピスラズリ、英: lapis lazuli)による。JIS慣用色名では「こい紫みの青」(略号 dp-pB)と定義している[1][2]。",
|
39 |
+
"クエリ: ワシやタカのように、鋭いくちばしと爪を持った大型の鳥類を総称して「何類」というでしょう?",
|
40 |
+
"文章: ワシ、タカ、ハゲワシ、ハヤブサ、コンドル、フクロウが代表的である。これらの猛禽類はリンネ前後の時代(17~18世紀)には鷲類・鷹類・隼類及び梟類に分類された。ちなみにリンネは狩りをする鳥を単一の目(もく)にまとめ、vultur(コンドル、ハゲワシ)、falco(ワシ、タカ、ハヤブサなど)、strix(フクロウ)、lanius(モズ)の4属を含めている。",
|
41 |
+
]
|
42 |
+
|
43 |
+
embeddings = model.encode(sentences, convert_to_tensor=True)
|
44 |
+
print(embeddings.size())
|
45 |
+
# [4, 768]
|
46 |
+
|
47 |
+
similarities = F.cosine_similarity(embeddings.unsqueeze(0), embeddings.unsqueeze(1), dim=2)
|
48 |
+
print(similarities)
|
49 |
+
# [[1.0000, 0.9421, 0.6844, 0.7167],
|
50 |
+
# [0.9421, 1.0000, 0.6626, 0.6863],
|
51 |
+
# [0.6844, 0.6626, 1.0000, 0.8785],
|
52 |
+
# [0.7167, 0.6863, 0.8785, 1.0000]]
|
53 |
+
```
|
54 |
+
|
55 |
+
## Benchmarks
|
56 |
+
|
57 |
+
### JMTEB
|
58 |
+
Evaluated with [JMTEB](https://github.com/sbintuitions/JMTEB).
|
59 |
+
|
60 |
+
|Model|#Param.|Avg.|Retrieval|STS|Classification|Reranking|Clustering|PairClassification|
|
61 |
+
|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
|
62 |
+
|[cl-nagoya/sup-simcse-ja-base](https://huggingface.co/cl-nagoya/sup-simcse-ja-base)|111M|68.56|49.64|82.05|73.47|91.83|51.79|62.57|
|
63 |
+
|[cl-nagoya/sup-simcse-ja-large](https://huggingface.co/cl-nagoya/sup-simcse-ja-large)|337M|66.51|37.62|83.18|73.73|91.48|50.56|62.51|
|
64 |
+
|[cl-nagoya/unsup-simcse-ja-base](https://huggingface.co/cl-nagoya/unsup-simcse-ja-base)|111M|65.07|40.23|78.72|73.07|91.16|44.77|62.44|
|
65 |
+
|[cl-nagoya/unsup-simcse-ja-large](https://huggingface.co/cl-nagoya/unsup-simcse-ja-large)|337M|66.27|40.53|80.56|74.66|90.95|48.41|62.49|
|
66 |
+
|[pkshatech/GLuCoSE-base-ja](https://huggingface.co/pkshatech/GLuCoSE-base-ja)|133M|70.44|59.02|78.71|76.82|91.90|49.78|66.39|
|
67 |
+
||||||||||
|
68 |
+
|[sentence-transformers/LaBSE](https://huggingface.co/sentence-transformers/LaBSE)|472M|64.70|40.12|76.56|72.66|91.63|44.88|62.33|
|
69 |
+
|[intfloat/multilingual-e5-small](https://huggingface.co/intfloat/multilingual-e5-small)|118M|69.52|67.27|80.07|67.62|93.03|46.91|62.19|
|
70 |
+
|[intfloat/multilingual-e5-base](https://huggingface.co/intfloat/multilingual-e5-base)|278M|70.12|68.21|79.84|69.30|92.85|48.26|62.26|
|
71 |
+
|[intfloat/multilingual-e5-large](https://huggingface.co/intfloat/multilingual-e5-large)|560M|71.65|70.98|79.70|72.89|92.96|51.24|62.15|
|
72 |
+
||||||||||
|
73 |
+
|OpenAI/text-embedding-ada-002|-|69.48|64.38|79.02|69.75|93.04|48.30|62.40|
|
74 |
+
|OpenAI/text-embedding-3-small|-|70.86|66.39|79.46|73.06|92.92|51.06|62.27|
|
75 |
+
|OpenAI/text-embedding-3-large|-|73.97|74.48|82.52|77.58|93.58|53.32|62.35|
|
76 |
+
||||||||||
|
77 |
+
|[Ruri-Small](https://huggingface.co/cl-nagoya/ruri-small)|68M|71.53|69.41|82.79|76.22|93.00|51.19|62.11|
|
78 |
+
|[**Ruri-Base**](https://huggingface.co/cl-nagoya/ruri-base) (this model)|111M|71.91|69.82|82.87|75.58|92.91|54.16|62.38|
|
79 |
+
|[Ruri-Large](https://huggingface.co/cl-nagoya/ruri-large)|337M|73.31|73.02|83.13|77.43|92.99|51.82|62.29|
|
80 |
+
|
81 |
+
|
82 |
+
|
83 |
+
|
84 |
+
|
85 |
+
## Model Details
|
86 |
+
|
87 |
+
### Model Description
|
88 |
+
- **Model Type:** Sentence Transformer
|
89 |
+
- **Base model:** [cl-nagoya/ruri-pt-base](https://huggingface.co/cl-nagoya/ruri-pt-base)
|
90 |
+
- **Maximum Sequence Length:** 512 tokens
|
91 |
+
- **Output Dimensionality:** 768
|
92 |
+
- **Similarity Function:** Cosine Similarity
|
93 |
+
- **Language:** Japanese
|
94 |
+
- **License:** Apache 2.0
|
95 |
+
- **Paper:** https://arxiv.org/abs/2409.07737
|
96 |
+
<!-- - **Training Dataset:** Unknown -->
|
97 |
+
|
98 |
+
|
99 |
+
### Full Model Architecture
|
100 |
+
|
101 |
+
```
|
102 |
+
SentenceTransformer(
|
103 |
+
(0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel
|
104 |
+
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
105 |
+
)
|
106 |
+
```
|
107 |
+
|
108 |
+
### Framework Versions
|
109 |
+
- Python: 3.10.13
|
110 |
+
- Sentence Transformers: 3.0.0
|
111 |
+
- Transformers: 4.41.2
|
112 |
+
- PyTorch: 2.3.1+cu118
|
113 |
+
- Accelerate: 0.30.1
|
114 |
+
- Datasets: 2.19.1
|
115 |
+
- Tokenizers: 0.19.1
|
116 |
+
|
117 |
+
## Citation
|
118 |
+
|
119 |
+
```bibtex
|
120 |
+
@misc{
|
121 |
+
Ruri,
|
122 |
+
title={{Ruri: Japanese General Text Embeddings}},
|
123 |
+
author={Hayato Tsukagoshi and Ryohei Sasano},
|
124 |
+
year={2024},
|
125 |
+
eprint={2409.07737},
|
126 |
+
archivePrefix={arXiv},
|
127 |
+
primaryClass={cs.CL},
|
128 |
+
url={https://arxiv.org/abs/2409.07737},
|
129 |
+
}
|
130 |
+
```
|
131 |
+
|
132 |
+
## License
|
133 |
+
This model is published under the [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0).
|
config.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "cl-nagoya/ruri-base-pt",
|
3 |
+
"architectures": [
|
4 |
+
"BertModel"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"hidden_act": "gelu",
|
9 |
+
"hidden_dropout_prob": 0.1,
|
10 |
+
"hidden_size": 768,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"intermediate_size": 3072,
|
13 |
+
"layer_norm_eps": 1e-12,
|
14 |
+
"max_position_embeddings": 512,
|
15 |
+
"model_type": "bert",
|
16 |
+
"num_attention_heads": 12,
|
17 |
+
"num_hidden_layers": 12,
|
18 |
+
"pad_token_id": 0,
|
19 |
+
"position_embedding_type": "absolute",
|
20 |
+
"torch_dtype": "bfloat16",
|
21 |
+
"transformers_version": "4.41.2",
|
22 |
+
"type_vocab_size": 2,
|
23 |
+
"use_cache": true,
|
24 |
+
"vocab_size": 32768
|
25 |
+
}
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "3.0.0",
|
4 |
+
"transformers": "4.41.2",
|
5 |
+
"pytorch": "2.3.1+cu118"
|
6 |
+
},
|
7 |
+
"prompts": {},
|
8 |
+
"default_prompt_name": null,
|
9 |
+
"similarity_fn_name": null
|
10 |
+
}
|
jmteb_config/jmteb.jsonnet
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
// Classification
|
2 |
+
(import './tasks/amazon_review_classification.jsonnet') +
|
3 |
+
(import './tasks/amazon_counterfactual_classification.jsonnet') +
|
4 |
+
(import './tasks/massive_intent_classification.jsonnet') +
|
5 |
+
(import './tasks/massive_scenario_classification.jsonnet') +
|
6 |
+
// Clustering
|
7 |
+
(import './tasks/livedoor_news.jsonnet') +
|
8 |
+
(import './tasks/mewsc16.jsonnet') +
|
9 |
+
// STS
|
10 |
+
(import './tasks/jsts.jsonnet') +
|
11 |
+
(import './tasks/jsick.jsonnet') +
|
12 |
+
// Pair Classification
|
13 |
+
(import './tasks/paws_x_ja.jsonnet') +
|
14 |
+
// Retrieval
|
15 |
+
(import './tasks/jagovfaqs_22k.jsonnet') +
|
16 |
+
(import './tasks/mrtydi.jsonnet') +
|
17 |
+
(import './tasks/jaqket.jsonnet') +
|
18 |
+
(import './tasks/nlp_journal_title_abs.jsonnet') +
|
19 |
+
(import './tasks/nlp_journal_title_intro.jsonnet') +
|
20 |
+
(import './tasks/nlp_journal_abs_intro.jsonnet') +
|
21 |
+
// Reranking
|
22 |
+
(import './tasks/esci.jsonnet')
|
jmteb_config/tasks/amazon_counterfactual_classification.jsonnet
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
amazon_counterfactual_classification: {
|
3 |
+
class_path: 'ClassificationEvaluator',
|
4 |
+
init_args: {
|
5 |
+
prefix: 'クエリ: ',
|
6 |
+
train_dataset: {
|
7 |
+
class_path: 'HfClassificationDataset',
|
8 |
+
init_args: {
|
9 |
+
path: 'sbintuitions/JMTEB',
|
10 |
+
split: 'train',
|
11 |
+
name: 'amazon_counterfactual_classification',
|
12 |
+
},
|
13 |
+
},
|
14 |
+
val_dataset: {
|
15 |
+
class_path: 'HfClassificationDataset',
|
16 |
+
init_args: {
|
17 |
+
path: 'sbintuitions/JMTEB',
|
18 |
+
split: 'validation',
|
19 |
+
name: 'amazon_counterfactual_classification',
|
20 |
+
},
|
21 |
+
},
|
22 |
+
test_dataset: {
|
23 |
+
class_path: 'HfClassificationDataset',
|
24 |
+
init_args: {
|
25 |
+
path: 'sbintuitions/JMTEB',
|
26 |
+
split: 'test',
|
27 |
+
name: 'amazon_counterfactual_classification',
|
28 |
+
},
|
29 |
+
},
|
30 |
+
},
|
31 |
+
},
|
32 |
+
}
|
jmteb_config/tasks/amazon_review_classification.jsonnet
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
amazon_review_classification: {
|
3 |
+
class_path: 'ClassificationEvaluator',
|
4 |
+
init_args: {
|
5 |
+
prefix: 'クエリ: ',
|
6 |
+
train_dataset: {
|
7 |
+
class_path: 'HfClassificationDataset',
|
8 |
+
init_args: {
|
9 |
+
path: 'sbintuitions/JMTEB',
|
10 |
+
split: 'train',
|
11 |
+
name: 'amazon_review_classification',
|
12 |
+
},
|
13 |
+
},
|
14 |
+
val_dataset: {
|
15 |
+
class_path: 'HfClassificationDataset',
|
16 |
+
init_args: {
|
17 |
+
path: 'sbintuitions/JMTEB',
|
18 |
+
split: 'validation',
|
19 |
+
name: 'amazon_review_classification',
|
20 |
+
},
|
21 |
+
},
|
22 |
+
test_dataset: {
|
23 |
+
class_path: 'HfClassificationDataset',
|
24 |
+
init_args: {
|
25 |
+
path: 'sbintuitions/JMTEB',
|
26 |
+
split: 'test',
|
27 |
+
name: 'amazon_review_classification',
|
28 |
+
},
|
29 |
+
},
|
30 |
+
},
|
31 |
+
},
|
32 |
+
}
|
jmteb_config/tasks/esci.jsonnet
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
esci: {
|
3 |
+
class_path: 'RerankingEvaluator',
|
4 |
+
init_args: {
|
5 |
+
doc_prefix: '文章: ',
|
6 |
+
query_prefix: 'クエリ: ',
|
7 |
+
val_query_dataset: {
|
8 |
+
class_path: 'HfRerankingQueryDataset',
|
9 |
+
init_args: {
|
10 |
+
path: 'sbintuitions/JMTEB',
|
11 |
+
split: 'validation',
|
12 |
+
name: 'esci-query',
|
13 |
+
},
|
14 |
+
},
|
15 |
+
test_query_dataset: {
|
16 |
+
class_path: 'HfRerankingQueryDataset',
|
17 |
+
init_args: {
|
18 |
+
path: 'sbintuitions/JMTEB',
|
19 |
+
split: 'test',
|
20 |
+
name: 'esci-query',
|
21 |
+
},
|
22 |
+
},
|
23 |
+
doc_dataset: {
|
24 |
+
class_path: 'HfRerankingDocDataset',
|
25 |
+
init_args: {
|
26 |
+
path: 'sbintuitions/JMTEB',
|
27 |
+
split: 'corpus',
|
28 |
+
name: 'esci-corpus',
|
29 |
+
},
|
30 |
+
},
|
31 |
+
},
|
32 |
+
},
|
33 |
+
}
|
jmteb_config/tasks/jagovfaqs_22k.jsonnet
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
jagovfaqs_22k: {
|
3 |
+
class_path: 'RetrievalEvaluator',
|
4 |
+
init_args: {
|
5 |
+
doc_prefix: '文章: ',
|
6 |
+
query_prefix: 'クエリ: ',
|
7 |
+
val_query_dataset: {
|
8 |
+
class_path: 'HfRetrievalQueryDataset',
|
9 |
+
init_args: {
|
10 |
+
path: 'sbintuitions/JMTEB',
|
11 |
+
split: 'validation',
|
12 |
+
name: 'jagovfaqs_22k-query',
|
13 |
+
},
|
14 |
+
},
|
15 |
+
test_query_dataset: {
|
16 |
+
class_path: 'HfRetrievalQueryDataset',
|
17 |
+
init_args: {
|
18 |
+
path: 'sbintuitions/JMTEB',
|
19 |
+
split: 'test',
|
20 |
+
name: 'jagovfaqs_22k-query',
|
21 |
+
},
|
22 |
+
},
|
23 |
+
doc_dataset: {
|
24 |
+
class_path: 'HfRetrievalDocDataset',
|
25 |
+
init_args: {
|
26 |
+
path: 'sbintuitions/JMTEB',
|
27 |
+
split: 'corpus',
|
28 |
+
name: 'jagovfaqs_22k-corpus',
|
29 |
+
},
|
30 |
+
},
|
31 |
+
},
|
32 |
+
},
|
33 |
+
}
|
jmteb_config/tasks/jaqket.jsonnet
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
jaqket: {
|
3 |
+
class_path: 'RetrievalEvaluator',
|
4 |
+
init_args: {
|
5 |
+
doc_prefix: '文章: ',
|
6 |
+
query_prefix: 'クエリ: ',
|
7 |
+
val_query_dataset: {
|
8 |
+
class_path: 'HfRetrievalQueryDataset',
|
9 |
+
init_args: {
|
10 |
+
path: 'sbintuitions/JMTEB',
|
11 |
+
split: 'validation',
|
12 |
+
name: 'jaqket-query',
|
13 |
+
},
|
14 |
+
},
|
15 |
+
test_query_dataset: {
|
16 |
+
class_path: 'HfRetrievalQueryDataset',
|
17 |
+
init_args: {
|
18 |
+
path: 'sbintuitions/JMTEB',
|
19 |
+
split: 'test',
|
20 |
+
name: 'jaqket-query',
|
21 |
+
},
|
22 |
+
},
|
23 |
+
doc_dataset: {
|
24 |
+
class_path: 'HfRetrievalDocDataset',
|
25 |
+
init_args: {
|
26 |
+
path: 'sbintuitions/JMTEB',
|
27 |
+
split: 'corpus',
|
28 |
+
name: 'jaqket-corpus',
|
29 |
+
},
|
30 |
+
},
|
31 |
+
},
|
32 |
+
},
|
33 |
+
}
|
jmteb_config/tasks/jsick.jsonnet
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
jsick: {
|
3 |
+
class_path: 'STSEvaluator',
|
4 |
+
init_args: {
|
5 |
+
sentence1_prefix: 'クエリ: ',
|
6 |
+
sentence2_prefix: 'クエリ: ',
|
7 |
+
val_dataset: {
|
8 |
+
class_path: 'HfSTSDataset',
|
9 |
+
init_args: {
|
10 |
+
path: 'sbintuitions/JMTEB',
|
11 |
+
split: 'validation',
|
12 |
+
name: 'jsick',
|
13 |
+
},
|
14 |
+
},
|
15 |
+
test_dataset: {
|
16 |
+
class_path: 'HfSTSDataset',
|
17 |
+
init_args: {
|
18 |
+
path: 'sbintuitions/JMTEB',
|
19 |
+
split: 'test',
|
20 |
+
name: 'jsick',
|
21 |
+
},
|
22 |
+
},
|
23 |
+
},
|
24 |
+
},
|
25 |
+
}
|
jmteb_config/tasks/jsts.jsonnet
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
jsts: {
|
3 |
+
class_path: 'STSEvaluator',
|
4 |
+
init_args: {
|
5 |
+
sentence1_prefix: 'クエリ: ',
|
6 |
+
sentence2_prefix: 'クエリ: ',
|
7 |
+
val_dataset: {
|
8 |
+
class_path: 'HfSTSDataset',
|
9 |
+
init_args: {
|
10 |
+
path: 'sbintuitions/JMTEB',
|
11 |
+
split: 'train',
|
12 |
+
name: 'jsts',
|
13 |
+
},
|
14 |
+
},
|
15 |
+
test_dataset: {
|
16 |
+
class_path: 'HfSTSDataset',
|
17 |
+
init_args: {
|
18 |
+
path: 'sbintuitions/JMTEB',
|
19 |
+
split: 'test',
|
20 |
+
name: 'jsts',
|
21 |
+
},
|
22 |
+
},
|
23 |
+
},
|
24 |
+
},
|
25 |
+
}
|
jmteb_config/tasks/livedoor_news.jsonnet
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
livedoor_news: {
|
3 |
+
class_path: 'ClusteringEvaluator',
|
4 |
+
init_args: {
|
5 |
+
prefix: 'クエリ: ',
|
6 |
+
val_dataset: {
|
7 |
+
class_path: 'HfClusteringDataset',
|
8 |
+
init_args: {
|
9 |
+
path: 'sbintuitions/JMTEB',
|
10 |
+
split: 'validation',
|
11 |
+
name: 'livedoor_news',
|
12 |
+
},
|
13 |
+
},
|
14 |
+
test_dataset: {
|
15 |
+
class_path: 'HfClusteringDataset',
|
16 |
+
init_args: {
|
17 |
+
path: 'sbintuitions/JMTEB',
|
18 |
+
split: 'test',
|
19 |
+
name: 'livedoor_news',
|
20 |
+
},
|
21 |
+
},
|
22 |
+
},
|
23 |
+
},
|
24 |
+
}
|
jmteb_config/tasks/massive_intent_classification.jsonnet
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
massive_intent_classification: {
|
3 |
+
class_path: 'ClassificationEvaluator',
|
4 |
+
init_args: {
|
5 |
+
prefix: 'クエリ: ',
|
6 |
+
train_dataset: {
|
7 |
+
class_path: 'HfClassificationDataset',
|
8 |
+
init_args: {
|
9 |
+
path: 'sbintuitions/JMTEB',
|
10 |
+
split: 'train',
|
11 |
+
name: 'massive_intent_classification',
|
12 |
+
},
|
13 |
+
},
|
14 |
+
val_dataset: {
|
15 |
+
class_path: 'HfClassificationDataset',
|
16 |
+
init_args: {
|
17 |
+
path: 'sbintuitions/JMTEB',
|
18 |
+
split: 'validation',
|
19 |
+
name: 'massive_intent_classification',
|
20 |
+
},
|
21 |
+
},
|
22 |
+
test_dataset: {
|
23 |
+
class_path: 'HfClassificationDataset',
|
24 |
+
init_args: {
|
25 |
+
path: 'sbintuitions/JMTEB',
|
26 |
+
split: 'test',
|
27 |
+
name: 'massive_intent_classification',
|
28 |
+
},
|
29 |
+
},
|
30 |
+
},
|
31 |
+
},
|
32 |
+
}
|
jmteb_config/tasks/massive_scenario_classification.jsonnet
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
massive_scenario_classification: {
|
3 |
+
class_path: 'ClassificationEvaluator',
|
4 |
+
init_args: {
|
5 |
+
prefix: 'クエリ: ',
|
6 |
+
train_dataset: {
|
7 |
+
class_path: 'HfClassificationDataset',
|
8 |
+
init_args: {
|
9 |
+
path: 'sbintuitions/JMTEB',
|
10 |
+
split: 'train',
|
11 |
+
name: 'massive_scenario_classification',
|
12 |
+
},
|
13 |
+
},
|
14 |
+
val_dataset: {
|
15 |
+
class_path: 'HfClassificationDataset',
|
16 |
+
init_args: {
|
17 |
+
path: 'sbintuitions/JMTEB',
|
18 |
+
split: 'validation',
|
19 |
+
name: 'massive_scenario_classification',
|
20 |
+
},
|
21 |
+
},
|
22 |
+
test_dataset: {
|
23 |
+
class_path: 'HfClassificationDataset',
|
24 |
+
init_args: {
|
25 |
+
path: 'sbintuitions/JMTEB',
|
26 |
+
split: 'test',
|
27 |
+
name: 'massive_scenario_classification',
|
28 |
+
},
|
29 |
+
},
|
30 |
+
},
|
31 |
+
},
|
32 |
+
}
|
jmteb_config/tasks/mewsc16.jsonnet
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
mewsc16: {
|
3 |
+
class_path: 'ClusteringEvaluator',
|
4 |
+
init_args: {
|
5 |
+
prefix: 'クエリ: ',
|
6 |
+
val_dataset: {
|
7 |
+
class_path: 'HfClusteringDataset',
|
8 |
+
init_args: {
|
9 |
+
path: 'sbintuitions/JMTEB',
|
10 |
+
split: 'validation',
|
11 |
+
name: 'mewsc16_ja',
|
12 |
+
},
|
13 |
+
},
|
14 |
+
test_dataset: {
|
15 |
+
class_path: 'HfClusteringDataset',
|
16 |
+
init_args: {
|
17 |
+
path: 'sbintuitions/JMTEB',
|
18 |
+
split: 'test',
|
19 |
+
name: 'mewsc16_ja',
|
20 |
+
},
|
21 |
+
},
|
22 |
+
},
|
23 |
+
},
|
24 |
+
}
|
jmteb_config/tasks/mrtydi.jsonnet
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
mrtydi: {
|
3 |
+
class_path: 'RetrievalEvaluator',
|
4 |
+
init_args: {
|
5 |
+
doc_prefix: '文章: ',
|
6 |
+
query_prefix: 'クエリ: ',
|
7 |
+
val_query_dataset: {
|
8 |
+
class_path: 'HfRetrievalQueryDataset',
|
9 |
+
init_args: {
|
10 |
+
path: 'sbintuitions/JMTEB',
|
11 |
+
split: 'validation',
|
12 |
+
name: 'mrtydi-query',
|
13 |
+
},
|
14 |
+
},
|
15 |
+
test_query_dataset: {
|
16 |
+
class_path: 'HfRetrievalQueryDataset',
|
17 |
+
init_args: {
|
18 |
+
path: 'sbintuitions/JMTEB',
|
19 |
+
split: 'test',
|
20 |
+
name: 'mrtydi-query',
|
21 |
+
},
|
22 |
+
},
|
23 |
+
doc_dataset: {
|
24 |
+
class_path: 'HfRetrievalDocDataset',
|
25 |
+
init_args: {
|
26 |
+
path: 'sbintuitions/JMTEB',
|
27 |
+
split: 'corpus',
|
28 |
+
name: 'mrtydi-corpus',
|
29 |
+
},
|
30 |
+
},
|
31 |
+
},
|
32 |
+
},
|
33 |
+
}
|
jmteb_config/tasks/nlp_journal_abs_intro.jsonnet
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
nlp_journal_abs_intro: {
|
3 |
+
class_path: 'RetrievalEvaluator',
|
4 |
+
init_args: {
|
5 |
+
doc_prefix: '文章: ',
|
6 |
+
query_prefix: '文章: ',
|
7 |
+
val_query_dataset: {
|
8 |
+
class_path: 'HfRetrievalQueryDataset',
|
9 |
+
init_args: {
|
10 |
+
path: 'sbintuitions/JMTEB',
|
11 |
+
split: 'validation',
|
12 |
+
name: 'nlp_journal_abs_intro-query',
|
13 |
+
},
|
14 |
+
},
|
15 |
+
test_query_dataset: {
|
16 |
+
class_path: 'HfRetrievalQueryDataset',
|
17 |
+
init_args: {
|
18 |
+
path: 'sbintuitions/JMTEB',
|
19 |
+
split: 'test',
|
20 |
+
name: 'nlp_journal_abs_intro-query',
|
21 |
+
},
|
22 |
+
},
|
23 |
+
doc_dataset: {
|
24 |
+
class_path: 'HfRetrievalDocDataset',
|
25 |
+
init_args: {
|
26 |
+
path: 'sbintuitions/JMTEB',
|
27 |
+
split: 'corpus',
|
28 |
+
name: 'nlp_journal_abs_intro-corpus',
|
29 |
+
},
|
30 |
+
},
|
31 |
+
},
|
32 |
+
},
|
33 |
+
}
|
jmteb_config/tasks/nlp_journal_title_abs.jsonnet
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
nlp_journal_title_abs: {
|
3 |
+
class_path: 'RetrievalEvaluator',
|
4 |
+
init_args: {
|
5 |
+
doc_prefix: '文章: ',
|
6 |
+
query_prefix: 'クエリ: ',
|
7 |
+
val_query_dataset: {
|
8 |
+
class_path: 'HfRetrievalQueryDataset',
|
9 |
+
init_args: {
|
10 |
+
path: 'sbintuitions/JMTEB',
|
11 |
+
split: 'validation',
|
12 |
+
name: 'nlp_journal_title_abs-query',
|
13 |
+
},
|
14 |
+
},
|
15 |
+
test_query_dataset: {
|
16 |
+
class_path: 'HfRetrievalQueryDataset',
|
17 |
+
init_args: {
|
18 |
+
path: 'sbintuitions/JMTEB',
|
19 |
+
split: 'test',
|
20 |
+
name: 'nlp_journal_title_abs-query',
|
21 |
+
},
|
22 |
+
},
|
23 |
+
doc_dataset: {
|
24 |
+
class_path: 'HfRetrievalDocDataset',
|
25 |
+
init_args: {
|
26 |
+
path: 'sbintuitions/JMTEB',
|
27 |
+
split: 'corpus',
|
28 |
+
name: 'nlp_journal_title_abs-corpus',
|
29 |
+
},
|
30 |
+
},
|
31 |
+
},
|
32 |
+
},
|
33 |
+
}
|
jmteb_config/tasks/nlp_journal_title_intro.jsonnet
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
nlp_journal_title_intro: {
|
3 |
+
class_path: 'RetrievalEvaluator',
|
4 |
+
init_args: {
|
5 |
+
doc_prefix: '文章: ',
|
6 |
+
query_prefix: 'クエリ: ',
|
7 |
+
val_query_dataset: {
|
8 |
+
class_path: 'HfRetrievalQueryDataset',
|
9 |
+
init_args: {
|
10 |
+
path: 'sbintuitions/JMTEB',
|
11 |
+
split: 'validation',
|
12 |
+
name: 'nlp_journal_title_intro-query',
|
13 |
+
},
|
14 |
+
},
|
15 |
+
test_query_dataset: {
|
16 |
+
class_path: 'HfRetrievalQueryDataset',
|
17 |
+
init_args: {
|
18 |
+
path: 'sbintuitions/JMTEB',
|
19 |
+
split: 'test',
|
20 |
+
name: 'nlp_journal_title_intro-query',
|
21 |
+
},
|
22 |
+
},
|
23 |
+
doc_dataset: {
|
24 |
+
class_path: 'HfRetrievalDocDataset',
|
25 |
+
init_args: {
|
26 |
+
path: 'sbintuitions/JMTEB',
|
27 |
+
split: 'corpus',
|
28 |
+
name: 'nlp_journal_title_intro-corpus',
|
29 |
+
},
|
30 |
+
},
|
31 |
+
},
|
32 |
+
},
|
33 |
+
}
|
jmteb_config/tasks/paws_x_ja.jsonnet
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
paws_x_ja: {
|
3 |
+
class_path: 'PairClassificationEvaluator',
|
4 |
+
init_args: {
|
5 |
+
sentence1_prefix: 'クエリ: ',
|
6 |
+
sentence2_prefix: 'クエリ: ',
|
7 |
+
val_dataset: {
|
8 |
+
class_path: 'HfPairClassificationDataset',
|
9 |
+
init_args: {
|
10 |
+
path: 'sbintuitions/JMTEB',
|
11 |
+
split: 'validation',
|
12 |
+
name: 'paws_x_ja',
|
13 |
+
},
|
14 |
+
},
|
15 |
+
test_dataset: {
|
16 |
+
class_path: 'HfPairClassificationDataset',
|
17 |
+
init_args: {
|
18 |
+
path: 'sbintuitions/JMTEB',
|
19 |
+
split: 'test',
|
20 |
+
name: 'paws_x_ja',
|
21 |
+
},
|
22 |
+
},
|
23 |
+
},
|
24 |
+
},
|
25 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3e863b345faba4d2fb42a8bf0270e52c3391225d6eb08f213046d46caddba6df
|
3 |
+
size 222436792
|
modules.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
}
|
14 |
+
]
|
result/Classification/scores_amazon_counterfactual_classification.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metric_name": "macro_f1",
|
3 |
+
"metric_value": 0.7665550732749669,
|
4 |
+
"details": {
|
5 |
+
"optimal_classifier_name": "logreg",
|
6 |
+
"val_scores": {
|
7 |
+
"knn_cosine_k_2": {
|
8 |
+
"accuracy": 0.9098712446351931,
|
9 |
+
"macro_f1": 0.6139035745285253
|
10 |
+
},
|
11 |
+
"logreg": {
|
12 |
+
"accuracy": 0.9206008583690987,
|
13 |
+
"macro_f1": 0.7381028328396749
|
14 |
+
}
|
15 |
+
},
|
16 |
+
"test_scores": {
|
17 |
+
"logreg": {
|
18 |
+
"accuracy": 0.923982869379015,
|
19 |
+
"macro_f1": 0.7665550732749669
|
20 |
+
}
|
21 |
+
}
|
22 |
+
}
|
23 |
+
}
|
result/Classification/scores_amazon_review_classification.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metric_name": "macro_f1",
|
3 |
+
"metric_value": 0.5575876111411316,
|
4 |
+
"details": {
|
5 |
+
"optimal_classifier_name": "logreg",
|
6 |
+
"val_scores": {
|
7 |
+
"knn_cosine_k_2": {
|
8 |
+
"accuracy": 0.4314,
|
9 |
+
"macro_f1": 0.4209604852624187
|
10 |
+
},
|
11 |
+
"logreg": {
|
12 |
+
"accuracy": 0.5702,
|
13 |
+
"macro_f1": 0.5653832808449197
|
14 |
+
}
|
15 |
+
},
|
16 |
+
"test_scores": {
|
17 |
+
"logreg": {
|
18 |
+
"accuracy": 0.562,
|
19 |
+
"macro_f1": 0.5575876111411316
|
20 |
+
}
|
21 |
+
}
|
22 |
+
}
|
23 |
+
}
|
result/Classification/scores_massive_intent_classification.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metric_name": "macro_f1",
|
3 |
+
"metric_value": 0.8141210121425055,
|
4 |
+
"details": {
|
5 |
+
"optimal_classifier_name": "logreg",
|
6 |
+
"val_scores": {
|
7 |
+
"knn_cosine_k_2": {
|
8 |
+
"accuracy": 0.7757009345794392,
|
9 |
+
"macro_f1": 0.7456574019302791
|
10 |
+
},
|
11 |
+
"logreg": {
|
12 |
+
"accuracy": 0.8421052631578947,
|
13 |
+
"macro_f1": 0.8271757887821682
|
14 |
+
}
|
15 |
+
},
|
16 |
+
"test_scores": {
|
17 |
+
"logreg": {
|
18 |
+
"accuracy": 0.8416274377942166,
|
19 |
+
"macro_f1": 0.8141210121425055
|
20 |
+
}
|
21 |
+
}
|
22 |
+
}
|
23 |
+
}
|
result/Classification/scores_massive_scenario_classification.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metric_name": "macro_f1",
|
3 |
+
"metric_value": 0.8848812917656395,
|
4 |
+
"details": {
|
5 |
+
"optimal_classifier_name": "logreg",
|
6 |
+
"val_scores": {
|
7 |
+
"knn_cosine_k_2": {
|
8 |
+
"accuracy": 0.8657156910969012,
|
9 |
+
"macro_f1": 0.8581068338871749
|
10 |
+
},
|
11 |
+
"logreg": {
|
12 |
+
"accuracy": 0.8898180029513035,
|
13 |
+
"macro_f1": 0.887764836229313
|
14 |
+
}
|
15 |
+
},
|
16 |
+
"test_scores": {
|
17 |
+
"logreg": {
|
18 |
+
"accuracy": 0.8860121049092132,
|
19 |
+
"macro_f1": 0.8848812917656395
|
20 |
+
}
|
21 |
+
}
|
22 |
+
}
|
23 |
+
}
|
result/Clustering/scores_livedoor_news.json
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metric_name": "v_measure_score",
|
3 |
+
"metric_value": 0.5427223607801758,
|
4 |
+
"details": {
|
5 |
+
"optimal_clustering_model_name": "BisectingKMeans",
|
6 |
+
"val_scores": {
|
7 |
+
"MiniBatchKMeans": {
|
8 |
+
"v_measure_score": 0.5453092926343514,
|
9 |
+
"homogeneity_score": 0.5376167786682042,
|
10 |
+
"completeness_score": 0.5532251395371498
|
11 |
+
},
|
12 |
+
"AgglomerativeClustering": {
|
13 |
+
"v_measure_score": 0.5221218542278205,
|
14 |
+
"homogeneity_score": 0.5145096860981694,
|
15 |
+
"completeness_score": 0.5299626488611732
|
16 |
+
},
|
17 |
+
"BisectingKMeans": {
|
18 |
+
"v_measure_score": 0.5498693214751904,
|
19 |
+
"homogeneity_score": 0.5475063196854639,
|
20 |
+
"completeness_score": 0.552252808804315
|
21 |
+
},
|
22 |
+
"Birch": {
|
23 |
+
"v_measure_score": 0.5208037508658081,
|
24 |
+
"homogeneity_score": 0.5132767763409753,
|
25 |
+
"completeness_score": 0.5285547703444661
|
26 |
+
}
|
27 |
+
},
|
28 |
+
"test_scores": {
|
29 |
+
"BisectingKMeans": {
|
30 |
+
"v_measure_score": 0.5427223607801758,
|
31 |
+
"homogeneity_score": 0.5417341205522448,
|
32 |
+
"completeness_score": 0.5437142131253088
|
33 |
+
}
|
34 |
+
}
|
35 |
+
}
|
36 |
+
}
|
result/Clustering/scores_mewsc16.json
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metric_name": "v_measure_score",
|
3 |
+
"metric_value": 0.5404099864321413,
|
4 |
+
"details": {
|
5 |
+
"optimal_clustering_model_name": "AgglomerativeClustering",
|
6 |
+
"val_scores": {
|
7 |
+
"MiniBatchKMeans": {
|
8 |
+
"v_measure_score": 0.502791381026052,
|
9 |
+
"homogeneity_score": 0.5517784337158165,
|
10 |
+
"completeness_score": 0.46179324043437603
|
11 |
+
},
|
12 |
+
"AgglomerativeClustering": {
|
13 |
+
"v_measure_score": 0.5302546097654716,
|
14 |
+
"homogeneity_score": 0.5735135314580632,
|
15 |
+
"completeness_score": 0.4930638394517115
|
16 |
+
},
|
17 |
+
"BisectingKMeans": {
|
18 |
+
"v_measure_score": 0.48656257334532493,
|
19 |
+
"homogeneity_score": 0.5342920872487864,
|
20 |
+
"completeness_score": 0.4466613135580361
|
21 |
+
},
|
22 |
+
"Birch": {
|
23 |
+
"v_measure_score": 0.49305647750510134,
|
24 |
+
"homogeneity_score": 0.5374392451928177,
|
25 |
+
"completeness_score": 0.45544495608862656
|
26 |
+
}
|
27 |
+
},
|
28 |
+
"test_scores": {
|
29 |
+
"AgglomerativeClustering": {
|
30 |
+
"v_measure_score": 0.5404099864321413,
|
31 |
+
"homogeneity_score": 0.5789428395923124,
|
32 |
+
"completeness_score": 0.5066863291321174
|
33 |
+
}
|
34 |
+
}
|
35 |
+
}
|
36 |
+
}
|
result/PairClassification/scores_paws_x_ja.json
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metric_name": "binary_f1",
|
3 |
+
"metric_value": 0.6237623762376238,
|
4 |
+
"details": {
|
5 |
+
"optimal_distance_metric": "euclidean_distances",
|
6 |
+
"val_scores": {
|
7 |
+
"cosine_distances": {
|
8 |
+
"accuracy": 0.5725,
|
9 |
+
"accuracy_threshold": 0.6920696496963501,
|
10 |
+
"binary_f1": 0.5979670522257273,
|
11 |
+
"binary_f1_threshold": 1.0
|
12 |
+
},
|
13 |
+
"manhatten_distances": {
|
14 |
+
"accuracy": 0.6015,
|
15 |
+
"accuracy_threshold": 19.63576316833496,
|
16 |
+
"binary_f1": 0.6017636684303351,
|
17 |
+
"binary_f1_threshold": 274.46441650390625
|
18 |
+
},
|
19 |
+
"euclidean_distances": {
|
20 |
+
"accuracy": 0.602,
|
21 |
+
"accuracy_threshold": 0.9731899499893188,
|
22 |
+
"binary_f1": 0.6019760056457304,
|
23 |
+
"binary_f1_threshold": 12.281266212463379
|
24 |
+
},
|
25 |
+
"dot_similarities": {
|
26 |
+
"accuracy": 0.574,
|
27 |
+
"accuracy_threshold": 332.39276123046875,
|
28 |
+
"binary_f1": 0.6014825273561596,
|
29 |
+
"binary_f1_threshold": 263.39337158203125
|
30 |
+
}
|
31 |
+
},
|
32 |
+
"test_scores": {
|
33 |
+
"euclidean_distances": {
|
34 |
+
"accuracy": 0.566,
|
35 |
+
"accuracy_threshold": 0.9731899499893188,
|
36 |
+
"binary_f1": 0.6237623762376238,
|
37 |
+
"binary_f1_threshold": 12.281266212463379
|
38 |
+
}
|
39 |
+
}
|
40 |
+
}
|
41 |
+
}
|
result/Reranking/scores_esci.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metric_name": "ndcg@10",
|
3 |
+
"metric_value": 0.9290942178703699,
|
4 |
+
"details": {
|
5 |
+
"optimal_distance_metric": "cosine_similarity",
|
6 |
+
"val_scores": {
|
7 |
+
"cosine_similarity": {
|
8 |
+
"ndcg@10": 0.9419326097489188,
|
9 |
+
"ndcg@20": 0.9546274758967366,
|
10 |
+
"ndcg@40": 0.9625015652058491
|
11 |
+
},
|
12 |
+
"dot_score": {
|
13 |
+
"ndcg@10": 0.933159692803982,
|
14 |
+
"ndcg@20": 0.9482607249371672,
|
15 |
+
"ndcg@40": 0.956621759096631
|
16 |
+
},
|
17 |
+
"euclidean_distance": {
|
18 |
+
"ndcg@10": 0.9418339438093611,
|
19 |
+
"ndcg@20": 0.9547832679237122,
|
20 |
+
"ndcg@40": 0.9627457241783169
|
21 |
+
}
|
22 |
+
},
|
23 |
+
"test_scores": {
|
24 |
+
"cosine_similarity": {
|
25 |
+
"ndcg@10": 0.9290942178703699,
|
26 |
+
"ndcg@20": 0.9467035648480672,
|
27 |
+
"ndcg@40": 0.9563220304481116
|
28 |
+
}
|
29 |
+
}
|
30 |
+
}
|
31 |
+
}
|
result/Retrieval/scores_jagovfaqs_22k.json
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metric_name": "ndcg@10",
|
3 |
+
"metric_value": 0.7455660589538348,
|
4 |
+
"details": {
|
5 |
+
"optimal_distance_metric": "euclidean_distance",
|
6 |
+
"val_scores": {
|
7 |
+
"cosine_similarity": {
|
8 |
+
"accuracy@1": 0.6042702544603685,
|
9 |
+
"accuracy@3": 0.7853173442527055,
|
10 |
+
"accuracy@5": 0.830944720678561,
|
11 |
+
"accuracy@10": 0.8821292775665399,
|
12 |
+
"ndcg@10": 0.7477862730518441,
|
13 |
+
"mrr@10": 0.7043207426287267
|
14 |
+
},
|
15 |
+
"dot_score": {
|
16 |
+
"accuracy@1": 0.4597835624451594,
|
17 |
+
"accuracy@3": 0.6607195086282539,
|
18 |
+
"accuracy@5": 0.7282831237203861,
|
19 |
+
"accuracy@10": 0.80549868382568,
|
20 |
+
"ndcg@10": 0.630976061323317,
|
21 |
+
"mrr@10": 0.5752777429583498
|
22 |
+
},
|
23 |
+
"euclidean_distance": {
|
24 |
+
"accuracy@1": 0.6092424685580579,
|
25 |
+
"accuracy@3": 0.7861947937993565,
|
26 |
+
"accuracy@5": 0.8283123720386077,
|
27 |
+
"accuracy@10": 0.8780345130155016,
|
28 |
+
"ndcg@10": 0.7480985513112418,
|
29 |
+
"mrr@10": 0.7060561428432148
|
30 |
+
}
|
31 |
+
},
|
32 |
+
"test_scores": {
|
33 |
+
"euclidean_distance": {
|
34 |
+
"accuracy@1": 0.6035087719298246,
|
35 |
+
"accuracy@3": 0.7795321637426901,
|
36 |
+
"accuracy@5": 0.8277777777777777,
|
37 |
+
"accuracy@10": 0.881578947368421,
|
38 |
+
"ndcg@10": 0.7455660589538348,
|
39 |
+
"mrr@10": 0.7017308317089019
|
40 |
+
}
|
41 |
+
}
|
42 |
+
}
|
43 |
+
}
|
result/Retrieval/scores_jaqket.json
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metric_name": "ndcg@10",
|
3 |
+
"metric_value": 0.5012253145754781,
|
4 |
+
"details": {
|
5 |
+
"optimal_distance_metric": "cosine_similarity",
|
6 |
+
"val_scores": {
|
7 |
+
"cosine_similarity": {
|
8 |
+
"accuracy@1": 0.3407035175879397,
|
9 |
+
"accuracy@3": 0.521608040201005,
|
10 |
+
"accuracy@5": 0.6040201005025125,
|
11 |
+
"accuracy@10": 0.6894472361809045,
|
12 |
+
"ndcg@10": 0.5074962109064866,
|
13 |
+
"mrr@10": 0.44994017707585504
|
14 |
+
},
|
15 |
+
"dot_score": {
|
16 |
+
"accuracy@1": 0.31055276381909547,
|
17 |
+
"accuracy@3": 0.507537688442211,
|
18 |
+
"accuracy@5": 0.5738693467336683,
|
19 |
+
"accuracy@10": 0.6804020100502512,
|
20 |
+
"ndcg@10": 0.48656131133927916,
|
21 |
+
"mrr@10": 0.42555116854111785
|
22 |
+
},
|
23 |
+
"euclidean_distance": {
|
24 |
+
"accuracy@1": 0.3055276381909548,
|
25 |
+
"accuracy@3": 0.4814070351758794,
|
26 |
+
"accuracy@5": 0.5597989949748744,
|
27 |
+
"accuracy@10": 0.6391959798994975,
|
28 |
+
"ndcg@10": 0.4655083260444005,
|
29 |
+
"mrr@10": 0.4106070032703195
|
30 |
+
}
|
31 |
+
},
|
32 |
+
"test_scores": {
|
33 |
+
"cosine_similarity": {
|
34 |
+
"accuracy@1": 0.3159478435305918,
|
35 |
+
"accuracy@3": 0.526579739217653,
|
36 |
+
"accuracy@5": 0.60481444332999,
|
37 |
+
"accuracy@10": 0.6920762286860582,
|
38 |
+
"ndcg@10": 0.5012253145754781,
|
39 |
+
"mrr@10": 0.4404156915190016
|
40 |
+
}
|
41 |
+
}
|
42 |
+
}
|
43 |
+
}
|
result/Retrieval/scores_mrtydi.json
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metric_name": "ndcg@10",
|
3 |
+
"metric_value": 0.3545113073009125,
|
4 |
+
"details": {
|
5 |
+
"optimal_distance_metric": "euclidean_distance",
|
6 |
+
"val_scores": {
|
7 |
+
"cosine_similarity": {
|
8 |
+
"accuracy@1": 0.22306034482758622,
|
9 |
+
"accuracy@3": 0.37176724137931033,
|
10 |
+
"accuracy@5": 0.4536637931034483,
|
11 |
+
"accuracy@10": 0.5549568965517241,
|
12 |
+
"ndcg@10": 0.37815020333355365,
|
13 |
+
"mrr@10": 0.3228995621236997
|
14 |
+
},
|
15 |
+
"dot_score": {
|
16 |
+
"accuracy@1": 0.13793103448275862,
|
17 |
+
"accuracy@3": 0.2704741379310345,
|
18 |
+
"accuracy@5": 0.3394396551724138,
|
19 |
+
"accuracy@10": 0.4170258620689655,
|
20 |
+
"ndcg@10": 0.2698064952674162,
|
21 |
+
"mrr@10": 0.22368979200875752
|
22 |
+
},
|
23 |
+
"euclidean_distance": {
|
24 |
+
"accuracy@1": 0.22844827586206898,
|
25 |
+
"accuracy@3": 0.38362068965517243,
|
26 |
+
"accuracy@5": 0.4665948275862069,
|
27 |
+
"accuracy@10": 0.5668103448275862,
|
28 |
+
"ndcg@10": 0.38745306818571434,
|
29 |
+
"mrr@10": 0.33128378147235893
|
30 |
+
}
|
31 |
+
},
|
32 |
+
"test_scores": {
|
33 |
+
"euclidean_distance": {
|
34 |
+
"accuracy@1": 0.23194444444444445,
|
35 |
+
"accuracy@3": 0.3888888888888889,
|
36 |
+
"accuracy@5": 0.46805555555555556,
|
37 |
+
"accuracy@10": 0.5708333333333333,
|
38 |
+
"ndcg@10": 0.3545113073009125,
|
39 |
+
"mrr@10": 0.3320238095238095
|
40 |
+
}
|
41 |
+
}
|
42 |
+
}
|
43 |
+
}
|
result/Retrieval/scores_nlp_journal_abs_intro.json
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metric_name": "ndcg@10",
|
3 |
+
"metric_value": 0.8689204088388403,
|
4 |
+
"details": {
|
5 |
+
"optimal_distance_metric": "cosine_similarity",
|
6 |
+
"val_scores": {
|
7 |
+
"cosine_similarity": {
|
8 |
+
"accuracy@1": 0.85,
|
9 |
+
"accuracy@3": 0.93,
|
10 |
+
"accuracy@5": 0.93,
|
11 |
+
"accuracy@10": 0.95,
|
12 |
+
"ndcg@10": 0.9031188595062929,
|
13 |
+
"mrr@10": 0.8877777777777779
|
14 |
+
},
|
15 |
+
"dot_score": {
|
16 |
+
"accuracy@1": 0.75,
|
17 |
+
"accuracy@3": 0.87,
|
18 |
+
"accuracy@5": 0.88,
|
19 |
+
"accuracy@10": 0.91,
|
20 |
+
"ndcg@10": 0.8329701303885662,
|
21 |
+
"mrr@10": 0.8079563492063491
|
22 |
+
},
|
23 |
+
"euclidean_distance": {
|
24 |
+
"accuracy@1": 0.83,
|
25 |
+
"accuracy@3": 0.92,
|
26 |
+
"accuracy@5": 0.93,
|
27 |
+
"accuracy@10": 0.94,
|
28 |
+
"ndcg@10": 0.8903171995628786,
|
29 |
+
"mrr@10": 0.87375
|
30 |
+
}
|
31 |
+
},
|
32 |
+
"test_scores": {
|
33 |
+
"cosine_similarity": {
|
34 |
+
"accuracy@1": 0.7945544554455446,
|
35 |
+
"accuracy@3": 0.8836633663366337,
|
36 |
+
"accuracy@5": 0.9084158415841584,
|
37 |
+
"accuracy@10": 0.943069306930693,
|
38 |
+
"ndcg@10": 0.8689204088388403,
|
39 |
+
"mrr@10": 0.8452508643721514
|
40 |
+
}
|
41 |
+
}
|
42 |
+
}
|
43 |
+
}
|
result/Retrieval/scores_nlp_journal_title_abs.json
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metric_name": "ndcg@10",
|
3 |
+
"metric_value": 0.9656989703684407,
|
4 |
+
"details": {
|
5 |
+
"optimal_distance_metric": "cosine_similarity",
|
6 |
+
"val_scores": {
|
7 |
+
"cosine_similarity": {
|
8 |
+
"accuracy@1": 0.9,
|
9 |
+
"accuracy@3": 0.96,
|
10 |
+
"accuracy@5": 0.98,
|
11 |
+
"accuracy@10": 0.99,
|
12 |
+
"ndcg@10": 0.9477320812882918,
|
13 |
+
"mrr@10": 0.9339444444444445
|
14 |
+
},
|
15 |
+
"dot_score": {
|
16 |
+
"accuracy@1": 0.82,
|
17 |
+
"accuracy@3": 0.92,
|
18 |
+
"accuracy@5": 0.94,
|
19 |
+
"accuracy@10": 0.96,
|
20 |
+
"ndcg@10": 0.8940025955079818,
|
21 |
+
"mrr@10": 0.8724285714285713
|
22 |
+
},
|
23 |
+
"euclidean_distance": {
|
24 |
+
"accuracy@1": 0.89,
|
25 |
+
"accuracy@3": 0.97,
|
26 |
+
"accuracy@5": 0.98,
|
27 |
+
"accuracy@10": 0.99,
|
28 |
+
"ndcg@10": 0.9453171995628784,
|
29 |
+
"mrr@10": 0.9304166666666666
|
30 |
+
}
|
31 |
+
},
|
32 |
+
"test_scores": {
|
33 |
+
"cosine_similarity": {
|
34 |
+
"accuracy@1": 0.9306930693069307,
|
35 |
+
"accuracy@3": 0.9777227722772277,
|
36 |
+
"accuracy@5": 0.9876237623762376,
|
37 |
+
"accuracy@10": 0.995049504950495,
|
38 |
+
"ndcg@10": 0.9656989703684407,
|
39 |
+
"mrr@10": 0.955987741631306
|
40 |
+
}
|
41 |
+
}
|
42 |
+
}
|
43 |
+
}
|
result/Retrieval/scores_nlp_journal_title_intro.json
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metric_name": "ndcg@10",
|
3 |
+
"metric_value": 0.7531306059721564,
|
4 |
+
"details": {
|
5 |
+
"optimal_distance_metric": "cosine_similarity",
|
6 |
+
"val_scores": {
|
7 |
+
"cosine_similarity": {
|
8 |
+
"accuracy@1": 0.57,
|
9 |
+
"accuracy@3": 0.8,
|
10 |
+
"accuracy@5": 0.83,
|
11 |
+
"accuracy@10": 0.9,
|
12 |
+
"ndcg@10": 0.7448902792577736,
|
13 |
+
"mrr@10": 0.6942023809523811
|
14 |
+
},
|
15 |
+
"dot_score": {
|
16 |
+
"accuracy@1": 0.49,
|
17 |
+
"accuracy@3": 0.68,
|
18 |
+
"accuracy@5": 0.71,
|
19 |
+
"accuracy@10": 0.83,
|
20 |
+
"ndcg@10": 0.6537395005077568,
|
21 |
+
"mrr@10": 0.5984801587301588
|
22 |
+
},
|
23 |
+
"euclidean_distance": {
|
24 |
+
"accuracy@1": 0.58,
|
25 |
+
"accuracy@3": 0.75,
|
26 |
+
"accuracy@5": 0.85,
|
27 |
+
"accuracy@10": 0.9,
|
28 |
+
"ndcg@10": 0.7411266935263704,
|
29 |
+
"mrr@10": 0.6896904761904763
|
30 |
+
}
|
31 |
+
},
|
32 |
+
"test_scores": {
|
33 |
+
"cosine_similarity": {
|
34 |
+
"accuracy@1": 0.6237623762376238,
|
35 |
+
"accuracy@3": 0.7896039603960396,
|
36 |
+
"accuracy@5": 0.8242574257425742,
|
37 |
+
"accuracy@10": 0.8811881188118812,
|
38 |
+
"ndcg@10": 0.7531306059721564,
|
39 |
+
"mrr@10": 0.7120059327361306
|
40 |
+
}
|
41 |
+
}
|
42 |
+
}
|
43 |
+
}
|
result/STS/scores_jsick.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metric_name": "spearman",
|
3 |
+
"metric_value": 0.8231772134744029,
|
4 |
+
"details": {
|
5 |
+
"optimal_similarity_metric": "cosine_similarity",
|
6 |
+
"val_scores": {
|
7 |
+
"cosine_similarity": {
|
8 |
+
"pearson": 0.8390312744889947,
|
9 |
+
"spearman": 0.8309726355825223
|
10 |
+
},
|
11 |
+
"manhatten_distance": {
|
12 |
+
"pearson": 0.8439757378089565,
|
13 |
+
"spearman": 0.8296746939532708
|
14 |
+
},
|
15 |
+
"euclidean_distance": {
|
16 |
+
"pearson": 0.8439757378089565,
|
17 |
+
"spearman": 0.8296746939532708
|
18 |
+
},
|
19 |
+
"dot_score": {
|
20 |
+
"pearson": 0.8235943624962084,
|
21 |
+
"spearman": 0.8066842966908715
|
22 |
+
}
|
23 |
+
},
|
24 |
+
"test_scores": {
|
25 |
+
"cosine_similarity": {
|
26 |
+
"pearson": 0.8323321086750828,
|
27 |
+
"spearman": 0.8231772134744029
|
28 |
+
}
|
29 |
+
}
|
30 |
+
}
|
31 |
+
}
|
result/STS/scores_jsts.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metric_name": "spearman",
|
3 |
+
"metric_value": 0.8342848039994751,
|
4 |
+
"details": {
|
5 |
+
"optimal_similarity_metric": "manhatten_distance",
|
6 |
+
"val_scores": {
|
7 |
+
"cosine_similarity": {
|
8 |
+
"pearson": 0.8402004412140045,
|
9 |
+
"spearman": 0.7947630577888891
|
10 |
+
},
|
11 |
+
"manhatten_distance": {
|
12 |
+
"pearson": 0.8359705278620446,
|
13 |
+
"spearman": 0.7954996671020325
|
14 |
+
},
|
15 |
+
"euclidean_distance": {
|
16 |
+
"pearson": 0.8359705278620446,
|
17 |
+
"spearman": 0.7954996671020325
|
18 |
+
},
|
19 |
+
"dot_score": {
|
20 |
+
"pearson": 0.8146522053769387,
|
21 |
+
"spearman": 0.7576805023715597
|
22 |
+
}
|
23 |
+
},
|
24 |
+
"test_scores": {
|
25 |
+
"manhatten_distance": {
|
26 |
+
"pearson": 0.8665411120423515,
|
27 |
+
"spearman": 0.8342848039994751
|
28 |
+
}
|
29 |
+
}
|
30 |
+
}
|
31 |
+
}
|
result/summary.json
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"Classification": {
|
3 |
+
"amazon_counterfactual_classification": {
|
4 |
+
"macro_f1": 0.7665550732749669
|
5 |
+
},
|
6 |
+
"amazon_review_classification": {
|
7 |
+
"macro_f1": 0.5575876111411316
|
8 |
+
},
|
9 |
+
"massive_intent_classification": {
|
10 |
+
"macro_f1": 0.8141210121425055
|
11 |
+
},
|
12 |
+
"massive_scenario_classification": {
|
13 |
+
"macro_f1": 0.8848812917656395
|
14 |
+
}
|
15 |
+
},
|
16 |
+
"Reranking": {
|
17 |
+
"esci": {
|
18 |
+
"ndcg@10": 0.9290942178703699
|
19 |
+
}
|
20 |
+
},
|
21 |
+
"Retrieval": {
|
22 |
+
"jagovfaqs_22k": {
|
23 |
+
"ndcg@10": 0.7455660589538348
|
24 |
+
},
|
25 |
+
"jaqket": {
|
26 |
+
"ndcg@10": 0.5012253145754781
|
27 |
+
},
|
28 |
+
"mrtydi": {
|
29 |
+
"ndcg@10": 0.3545113073009125
|
30 |
+
},
|
31 |
+
"nlp_journal_abs_intro": {
|
32 |
+
"ndcg@10": 0.8689204088388403
|
33 |
+
},
|
34 |
+
"nlp_journal_title_abs": {
|
35 |
+
"ndcg@10": 0.9656989703684407
|
36 |
+
},
|
37 |
+
"nlp_journal_title_intro": {
|
38 |
+
"ndcg@10": 0.7531306059721564
|
39 |
+
}
|
40 |
+
},
|
41 |
+
"STS": {
|
42 |
+
"jsick": {
|
43 |
+
"spearman": 0.8231772134744029
|
44 |
+
},
|
45 |
+
"jsts": {
|
46 |
+
"spearman": 0.8342848039994751
|
47 |
+
}
|
48 |
+
},
|
49 |
+
"Clustering": {
|
50 |
+
"livedoor_news": {
|
51 |
+
"v_measure_score": 0.5427223607801758
|
52 |
+
},
|
53 |
+
"mewsc16": {
|
54 |
+
"v_measure_score": 0.5404099864321413
|
55 |
+
}
|
56 |
+
},
|
57 |
+
"PairClassification": {
|
58 |
+
"paws_x_ja": {
|
59 |
+
"binary_f1": 0.6237623762376238
|
60 |
+
}
|
61 |
+
}
|
62 |
+
}
|
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 512,
|
3 |
+
"do_lower_case": false
|
4 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": {
|
3 |
+
"content": "[CLS]",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"mask_token": {
|
10 |
+
"content": "[MASK]",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "[PAD]",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"sep_token": {
|
24 |
+
"content": "[SEP]",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"unk_token": {
|
31 |
+
"content": "[UNK]",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
}
|
37 |
+
}
|
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c564888dbafbfebcc7a62d2f4049afe83e6fb5acd50a97082a84407c77ddd8ff
|
3 |
+
size 17082922
|
tokenizer_config.json
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"1": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"2": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"3": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"4": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_lower_case": false,
|
47 |
+
"do_subword_tokenize": true,
|
48 |
+
"do_word_tokenize": true,
|
49 |
+
"jumanpp_kwargs": null,
|
50 |
+
"mask_token": "[MASK]",
|
51 |
+
"mecab_kwargs": {
|
52 |
+
"mecab_dic": "unidic_lite"
|
53 |
+
},
|
54 |
+
"model_max_length": 512,
|
55 |
+
"never_split": null,
|
56 |
+
"pad_token": "[PAD]",
|
57 |
+
"sep_token": "[SEP]",
|
58 |
+
"subword_tokenizer_type": "wordpiece",
|
59 |
+
"sudachi_kwargs": null,
|
60 |
+
"tokenizer_class": "BertJapaneseTokenizer",
|
61 |
+
"unk_token": "[UNK]",
|
62 |
+
"word_tokenizer_type": "mecab"
|
63 |
+
}
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|