Update README.md
README.md
CHANGED
@@ -17,7 +17,7 @@ import numpy as np
 model = AutoModelForMaskedLM.from_pretrained("aken12/splade-japanese")
 tokenizer = AutoTokenizer.from_pretrained("aken12/splade-japanese")
 
-query = "
+query = "筑波大学では何の研究が行われているか?"
 
 def encode_query(query, tokenizer, model):
     encoded_input = tokenizer(query, return_tensors="pt")
@@ -45,4 +45,38 @@ dict_splade = get_topk_tokens(model_output, vocab_dict, topk)
 
 for token, value in dict_splade.items():
     print(token, value)
-```
+```
+
+## output
+```
+に 250
+が 248
+は 247
+の 247
+、 244
+と 240
+を 239
+。 239
+も 238
+で 237
+から 221
+や 219
+な 206
+筑波 204
+( 204
+・ 202
+て 197
+へ 191
+にて 189
+など 188
+) 186
+まで 184
+た 182
+この 171
+- 170
+「 170
+より 166
+その 165
+: 163
+」 161
+```
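The diff only shows fragments of the README's example: the bodies of `encode_query` and `get_topk_tokens`, and the construction of `vocab_dict`, `model_output`, and `topk`, fall outside the changed hunks. As a rough guide to what the printed `token value` pairs represent, here is a minimal end-to-end sketch that fills in those gaps with the standard SPLADE query expansion (max-pooled `log(1 + ReLU(logits))` over the MLM head). The function bodies, the `topk=30` choice, and the scale of the weights are assumptions rather than the README's actual code, so it will not reproduce the exact numbers in the output above. The query string translates to "What kind of research is carried out at the University of Tsukuba?"

```python
import torch
from transformers import AutoModelForMaskedLM, AutoTokenizer

model = AutoModelForMaskedLM.from_pretrained("aken12/splade-japanese")
tokenizer = AutoTokenizer.from_pretrained("aken12/splade-japanese")

# Query from the README ("What kind of research is carried out at the University of Tsukuba?")
query = "筑波大学では何の研究が行われているか?"

def encode_query(query, tokenizer, model):
    # Assumed body: standard SPLADE activation log(1 + ReLU(logits)),
    # max-pooled over the sequence, with padding positions masked out.
    encoded_input = tokenizer(query, return_tensors="pt")
    with torch.no_grad():
        logits = model(**encoded_input).logits  # (1, seq_len, vocab_size)
    weights = torch.log1p(torch.relu(logits)) * encoded_input["attention_mask"].unsqueeze(-1)
    return torch.max(weights, dim=1).values.squeeze(0)  # (vocab_size,)

def get_topk_tokens(query_weights, vocab_dict, topk):
    # Assumed body: keep the topk highest-weighted vocabulary entries and
    # map their ids back to token strings.
    values, indices = torch.topk(query_weights, topk)
    return {vocab_dict[i.item()]: round(v.item(), 2) for i, v in zip(indices, values)}

vocab_dict = {idx: tok for tok, idx in tokenizer.get_vocab().items()}  # id -> token
model_output = encode_query(query, tokenizer, model)
dict_splade = get_topk_tokens(model_output, vocab_dict, topk=30)

for token, value in dict_splade.items():
    print(token, value)
```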