usage added
Browse files
README.md
CHANGED
@@ -8,4 +8,45 @@ tags:
|
|
8 |
- umls
|
9 |
---
|
10 |
|
11 |
-
SapBERT-DE is a model for German biomedical entity linking. It was obtained by fine-tuning the multilingual entity linking model [`cambridgeltl/SapBERT-UMLS-2020AB-all-lang-from-XLMR`](https://huggingface.co/cambridgeltl/SapBERT-UMLS-2020AB-all-lang-from-XLMR) on [UMLS-Wikidata](https://zenodo.org/records/11003203), a German biomedical entity linking knowledge base.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
- umls
|
9 |
---
|
10 |
|
11 |
+
SapBERT-DE is a model for German biomedical entity linking. It was obtained by fine-tuning the multilingual entity linking model [`cambridgeltl/SapBERT-UMLS-2020AB-all-lang-from-XLMR`](https://huggingface.co/cambridgeltl/SapBERT-UMLS-2020AB-all-lang-from-XLMR) on [UMLS-Wikidata](https://zenodo.org/records/11003203), a German biomedical entity linking knowledge base.
|
12 |
+
|
13 |
+
|
14 |
+
# Usage
|
15 |
+
```python
|
16 |
+
import numpy as np
|
17 |
+
from tqdm import tqdm
|
18 |
+
import torch
|
19 |
+
from transformers import AutoTokenizer, AutoModel
|
20 |
+
|
21 |
+
tokenizer = AutoTokenizer.from_pretrained("permediq/SapBERT-DE", use_fast=True)
|
22 |
+
model = AutoModel.from_pretrained("permediq/SapBERT-DE").cuda()
|
23 |
+
|
24 |
+
# entity descriptions to embed
|
25 |
+
entity_descriptions = ["Cerebellum", "Zerebellum", "Kleinhirn", "Anaesthesie"]
|
26 |
+
|
27 |
+
bs = 32 # batch size
|
28 |
+
all_embs = []
|
29 |
+
for i in tqdm(np.arange(0, len(entity_descriptions), bs)):
|
30 |
+
toks = tokenizer.batch_encode_plus(entity_descriptions[i:i+bs],
|
31 |
+
padding="max_length",
|
32 |
+
max_length=40, # model trained with 40 max_length
|
33 |
+
truncation=True,
|
34 |
+
return_tensors="pt")
|
35 |
+
toks_cuda = {}
|
36 |
+
for k,v in toks.items():
|
37 |
+
toks_cuda[k] = v.cuda()
|
38 |
+
cls_rep = model(**toks_cuda)[0][:,0,:]
|
39 |
+
all_embs.append(cls_rep.cpu().detach())
|
40 |
+
|
41 |
+
all_embs = torch.cat(all_embs)
|
42 |
+
|
43 |
+
def cos_sim(a, b):
|
44 |
+
a_norm = torch.nn.functional.normalize(a, p=2, dim=1)
|
45 |
+
b_norm = torch.nn.functional.normalize(b, p=2, dim=1)
|
46 |
+
return torch.mm(a_norm, b_norm.transpose(0, 1))
|
47 |
+
|
48 |
+
# cosine similarity of first entity with all the entities
|
49 |
+
print(cos_sim(all_embs[0].unsqueeze(0), all_embs))
|
50 |
+
|
51 |
+
# >>> tensor([[1.0000, 0.9337, 0.6206, 0.2086]])
|
52 |
+
```
|