Spaces:
Running
Running
danielhajialigol
committed on
Commit
·
1841ebe
1
Parent(s):
eca4ff8
fixed model issue
Browse files
- all_summaries.csv +2 -2
- app.py +17 -3
- discharge_embeddings.pt +2 -2
- model.py +7 -1
all_summaries.csv
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:64b8415ab96d12e61393c8ca12ebb844ab32a57df314984e98e91e1064bebf41
|
3 |
+
size 640698121
|
app.py
CHANGED
@@ -4,9 +4,14 @@ import pandas as pd
|
|
4 |
import torch
|
5 |
|
6 |
from model import MimicTransformer
|
7 |
-
from utils import load_rule, get_attribution, get_diseases, get_drg_link, get_icd_annotations, visualize_attn
|
8 |
from transformers import AutoTokenizer, AutoModel, set_seed, pipeline
|
9 |
|
|
|
|
|
|
|
|
|
|
|
10 |
model_path = 'checkpoint_0_9113.bin'
|
11 |
related_tensor = torch.load('discharge_embeddings.pt')
|
12 |
all_summaries = pd.read_csv('all_summaries.csv')['SUMMARIES'].to_list()
|
@@ -16,7 +21,9 @@ similarity_model = AutoModel.from_pretrained('kamalkraj/BioSimCSE-BioLinkBERT-BA
|
|
16 |
similarity_model.eval()
|
17 |
|
18 |
def read_model(model, path):
|
19 |
-
model.load_state_dict(torch.load(path, map_location=torch.device('cpu'))
|
|
|
|
|
20 |
return model
|
21 |
|
22 |
mimic = MimicTransformer(cutoff=512)
|
@@ -50,8 +57,10 @@ def mean_pooling(model_output, attention_mask):
|
|
50 |
|
51 |
|
52 |
def get_model_results(text):
|
|
|
53 |
inputs = tokenizer(text, return_tensors='pt', padding='max_length', max_length=512, truncation=True)
|
54 |
-
|
|
|
55 |
attribution, reconstructed_text = get_attribution(text=text, tokenizer=tokenizer, model_outputs=outputs, inputs=inputs, k=10)
|
56 |
logits = outputs[0][0]
|
57 |
out = logits.detach().cpu()[0]
|
@@ -93,7 +102,12 @@ def find_related_summaries(text):
|
|
93 |
|
94 |
|
95 |
def run(text, related_discharges=False):
|
|
|
|
|
|
|
|
|
96 |
# initial drg results
|
|
|
97 |
model_results = get_model_results(text=text)
|
98 |
drg_code = model_results['class']
|
99 |
|
|
|
4 |
import torch
|
5 |
|
6 |
from model import MimicTransformer
|
7 |
+
from utils import load_rule, get_attribution, get_diseases, get_drg_link, get_icd_annotations, visualize_attn, clean_text
|
8 |
from transformers import AutoTokenizer, AutoModel, set_seed, pipeline
|
9 |
|
10 |
+
torch.manual_seed(0)
|
11 |
+
set_seed(34)
|
12 |
+
if torch.cuda.is_available():
|
13 |
+
torch.cuda.manual_seed_all(0)
|
14 |
+
|
15 |
model_path = 'checkpoint_0_9113.bin'
|
16 |
related_tensor = torch.load('discharge_embeddings.pt')
|
17 |
all_summaries = pd.read_csv('all_summaries.csv')['SUMMARIES'].to_list()
|
|
|
21 |
similarity_model.eval()
|
22 |
|
23 |
def read_model(model, path):
|
24 |
+
# model.load_state_dict(torch.load(path, map_location=torch.device('cpu')))
|
25 |
+
state_dict = torch.load(path, map_location='cpu')
|
26 |
+
model.load_state_dict({"model."+k: v for k, v in state_dict.items()}, strict=False)
|
27 |
return model
|
28 |
|
29 |
mimic = MimicTransformer(cutoff=512)
|
|
|
57 |
|
58 |
|
59 |
def get_model_results(text):
|
60 |
+
text = clean_text(text)
|
61 |
inputs = tokenizer(text, return_tensors='pt', padding='max_length', max_length=512, truncation=True)
|
62 |
+
with torch.no_grad():
|
63 |
+
outputs = mimic(input_ids=inputs.input_ids, attention_mask=inputs.attention_mask, drg_labels=None)
|
64 |
attribution, reconstructed_text = get_attribution(text=text, tokenizer=tokenizer, model_outputs=outputs, inputs=inputs, k=10)
|
65 |
logits = outputs[0][0]
|
66 |
out = logits.detach().cpu()[0]
|
|
|
102 |
|
103 |
|
104 |
def run(text, related_discharges=False):
|
105 |
+
torch.manual_seed(0)
|
106 |
+
set_seed(34)
|
107 |
+
if torch.cuda.is_available():
|
108 |
+
torch.cuda.manual_seed_all(0)
|
109 |
# initial drg results
|
110 |
+
|
111 |
model_results = get_model_results(text=text)
|
112 |
drg_code = model_results['class']
|
113 |
|
discharge_embeddings.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cbc05e83aa36756a35bee2f104e3c3dcc8fb1f26442d89ff52916d7052cd036b
|
3 |
+
size 713869074
|
model.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
from transformers import AutoConfig, AutoTokenizer, AutoModelForSequenceClassification
|
2 |
from torch.utils.data import DataLoader
|
3 |
from torch.nn import Linear, Module
|
4 |
from typing import Dict, List
|
@@ -6,6 +6,11 @@ from collections import Counter, defaultdict
|
|
6 |
from itertools import chain
|
7 |
import torch
|
8 |
|
|
|
|
|
|
|
|
|
|
|
9 |
class MimicTransformer(Module):
|
10 |
def __init__(self, num_labels=738, tokenizer_name='clinical', cutoff=512):
|
11 |
"""
|
@@ -17,6 +22,7 @@ class MimicTransformer(Module):
|
|
17 |
self.config = AutoConfig.from_pretrained(self.tokenizer_name, num_labels=self.num_labels)
|
18 |
self.tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_name, config=self.config)
|
19 |
self.model = AutoModelForSequenceClassification.from_pretrained(self.tokenizer_name, config=self.config)
|
|
|
20 |
if 'longformer' in self.tokenizer_name:
|
21 |
self.cutoff = self.model.config.max_position_embeddings
|
22 |
else:
|
|
|
1 |
+
from transformers import AutoConfig, AutoTokenizer, AutoModelForSequenceClassification, set_seed
|
2 |
from torch.utils.data import DataLoader
|
3 |
from torch.nn import Linear, Module
|
4 |
from typing import Dict, List
|
|
|
6 |
from itertools import chain
|
7 |
import torch
|
8 |
|
9 |
+
torch.manual_seed(0)
|
10 |
+
set_seed(34)
|
11 |
+
if torch.cuda.is_available():
|
12 |
+
torch.cuda.manual_seed_all(0)
|
13 |
+
|
14 |
class MimicTransformer(Module):
|
15 |
def __init__(self, num_labels=738, tokenizer_name='clinical', cutoff=512):
|
16 |
"""
|
|
|
22 |
self.config = AutoConfig.from_pretrained(self.tokenizer_name, num_labels=self.num_labels)
|
23 |
self.tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_name, config=self.config)
|
24 |
self.model = AutoModelForSequenceClassification.from_pretrained(self.tokenizer_name, config=self.config)
|
25 |
+
self.model.eval()
|
26 |
if 'longformer' in self.tokenizer_name:
|
27 |
self.cutoff = self.model.config.max_position_embeddings
|
28 |
else:
|