File size: 4,210 Bytes
d0742f2
 
 
 
 
6905f95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d0742f2
 
5a08522
d0742f2
eeb16df
d0742f2
 
 
 
 
 
 
 
 
 
50e9520
d0742f2
 
 
 
 
 
 
 
2318a05
d0742f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import streamlit as st
from annotated_text import annotated_text
import transformers

def _build_entity_colors():
    """Build the tag -> background-color table used when rendering entities.

    Colors are handed out by cycling a four-color palette over the tags in
    order: every ``B-`` tag first, then every ``I-`` tag. The ``'O'`` tag is
    added last with ``'#000'``.
    """
    palette = ('#8ef', '#faa', '#afa', '#fea')
    entity_types = (
        'Activity', 'Administration', 'Age', 'Area',
        'Biological_attribute', 'Biological_structure', 'Clinical_event',
        'Color', 'Coreference', 'Date', 'Detailed_description',
        'Diagnostic_procedure', 'Disease_disorder', 'Distance', 'Dosage',
        'Duration', 'Family_history', 'Frequency', 'Height', 'History',
        'Lab_value', 'Mass', 'Medication', 'Nonbiological_location',
        'Occupation', 'Other_entity', 'Other_event', 'Outcome',
        'Personal_background', 'Qualitative_concept', 'Quantitative_concept',
        'Severity', 'Sex', 'Shape', 'Sign_symptom', 'Subject', 'Texture',
        'Therapeutic_procedure', 'Time', 'Volume', 'Weight',
    )
    tags = ['B-' + name for name in entity_types]
    # The table has a 'B-Sex' entry but no 'I-Sex' entry.
    tags += ['I-' + name for name in entity_types if name != 'Sex']

    colors = {}
    for position, tag in enumerate(tags):
        colors[tag] = palette[position % len(palette)]
    colors['O'] = '#000'
    return colors


# Maps each BIO tag ('B-<type>', 'I-<type>', 'O') to a CSS color string.
ENTITY_TO_COLOR = _build_entity_colors()

@st.cache_resource
def get_pipe():
    """Load the NER model and return a token-classification pipeline.

    The model and tokenizer are fetched by name with ``from_pretrained`` and
    wrapped in a ``transformers`` pipeline that uses
    ``aggregation_strategy="simple"``.

    The result is cached with ``st.cache_resource`` rather than
    ``st.cache_data``: the pipeline is a shared, heavyweight object, and
    ``st.cache_data`` would serialize it and return a fresh copy on every
    rerun instead of reusing the single loaded instance.

    Returns:
        The token-classification pipeline built from the model and tokenizer.
    """
    model_name = "nassga/nassGanBioMedical"
    model = transformers.AutoModelForTokenClassification.from_pretrained(model_name)
    tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
    return transformers.pipeline(
        "token-classification",
        model=model,
        tokenizer=tokenizer,
        aggregation_strategy="simple",
    )

def parse_text(text, prediction):
    """Interleave plain text and annotated entity spans for ``annotated_text``.

    Args:
        text: The original input string that was passed to the pipeline.
        prediction: Iterable of entity dicts, each providing ``"start"``,
            ``"end"`` (character offsets into ``text``) and
            ``"entity_group"``. Entities are consumed in the order given.

    Returns:
        A list that alternates plain ``str`` segments with
        ``(span_text, entity_group, color)`` tuples, ending with the text
        that follows the last entity.
    """
    parsed_text = []
    cursor = 0
    for p in prediction:
        group = p["entity_group"]
        # The pipeline is built with aggregation_strategy="simple", so groups
        # arrive without the "B-"/"I-" prefix that ENTITY_TO_COLOR is keyed
        # by. Try the bare name first, then the prefixed forms, so entities
        # get their palette color instead of always hitting the fallback.
        color = (
            ENTITY_TO_COLOR.get(group)
            or ENTITY_TO_COLOR.get("B-" + group)
            or ENTITY_TO_COLOR.get("I-" + group, "#000")
        )
        parsed_text.append(text[cursor:p["start"]])
        # Show the exact characters the user typed: the pipeline's "word" is
        # rebuilt from tokens and may differ in case or spacing.
        parsed_text.append((text[p["start"]:p["end"]], group, color))
        cursor = p["end"]
    parsed_text.append(text[cursor:])
    return parsed_text

# Page header and usage instructions.
st.set_page_config(page_title="Named Entity Recognition")
st.title("Named Entity Recognition")
st.write("Type text into the text box and then press 'Predict' to get the named entities.")

# Sample clinical note pre-filled in the text area.
default_text = (
    "A 53-year-old French woman with a previous diagnosis of Crohn's disease "
    "was admitted to the University Hospital in 2016 due to persistent chronic "
    "diarrhea, with an average of 4 stools per day during daytime, without "
    "associated vomiting, abdominal pain, or fever. She reported significant "
    "weight loss over the past 6 months.Initial laboratory tests showed normal "
    "results, while serum pancreatic enzyme levels were mildly elevated."
)

# Input widgets: the editable text and the 'Predict' button.
text = st.text_area("Enter text here:", value=default_text)
submit = st.button("Predict")

# Fetch the pipeline, showing a spinner while get_pipe() runs.
with st.spinner("Loading model..."):
    pipe = get_pipe()

# NOTE(review): the original test was
# `(submit and <non-empty>) or <non-empty>`, which reduces to just
# `<non-empty>`; the 'Predict' button state never affected whether a
# prediction runs. The simplified check below keeps that behavior -- confirm
# whether gating on the button was intended.
if text.strip():
    prediction = pipe(text)
    parsed_text = parse_text(text, prediction)

    # Highlighted rendering of the recognized entities.
    st.header("Prediction:")
    annotated_text(*parsed_text)

    # Unprocessed pipeline output.
    st.header("Raw values:")
    st.json(prediction)