Markins's picture
Update app.py
8fc63ee
raw
history blame
No virus
4.13 kB
import streamlit as st
import torch
import spacy
# from spacy.lang.en import English
# from utils import spacy_function, make_predictions, example_input
from Dataset import CustomDataSetManager
from Embeddings import get_embeddings
from Model import MachineModel
from Tokenizer import Tokenizer
from LabelEncoder import LabelEncoder
from MakePredictions import make_predictions
from RandomAbstract import Choose_Random_text
# File locations handed to create_utils() from main().
# Saved model weights; read with torch.load() and applied via load_state_dict().
MODEL_PATH = 'final_model.pt'
# Serialized tokenizer; read with Tokenizer.load().
TOKENIZER_PATH = 'tokenizer.json'
# Serialized label encoder; read with LabelEncoder.load().
# NOTE(review): the name is misspelled ("ENOCDER"); main() references it under
# this exact spelling, so any rename must update both places together.
LABEL_ENOCDER_PATH = "label_encoder.json"
# Embedding text file passed to get_embeddings() with an embedding size of 300.
EMBEDDING_FILE_PATH = 'glove.6B.300d.txt'
@st.cache()
def create_utils(model_path, tokenizer_path, label_encoder_path, embedding_file_path):
    """Load the tokenizer, label encoder and trained model used for inference.

    Args:
        model_path: Path to the saved state dict; read with ``torch.load``.
        tokenizer_path: Path passed to ``Tokenizer.load``.
        label_encoder_path: Path passed to ``LabelEncoder.load``.
        embedding_file_path: Path to the embedding file passed to
            ``get_embeddings``.

    Returns:
        A ``(model, tokenizer, label_encoder)`` tuple. The model weights are
        mapped onto the CPU and the model is switched to evaluation mode.
    """
    # Single source of truth: the embedding matrix and the model must agree.
    embedding_dim = 300

    tokenizer = Tokenizer.load(fp=tokenizer_path)
    label_encoder = LabelEncoder.load(fp=label_encoder_path)
    embedding_matrix = get_embeddings(embedding_file_path, tokenizer, embedding_dim)

    model = MachineModel(
        embedding_dim=embedding_dim,
        vocab_size=len(tokenizer),
        hidden_dim=128,
        n_layers=3,
        linear_output=128,
        num_classes=len(label_encoder),
        pretrained_embeddings=embedding_matrix,
    )
    # map_location='cpu' lets weights saved on a GPU load on a CPU-only host.
    model.load_state_dict(torch.load(model_path, map_location='cpu'))
    # Inference only: disable dropout / batch-norm training behaviour.
    # Assumes MachineModel is a torch.nn.Module (it exposes load_state_dict).
    model.eval()
    print(model)
    return model, tokenizer, label_encoder
def model_prediction(abstract, model, tokenizer, label_encoder):
    """Group the predicted sentences of an abstract by their section label.

    Runs ``make_predictions`` on the abstract and, for every returned line,
    appends it to the text of the section named by the prediction at the same
    index. Lines whose label is not one of the five known sections are
    dropped. Lines are concatenated as-is, with no separator inserted.

    Returns:
        ``(objective, background, method, conclusion, result)`` as strings;
        a section with no matching lines is the empty string.
    """
    sentences, labels = make_predictions(abstract, model, tokenizer, label_encoder)

    grouped = {
        'OBJECTIVE': [],
        'BACKGROUND': [],
        'METHODS': [],
        'RESULTS': [],
        'CONCLUSIONS': [],
    }
    for position, sentence in enumerate(sentences):
        bucket = grouped.get(labels[position])
        if bucket is not None:
            bucket.append(sentence)

    return (
        ''.join(grouped['OBJECTIVE']),
        ''.join(grouped['BACKGROUND']),
        ''.join(grouped['METHODS']),
        ''.join(grouped['CONCLUSIONS']),
        ''.join(grouped['RESULTS']),
    )
def main():
    """Render the Streamlit page and run section extraction on demand.

    The left column collects the abstract and can show a random example; the
    right column shows the text assigned to each section once the
    "Extract !" button is pressed.
    """
    st.set_page_config(
        page_title="Using BERT Models for Sequential Text Classification in Biomedical Abstracts",
        # Must be a real emoji (or an image); the mis-encoded bytes that were
        # here before are neither.
        page_icon="📄",
        layout="wide",
        initial_sidebar_state="expanded",
    )

    st.title('Using BERT Models for Sequential Text Classification in Biomedical Abstracts 📄 🔥')
    st.caption('A custom NLP classification model based on a transformer that is able to classify and simplify research abstracts and then classify it into smaller parts that can be made more simple and easy to understand')
    st.caption('Developed by Purbayan Majumder, Aman Khan and Rubina Das')

    col1, col2 = st.columns(2)

    with col1:
        st.write('#### Enter Abstract Here !!')
        abstract = st.text_area(label='', height=200)
        agree = st.checkbox('Show Example Abstract')
        predict = st.button('Extract !')
        if agree:
            example_input = Choose_Random_text()
            st.info(example_input)

    if not predict:
        return

    with col2:
        # Nothing to classify: tell the user instead of feeding an empty
        # string through the tokenizer and model.
        if not abstract or not abstract.strip():
            st.warning('Please enter an abstract before extracting.')
            return

        with st.spinner('Waiting for prediction processing data to the utils....'):
            fetch_model, tokenizer, label_encoder = create_utils(
                MODEL_PATH, TOKENIZER_PATH, LABEL_ENOCDER_PATH, EMBEDDING_FILE_PATH
            )
            objective, background, methods, conclusion, result = model_prediction(
                abstract, fetch_model, tokenizer, label_encoder
            )

        # Display order differs from the return order of model_prediction().
        sections = (
            ('### Objective : ', objective),
            ('### Background : ', background),
            ('### Methods : ', methods),
            ('### Result : ', result),
            ('### Conclusion : ', conclusion),
        )
        for heading, text in sections:
            st.markdown(heading)
            st.info(text)
# Only launch the app when this file is executed as a script.
if __name__ == "__main__":
    main()