Spaces:

Markins
/

Using_BERT_Models_for_Sequential_Text_Classification_in_Biomedical_Abstracts

Sleeping

App Files Files Community

Using_BERT_Models_for_Sequential_Text_Classification_in_Biomedical_Abstracts / app.py

Markins

Update app.py

8fc63ee about 1 year ago

raw

history blame

No virus

4.13 kB

	import streamlit as st
	import torch
	import spacy
	# from spacy.lang.en import English
	# from utils import spacy_function, make_predictions, example_input

	from Dataset import CustomDataSetManager
	from Embeddings import get_embeddings
	from Model import MachineModel
	from Tokenizer import Tokenizer
	from LabelEncoder import LabelEncoder
	from MakePredictions import make_predictions
	from RandomAbstract import Choose_Random_text

	# Locations of the artifacts the app loads; all are relative paths.
	MODEL_PATH = 'final_model.pt'
	TOKENIZER_PATH = 'tokenizer.json'
	# Correctly spelled name for the label-encoder file; prefer it in new code.
	LABEL_ENCODER_PATH = "label_encoder.json"
	# Backward-compatible alias: existing code refers to the misspelled name.
	LABEL_ENOCDER_PATH = LABEL_ENCODER_PATH
	EMBEDDING_FILE_PATH = 'glove.6B.300d.txt'

	@st.cache(allow_output_mutation=True)
	def create_utils(model_path, tokenizer_path, label_encoder_path, embedding_file_path):
	    """Load the tokenizer, label encoder and model needed for prediction.

	    The result is cached by Streamlit, so the files are read once per
	    distinct set of arguments. ``allow_output_mutation=True`` is set because
	    the returned objects are handed to other code that may mutate them;
	    without it ``st.cache`` hashes the cached return value on every hit to
	    detect such mutation.

	    NOTE(review): newer Streamlit releases replace ``st.cache`` with
	    ``st.cache_resource`` -- confirm the pinned Streamlit version before
	    switching.

	    Args:
	        model_path: Path to the saved state dict, read with ``torch.load``.
	        tokenizer_path: Path passed to ``Tokenizer.load``.
	        label_encoder_path: Path passed to ``LabelEncoder.load``.
	        embedding_file_path: Path passed to ``get_embeddings``.

	    Returns:
	        A ``(model, tokenizer, label_encoder)`` tuple.
	    """
	    # Single source of truth for the embedding width; it is needed both to
	    # build the embedding matrix and to size the model.
	    embedding_dim = 300

	    tokenizer = Tokenizer.load(fp=tokenizer_path)
	    label_encoder = LabelEncoder.load(fp=label_encoder_path)
	    embedding_matrix = get_embeddings(embedding_file_path, tokenizer, embedding_dim)

	    model = MachineModel(
	        embedding_dim=embedding_dim,
	        vocab_size=len(tokenizer),
	        hidden_dim=128,
	        n_layers=3,
	        linear_output=128,
	        num_classes=len(label_encoder),
	        pretrained_embeddings=embedding_matrix,
	    )
	    # map_location='cpu' lets the weights load on a machine without a GPU.
	    model.load_state_dict(torch.load(model_path, map_location='cpu'))
	    # load_state_dict only restores weights. The model is used purely for
	    # prediction, so switch to inference mode to disable training-only
	    # behaviour such as dropout.
	    # NOTE(review): assumes MachineModel is a torch.nn.Module -- confirm.
	    model.eval()
	    print(model)
	    return model, tokenizer, label_encoder

	def model_prediction(abstract, model, tokenizer, label_encoder):
	    """Group the lines of an abstract by their predicted section label.

	    Runs ``make_predictions`` on the abstract and, for every returned line,
	    appends it to the text collected for its predicted label. Lines whose
	    label is not one of the five known sections are dropped.

	    Returns:
	        A tuple ``(objective, background, method, conclusion, result)`` of
	        strings; a section with no lines is the empty string.
	    """
	    section_names = ('OBJECTIVE', 'BACKGROUND', 'METHODS', 'RESULTS', 'CONCLUSIONS')
	    collected = {name: '' for name in section_names}

	    sentences, labels = make_predictions(abstract, model, tokenizer, label_encoder)

	    for position, sentence in enumerate(sentences):
	        label = labels[position]
	        for name in section_names:
	            if label == name:
	                collected[name] = collected[name] + sentence
	                break

	    # The return order differs from the matching order above.
	    return (
	        collected['OBJECTIVE'],
	        collected['BACKGROUND'],
	        collected['METHODS'],
	        collected['CONCLUSIONS'],
	        collected['RESULTS'],
	    )



	def main():
	    """Render the Streamlit page and run section extraction on request.

	    Renders the abstract input, the example checkbox and the "Extract !"
	    button. When the button is pressed with a non-empty abstract, the
	    cached model utilities are loaded, the abstract is classified, and one
	    box per section is shown. An empty abstract produces a warning
	    instead of a model call.
	    """
	    st.set_page_config(
	        page_title="Using BERT Models for Sequential Text Classification in Biomedical Abstracts",
	        page_icon="📄",
	        layout="wide",
	        initial_sidebar_state="expanded"
	    )

	    st.title('Using BERT Models for Sequential Text Classification in Biomedical Abstracts 📄 🔥')
	    st.caption('A custom NLP classification model based on a transformer that is able to classify and simplify research abstracts and then classify it into smaller parts that can be made more simple and easy to understand')
	    st.caption('Developed by Purbayan Majumder, Aman Khan and Rubina Das')

	    # NOTE(review): the nesting of the widgets below was reconstructed from
	    # statement order -- confirm it matches the intended layout.
	    col1, col2 = st.columns(2)

	    with col1:
	        st.write('#### Enter Abstract Here !!')
	        abstract = st.text_area(label='', height=200)

	        agree = st.checkbox('Show Example Abstract')
	        predict = st.button('Extract !')

	        if agree:
	            example_input = Choose_Random_text()
	            st.info(example_input)

	    # Nothing more to render until the user asks for a prediction.
	    if not predict:
	        return

	    with col2:
	        # Guard against an empty submission so the model is not loaded and
	        # run on an empty string.
	        if not abstract or not abstract.strip():
	            st.warning('Please enter an abstract before extracting.')
	            return

	        with st.spinner('Waiting for prediction processing data to the utils....'):
	            fetch_model, tokenizer, label_encoder = create_utils(MODEL_PATH, TOKENIZER_PATH, LABEL_ENOCDER_PATH, EMBEDDING_FILE_PATH)
	            objective, background, methods, conclusion, result = model_prediction(abstract, fetch_model, tokenizer, label_encoder)

	        # One heading plus one info box per section, in display order.
	        sections = (
	            ('### Objective : ', objective),
	            ('### Background : ', background),
	            ('### Methods : ', methods),
	            ('### Result : ', result),
	            ('### Conclusion : ', conclusion),
	        )
	        for heading, text in sections:
	            st.markdown(heading)
	            st.info(text)



	# Start the app only when this file is run as the main module.
	if __name__ == "__main__":
	    main()