P8_PPS / app.py
CASY85's picture
Update app.py
caae238 verified
raw
history blame
2.11 kB
import streamlit as st
import joblib
from sentence_transformers import SentenceTransformer
import numpy as np
# Load the pre-trained embedding model
@st.cache_resource  # Streamlit keeps the returned model so reruns do not rebuild it
def load_embedding_model():
    """Build the sentence-embedding model used to vectorise the input text.

    Returns:
        A ``SentenceTransformer`` constructed from the
        ``neuml/pubmedbert-base-embeddings`` model name.
    """
    encoder = SentenceTransformer('neuml/pubmedbert-base-embeddings')
    return encoder
# Load the multilabel classification model
@st.cache_resource  # Streamlit keeps the deserialised classifier between reruns
def load_multilabel_model():
    """Read the serialized multilabel classifier from ``Int_MLC.pkl``.

    The file name is relative, so it is resolved against the process's
    current working directory.

    Returns:
        The object that ``joblib.load`` reconstructs from the file.
    """
    # NOTE: joblib.load unpickles the file contents, so Int_MLC.pkl must come
    # from a trusted source.
    with open("Int_MLC.pkl", "rb") as model_file:
        classifier = joblib.load(model_file)
    return classifier
# Embed text
def get_embeddings(title, abstract, embedding_model):
    """Encode a title/abstract pair as one piece of text.

    Args:
        title: Title string.
        abstract: Abstract string.
        embedding_model: Object exposing an ``encode(text)`` method.

    Returns:
        Whatever ``embedding_model.encode`` returns for the title and
        abstract joined by a single space.
    """
    document = " ".join((title, abstract))
    return embedding_model.encode(document)
# Map predicted binary outputs to labels
# Label names, paired positionally with the entries of a prediction row.
# NOTE(review): presumably this order matches the classifier's output columns;
# confirm against the training code.
LABELS = ["device", "screening", "drug", "surgery", "imaging", "telemedicine"]


def decode_predictions(predictions):
    """Translate a row of binary flags into the matching label names.

    Args:
        predictions: Iterable of flags; entry ``i`` is paired with
            ``LABELS[i]``. Pairing stops at the shorter of the two.

    Returns:
        List of label names whose flag compares equal to ``1``, in
        ``LABELS`` order.
    """
    selected = []
    for name, flag in zip(LABELS, predictions):
        if flag == 1:
            selected.append(name)
    return selected
# Main Streamlit app
def main():
    """Render the page: collect a title and abstract, then show predicted labels.

    Nothing is predicted until the "Predict Labels" button is pressed. If
    either field is blank after stripping whitespace, an error is shown
    instead of a prediction.
    """
    st.title("Multilabel Classifier for Titles and Abstracts")

    # Input widgets
    title = st.text_input("Enter the Title:")
    abstract = st.text_area("Enter the Abstract:")

    # Both loaders are decorated with st.cache_resource, so calling them on
    # every run of main() is intentional.
    embedding_model = load_embedding_model()
    multilabel_model = load_multilabel_model()

    if not st.button("Predict Labels"):
        return

    # Equivalent to "title is blank or abstract is blank".
    if not (title.strip() and abstract.strip()):
        st.error("Both Title and Abstract are required!")
        return

    vector = get_embeddings(title, abstract, embedding_model)
    # predict() receives a one-element batch; [0] takes back that single row.
    row = multilabel_model.predict([vector])[0]
    predicted_labels = decode_predictions(row)

    if not predicted_labels:
        st.warning("No relevant labels were predicted.")
        return
    st.success(f"The predicted labels are: {', '.join(predicted_labels)}")
# Build the page only when this file is executed as the main script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()