File size: 2,109 Bytes
5020dd3
 
 
ccb91a2
5020dd3
 
 
 
 
 
ccb91a2
5020dd3
ccb91a2
caae238
5020dd3
 
 
 
 
 
 
 
ccb91a2
 
 
 
 
 
5020dd3
 
ccb91a2
5020dd3
 
 
 
 
 
 
ccb91a2
5020dd3
 
ccb91a2
5020dd3
 
 
 
 
 
 
ccb91a2
5020dd3
ccb91a2
 
 
 
 
 
 
 
5020dd3
 
 
ccb91a2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import streamlit as st
import joblib
from sentence_transformers import SentenceTransformer
import numpy as np

# Sentence-embedding model, created through Streamlit's resource cache
@st.cache_resource
def load_embedding_model():
    """Return the SentenceTransformer used to embed the input text.

    Decorated with ``st.cache_resource`` so the model is cached rather than
    reloaded, which saves loading time.
    """
    model_name = 'neuml/pubmedbert-base-embeddings'
    return SentenceTransformer(model_name)

# Multilabel classifier, deserialized through Streamlit's resource cache
@st.cache_resource
def load_multilabel_model():
    """Return the classifier deserialized from ``Int_MLC.pkl``.

    The path is relative, so the file is looked up from the current working
    directory of the process.
    """
    # NOTE(review): joblib.load unpickles the file, and unpickling can run
    # arbitrary code -- only deploy an Int_MLC.pkl from a trusted source.
    with open("Int_MLC.pkl", "rb") as model_file:
        classifier = joblib.load(model_file)
    return classifier

# Embed one title/abstract pair
def get_embeddings(title, abstract, embedding_model):
    """Encode a title and an abstract as one piece of text.

    Args:
        title: Title string.
        abstract: Abstract string.
        embedding_model: Object exposing an ``encode(text)`` method.

    Returns:
        Whatever ``embedding_model.encode`` returns for the joined text.
    """
    # Title first, then the abstract, separated by exactly one space.
    document = " ".join((title, abstract))
    return embedding_model.encode(document)

# Label names, in the order decode_predictions pairs them with the
# classifier's output positions
LABELS = [
    "device",
    "screening",
    "drug",
    "surgery",
    "imaging",
    "telemedicine",
]


def decode_predictions(predictions):
    """Return the names of the labels whose prediction equals 1.

    Args:
        predictions: Iterable of per-label outputs, paired positionally
            with ``LABELS``.

    Returns:
        List of label names, in ``LABELS`` order, whose paired value
        compares equal to 1. Pairing stops at the shorter of the two
        sequences.
    """
    paired = zip(LABELS, predictions)
    return [name for name, flag in paired if flag == 1]

# Streamlit entry point
def main():
    """Render the input form and show the labels predicted for the text."""
    st.title("Multilabel Classifier for Titles and Abstracts")

    # Collect the two text inputs.
    title = st.text_input("Enter the Title:")
    abstract = st.text_area("Enter the Abstract:")

    # Both loaders are wrapped in st.cache_resource where they are defined.
    embedding_model = load_embedding_model()
    multilabel_model = load_multilabel_model()

    # Nothing more to do unless the button reports a click.
    if not st.button("Predict Labels"):
        return

    # Whitespace-only input counts as missing.
    if not title.strip() or not abstract.strip():
        st.error("Both Title and Abstract are required!")
        return

    vector = get_embeddings(title, abstract, embedding_model)

    # predict() receives a one-element batch; [0] takes that item's output.
    row = multilabel_model.predict([vector])[0]
    predicted_labels = decode_predictions(row)

    if not predicted_labels:
        st.warning("No relevant labels were predicted.")
        return

    st.success(f"The predicted labels are: {', '.join(predicted_labels)}")

# Call main() only when this file is executed as the main module, not when
# it is imported.
if __name__ == "__main__":
    main()