File size: 1,341 Bytes
acf0ee9
6b02e3d
 
acf0ee9
6b02e3d
 
 
acf0ee9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Checkpoint shared by the tokenizer and the classification model.
# NOTE(review): this looks like a base (not fine-tuned) checkpoint name; confirm
# it ships a trained sequence-classification head, otherwise the class
# probabilities computed from its logits are not meaningful.
_CHECKPOINT = "distilbert-base-multilingual-cased"


def _load_tokenizer_and_model():
    """Load and return the (tokenizer, model) pair for ``_CHECKPOINT``."""
    return (
        AutoTokenizer.from_pretrained(_CHECKPOINT),
        AutoModelForSequenceClassification.from_pretrained(_CHECKPOINT),
    )


# Streamlit re-executes this script from the top on every widget interaction,
# so an uncached load would repeat both from_pretrained() calls on each rerun.
# Memoize the loader when the installed Streamlit provides st.cache_resource;
# on older versions fall back to the plain (uncached) load.
_cache_resource = getattr(st, "cache_resource", None)
if _cache_resource is not None:
    _load_tokenizer_and_model = _cache_resource(_load_tokenizer_and_model)

# load the pre-trained model and tokenizer
tokenizer, model = _load_tokenizer_and_model()

# define the Streamlit app
def app() -> None:
    """Render the sentence-similarity page.

    Shows a title and two text inputs. Once both inputs contain
    non-whitespace text, the pair is tokenized together, passed through the
    module-level ``model``, and the softmax probability at class index 1 is
    written to the page as the similarity score.

    NOTE(review): nothing in the visible file calls ``app()``; confirm the
    script invokes it at module level, otherwise the page stays empty.
    """
    # Local import: torch is already required by return_tensors="pt" below,
    # but the file does not import it at the top level.
    import torch

    # set the app title
    st.title("Sentence Similarity Checker")

    # get the input sentences from the user; strip so that whitespace-only
    # input is treated the same as an empty field
    sentence1 = st.text_input("Enter the first sentence:").strip()
    sentence2 = st.text_input("Enter the second sentence:").strip()

    # nothing to score until both sentences have been entered
    if not (sentence1 and sentence2):
        return

    # tokenize the two sentences as a single pair, truncated to 250 tokens
    inputs = tokenizer(
        sentence1,
        sentence2,
        padding=True,
        truncation=True,
        max_length=250,
        return_tensors="pt",
    )

    # inference only: skip autograd bookkeeping for the forward pass
    with torch.no_grad():
        logits = model(**inputs).logits

    # softmax over the class dimension turns the logits into probabilities
    probs = logits.softmax(dim=1)

    # index 1 is treated as the "similar" class
    # NOTE(review): confirm this matches the model's label mapping
    similarity_score = probs[0, 1].item()

    # display the similarity score to the user
    st.write("Similarity score:", similarity_score)