# Page-header residue: author emiliosheinz · commit acf0ee9 ("add streamlit") · 1.34 kB
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Load the pre-trained tokenizer and model once and reuse them across reruns.
_MODEL_NAME = "distilbert-base-multilingual-cased"


@st.cache_resource
def _load_tokenizer_and_model(model_name=_MODEL_NAME):
    """Return the ``(tokenizer, model)`` pair for *model_name*.

    Streamlit re-executes the script from the top on every widget
    interaction; caching the loaded objects as a shared resource avoids
    re-instantiating the model on each rerun.
    """
    return (
        AutoTokenizer.from_pretrained(model_name),
        AutoModelForSequenceClassification.from_pretrained(model_name),
    )


# NOTE(review): this checkpoint name looks like a base (not fine-tuned) model,
# in which case the sequence-classification head would be freshly initialised
# and its outputs not meaningful -- confirm, or switch to a fine-tuned checkpoint.
tokenizer, model = _load_tokenizer_and_model()
# define the Streamlit app
def app():
    """Render the "Sentence Similarity Checker" Streamlit page.

    Shows two text inputs. Once both contain non-blank text, the pair is
    encoded with the module-level ``tokenizer``, passed through the
    module-level ``model``, and the softmax probability of class index 1 is
    written to the page as the similarity score.

    Returns:
        None. All output goes to the Streamlit page.
    """
    # Local import: torch is only needed here for the no-grad context.
    import torch

    # set the app title
    st.title("Sentence Similarity Checker")

    # get the input sentences from the user; strip so that whitespace-only
    # input is treated the same as an empty field
    sentence1 = st.text_input("Enter the first sentence:").strip()
    sentence2 = st.text_input("Enter the second sentence:").strip()

    # nothing to score until both sentences have been entered
    if not (sentence1 and sentence2):
        return

    # tokenize the two sentences together as a single sentence-pair input
    inputs = tokenizer(
        sentence1,
        sentence2,
        padding=True,
        truncation=True,
        max_length=250,
        return_tensors="pt",
    )

    # inference only: skip autograd bookkeeping to save memory and time
    with torch.no_grad():
        logits = model(**inputs).logits

    # softmax over the class dimension; index 1 is reported as "similar"
    # NOTE(review): which class index means "similar" depends on how the
    # checkpoint was trained -- confirm against the model's label mapping.
    probs = logits.softmax(dim=1)
    similarity_score = probs[0][1].item()

    # display the similarity score to the user
    st.write("Similarity score:", similarity_score)