emiliosheinz committed on
Commit
acf0ee9
1 Parent(s): 1bdf753

add streamlit

Browse files
Files changed (1) hide show
  1. app.py +25 -17
app.py CHANGED
@@ -1,22 +1,30 @@
 
1
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
2
 
 
3
  tokenizer = AutoTokenizer.from_pretrained("distilbert-base-multilingual-cased")
4
  model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-multilingual-cased")
5
 
6
- # example sentences
7
- sentence1 = "O Brasil é o maior país da América do Sul"
8
- sentence2 = "A Argentina é o segundo maior país da América do Sul"
9
-
10
- # tokenize the sentences
11
- inputs = tokenizer(sentence1, sentence2, padding=True, truncation=True, max_length=250, return_tensors="pt")
12
-
13
- # get the output logits for the sentence pair classification task
14
- outputs = model(**inputs).logits
15
-
16
- # calculate the softmax probabilities for the two classes (similar or dissimilar)
17
- probs = outputs.softmax(dim=1)
18
-
19
- # the probability of the sentences being similar is the second element of the output array
20
- similarity_score = probs[0][1].item()
21
-
22
- print("Similarity score:", similarity_score)
 
 
 
 
 
 
 
1
+ import streamlit as st
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
 
4
+ # load the pre-trained model and tokenizer
5
  tokenizer = AutoTokenizer.from_pretrained("distilbert-base-multilingual-cased")
6
  model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-multilingual-cased")
7
 
8
+ # define the Streamlit app
9
def app():
    """Render the Streamlit sentence-similarity page.

    Shows a title and two text inputs. Once both inputs are non-empty, the
    pair is tokenized together, passed through the module-level ``model``,
    and the softmax probability at class index 1 is written to the page as
    the similarity score.

    Relies on the module-level names ``st``, ``tokenizer`` and ``model``.
    """
    # Local import keeps this block self-contained; return_tensors="pt"
    # already requires PyTorch to be installed.
    import torch

    st.title("Sentence Similarity Checker")

    sentence1 = st.text_input("Enter the first sentence:")
    sentence2 = st.text_input("Enter the second sentence:")

    # Nothing to score until the user has filled in both sentences.
    if not (sentence1 and sentence2):
        return

    # Encode the two sentences as a single sentence-pair input.
    inputs = tokenizer(
        sentence1,
        sentence2,
        padding=True,
        truncation=True,
        max_length=250,
        return_tensors="pt",
    )

    # Inference only: avoid building an autograd graph for the forward pass.
    with torch.no_grad():
        logits = model(**inputs).logits

    # Normalize the logits over the class dimension.
    probs = logits.softmax(dim=1)

    # NOTE(review): index 1 is treated as the "similar" class. Nothing in this
    # file shows the classification head being fine-tuned for similarity, so
    # confirm the checkpoint and label order before trusting this score.
    similarity_score = probs[0][1].item()

    st.write("Similarity score:", similarity_score)


# Without this call the page stays empty, because app() is only defined above.
# Streamlit runs the script as ``__main__``, so the guard also keeps plain
# imports of this module free of side effects.
if __name__ == "__main__":
    app()