Spaces:

brisklyapp
/

strings-similarity

Runtime error

emiliosheinz committed on Feb 25, 2023

Commit

be8daf6

•

1 Parent(s): 2f5a8f1

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,10 +1,8 @@
 import streamlit as st
-import torch
 from transformers import AutoTokenizer, AutoModel
-# load the pre-trained model and tokenizer
-tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/distiluse-base-multilingual-cased-v1')
-model = AutoModel.from_pretrained('sentence-transformers/distiluse-base-multilingual-cased-v1')
 # set the app title
 st.title("Brazilian Portuguese Sentence Similarity Checker")
@@ -15,19 +13,9 @@ sentence2 = st.text_input("Enter the second sentence:")
 # check if both sentences are not empty
 if sentence1 and sentence2:
-    # tokenize the sentences and get their IDs
-    input_ids = tokenizer.encode_plus(sentence1, sentence2, padding='max_length', truncation=True, return_tensors='pt')
-    # pass the IDs through the model to get the embeddings
-    with torch.no_grad():
-        embeddings = model(input_ids['input_ids'], attention_mask=input_ids['attention_mask'])[0]
-    # check if both sentences have embeddings
-    if embeddings.shape[0] == 2:
-        # calculate the cosine similarity between the embeddings
-        similarity = torch.nn.functional.cosine_similarity(embeddings[0], embeddings[1]).item()
-        # display the predicted similarity to the user
-        st.write("Similarity score between the sentences:", similarity)
-    else:
-        st.write("Unable to calculate similarity.")

 import streamlit as st
 from transformers import AutoTokenizer, AutoModel
+# load the pre-trained model
+model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
 # set the app title
 st.title("Brazilian Portuguese Sentence Similarity Checker")
 # check if both sentences are not empty
 if sentence1 and sentence2:
+    embedding_1= model.encode(sentence1, convert_to_tensor=True)
+    embedding_2 = model.encode(sentence2, convert_to_tensor=True)
+    similarity = util.pytorch_cos_sim(embedding_1, embedding_2)
+    st.write("Similarity score between the sentences:", similarity)