emiliosheinz committed on
Commit
be8daf6
1 Parent(s): 2f5a8f1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -19
app.py CHANGED
@@ -1,10 +1,8 @@
1
  import streamlit as st
2
- import torch
3
  from transformers import AutoTokenizer, AutoModel
4
 
5
- # load the pre-trained model and tokenizer
6
- tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/distiluse-base-multilingual-cased-v1')
7
- model = AutoModel.from_pretrained('sentence-transformers/distiluse-base-multilingual-cased-v1')
8
 
9
  # set the app title
10
  st.title("Brazilian Portuguese Sentence Similarity Checker")
@@ -15,19 +13,9 @@ sentence2 = st.text_input("Enter the second sentence:")
15
 
16
  # check if both sentences are not empty
17
  if sentence1 and sentence2:
18
- # tokenize the sentences and get their IDs
19
- input_ids = tokenizer.encode_plus(sentence1, sentence2, padding='max_length', truncation=True, return_tensors='pt')
20
-
21
- # pass the IDs through the model to get the embeddings
22
- with torch.no_grad():
23
- embeddings = model(input_ids['input_ids'], attention_mask=input_ids['attention_mask'])[0]
24
 
25
- # check if both sentences have embeddings
26
- if embeddings.shape[0] == 2:
27
- # calculate the cosine similarity between the embeddings
28
- similarity = torch.nn.functional.cosine_similarity(embeddings[0], embeddings[1]).item()
29
-
30
- # display the predicted similarity to the user
31
- st.write("Similarity score between the sentences:", similarity)
32
- else:
33
- st.write("Unable to calculate similarity.")
 
1
  import streamlit as st
 
2
  from transformers import AutoTokenizer, AutoModel
3
 
4
+ # load the pre-trained model
5
+ model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
 
6
 
7
  # set the app title
8
  st.title("Brazilian Portuguese Sentence Similarity Checker")
 
13
 
14
  # check if both sentences are not empty
15
  if sentence1 and sentence2:
16
+ embedding_1= model.encode(sentence1, convert_to_tensor=True)
17
+ embedding_2 = model.encode(sentence2, convert_to_tensor=True)
18
+
19
+ similarity = util.pytorch_cos_sim(embedding_1, embedding_2)
 
 
20
 
21
+ st.write("Similarity score between the sentences:", similarity)