# Page metadata captured with this file (not part of the program):
#   emiliosheinz's picture
#   create app.py with static string comparison
#   6b02e3d
#   raw
#   history blame
#   927 Bytes
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Score one hard-coded Portuguese sentence pair with a DistilBERT
# sequence-classification model and print the probability of class index 1.
#
# NOTE(review): "distilbert-base-multilingual-cased" is published as a base
# (masked-LM) checkpoint, so the classification head created below is
# presumably freshly initialized rather than fine-tuned. If so, the printed
# score is arbitrary and can differ between runs. Confirm, and switch to a
# checkpoint fine-tuned for sentence-pair similarity before relying on it.
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-multilingual-cased")
model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-multilingual-cased")

# example sentences
sentence1 = "O Brasil é o maior país da América do Sul"
sentence2 = "A Argentina é o segundo maior país da América do Sul"

# tokenize both sentences together as a single sentence-pair input,
# returned as PyTorch tensors
inputs = tokenizer(sentence1, sentence2, padding=True, truncation=True, max_length=250, return_tensors="pt")

# get the output logits for the sentence pair; this is inference only, so
# skip autograd tracking instead of building a gradient graph that is never used
with torch.no_grad():
    outputs = model(**inputs).logits

# normalize the logits into per-class probabilities along the class dimension
probs = outputs.softmax(dim=1)

# the score reported is the probability assigned to class index 1 for the
# single pair in the batch; treating that class as "similar" assumes a head
# trained with that label order (see the review note above)
similarity_score = probs[0][1].item()
print("Similarity score:", similarity_score)