import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("distilbert-base-multilingual-cased")
model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-multilingual-cased")
model.eval()

# example sentences (Portuguese: "Brazil is the largest country in South America" /
# "Argentina is the second largest country in South America")
sentence1 = "O Brasil é o maior país da América do Sul"
sentence2 = "A Argentina é o segundo maior país da América do Sul"

# tokenize the sentences as a pair
inputs = tokenizer(sentence1, sentence2, padding=True, truncation=True, max_length=250, return_tensors="pt")

# get the logits for the sentence-pair classification task
# (note: this base checkpoint has no fine-tuned classification head, so the head is
# randomly initialized; fine-tune it on a similarity dataset before trusting the scores)
with torch.no_grad():
    outputs = model(**inputs).logits

# softmax over the two classes (dissimilar / similar)
probs = outputs.softmax(dim=1)

# the probability of the sentences being similar is the second element of the output
similarity_score = probs[0][1].item()
print("Similarity score:", similarity_score)
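
# A minimal alternative sketch, not part of the original snippet: since the checkpoint
# above ships without a trained pair-classification head, one common workaround is to
# use the base encoder (AutoModel) and compare mean-pooled embeddings with cosine
# similarity. Model name and max_length are carried over from the example above;
# the embed() helper is hypothetical.
import torch
from transformers import AutoTokenizer, AutoModel

tokenizer = AutoTokenizer.from_pretrained("distilbert-base-multilingual-cased")
encoder = AutoModel.from_pretrained("distilbert-base-multilingual-cased")
encoder.eval()

def embed(sentence: str) -> torch.Tensor:
    # Mean-pool the last hidden states over non-padding tokens.
    inputs = tokenizer(sentence, truncation=True, max_length=250, return_tensors="pt")
    with torch.no_grad():
        hidden = encoder(**inputs).last_hidden_state   # (1, seq_len, hidden_dim)
    mask = inputs["attention_mask"].unsqueeze(-1)      # (1, seq_len, 1)
    return (hidden * mask).sum(dim=1) / mask.sum(dim=1)

emb1 = embed("O Brasil é o maior país da América do Sul")
emb2 = embed("A Argentina é o segundo maior país da América do Sul")
cosine = torch.nn.functional.cosine_similarity(emb1, emb2).item()
print("Cosine similarity:", cosine)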