File size: 5,120 Bytes
79ec5bb f6502eb 79ec5bb f6502eb 79ec5bb f6502eb 79ec5bb f6502eb 79ec5bb f6502eb 79ec5bb f6502eb 79ec5bb f6502eb 79ec5bb f6502eb 79ec5bb f6502eb 79ec5bb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 |
---
language:
- pt
- en
license: mit
base_model:
- google/bert_uncased_L-2_H-128_A-2
---
```python
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
# Checkpoint identifier handed to both ``from_pretrained`` calls below.
model_id = "cnmoro/BertTiny-Reranker-EnPt"

# Tokenizer and two-label sequence classifier come from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSequenceClassification.from_pretrained(model_id, num_labels=2)

# Use CUDA when torch reports it as available; otherwise run on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

# Text layout used to join a query with one candidate document.
template = "Query: {query}\nSentence: {document}"
def rank(query, documents, normalize_scores=True):
    """Rank ``documents`` by the model's score for ``query``.

    Every document is combined with the query through the module-level
    ``template``, tokenized as one padded batch, and run through the
    two-label classifier. The score of a document is the softmax
    probability of label index 1, which this code uses as the relevance
    signal (higher sorts first).

    Args:
        query: Query text inserted into ``template``.
        documents: Iterable of candidate document strings. It is copied
            into a list, so one-shot iterators are supported.
        normalize_scores: When true, each score is divided by the sum of
            all scores so the returned scores add up to 1. If the sum is
            not positive the raw scores are returned unchanged instead of
            dividing by zero.

    Returns:
        A list of ``(document, score)`` tuples ordered by score, highest
        first. An empty ``documents`` yields an empty list.
    """
    # Materialize once: the input is walked twice (to build the prompts and
    # to pair documents with scores), which would exhaust a generator.
    documents = list(documents)
    if not documents:
        # Nothing to score; avoid sending an empty batch to the tokenizer.
        return []

    texts = [template.format(query=query, document=document) for document in documents]
    inputs = tokenizer(
        texts,
        add_special_tokens=True,
        max_length=512,
        truncation=True,
        padding=True,
        return_tensors="pt",
    )
    input_ids = inputs["input_ids"].to(device)
    attention_mask = inputs["attention_mask"].to(device)

    model.eval()
    with torch.no_grad():
        logits = model(input_ids, attention_mask=attention_mask).logits
        probabilities = torch.softmax(logits, dim=1)

    # With two labels, "confidence if label 1 was predicted, else
    # 1 - confidence" is simply the probability of label 1, so read that
    # column directly instead of going through argmax/max.
    scores = probabilities[:, 1].tolist()

    # sorted() is stable, so documents with equal scores keep input order.
    sorted_results = sorted(
        zip(documents, scores),
        key=lambda pair: pair[1],
        reverse=True,
    )

    if normalize_scores:
        total_score = sum(score for _, score in sorted_results)
        if total_score > 0:
            sorted_results = [
                (document, score / total_score)
                for document, score in sorted_results
            ]

    return sorted_results
# Sample - 1
# A Portuguese query ranked against six candidate sentences.
query = "O que é o Pantanal?"
documents = [
"É um dos ecossistemas mais ricos em biodiversidade do mundo, abrigando uma grande variedade de espécies animais e vegetais.",
"Sua beleza natural, com rios e lagos interligados, atrai turistas de todo o mundo.",
"O Pantanal sofre com impactos ambientais, como a exploração mineral e o desmatamento.",
"O Pantanal é uma extensa planície alagável localizada na América do Sul, principalmente no Brasil, mas também em partes da Bolívia e Paraguai.",
"É um local com importância histórica e cultural para as populações locais.",
"O Pantanal é um importante habitat para diversas espécies de animais, inclusive aves migratórias."
]
# normalize_scores defaults to True, so the scores below sum to 1.
rank(query, documents)
# Output shown in the original example, highest score first:
# [('O Pantanal é uma extensa planície alagável localizada na América do Sul, principalmente no Brasil, mas também em partes da Bolívia e Paraguai.',
# 0.34217479171361365),
# ('O Pantanal é um importante habitat para diversas espécies de animais, inclusive aves migratórias.',
# 0.3310142292228493),
# ('O Pantanal sofre com impactos ambientais, como a exploração mineral e o desmatamento.',
# 0.2649093801652631),
# ('É um local com importância histórica e cultural para as populações locais.',
# 0.027501075607910826),
# ('É um dos ecossistemas mais ricos em biodiversidade do mundo, abrigando uma grande variedade de espécies animais e vegetais.',
# 0.02122344629201432),
# ('Sua beleza natural, com rios e lagos interligados, atrai turistas de todo o mundo.',
# 0.013177076998348802)]
# Sample - 2
# An English query ranked against five candidate sentences.
query = "What is the speed of light?"
documents = [
"Isaac Newton's laws of motion and gravity laid the groundwork for classical mechanics.",
"The theory of relativity, proposed by Albert Einstein, has revolutionized our understanding of space, time, and gravity.",
"The Earth orbits the Sun at an average distance of about 93 million miles, taking roughly 365.25 days to complete one revolution.",
"The speed of light in a vacuum is approximately 299,792 kilometers per second (km/s), or about 186,282 miles per second.",
"Light can be described as both a wave and a particle, a concept known as wave-particle duality."
]
# normalize_scores defaults to True, so the scores below sum to 1.
rank(query, documents)
# Output shown in the original example, highest score first:
# [('The speed of light in a vacuum is approximately 299,792 kilometers per second (km/s), or about 186,282 miles per second.',
# 0.2666552015232382),
# ('The Earth orbits the Sun at an average distance of about 93 million miles, taking roughly 365.25 days to complete one revolution.',
# 0.25073310834800405),
# ('Light can be described as both a wave and a particle, a concept known as wave-particle duality.',
# 0.23357900324953587),
# ('The theory of relativity, proposed by Albert Einstein, has revolutionized our understanding of space, time, and gravity.',
# 0.1280649276771122),
# ("Isaac Newton's laws of motion and gravity laid the groundwork for classical mechanics.",
# 0.12096775920210973)]
```