cnmoro
/

BertMini-Reranker-EnPt

Safetensors

Portuguese

English

bert

Model card Files Files and versions Community

cnmoro committed on Dec 4, 2024

Commit

207b59b

•

1 Parent(s): 644b43d

Create README.md

Browse files

Files changed (1) hide show

README.md +116 -0

README.md ADDED Viewed

	@@ -0,0 +1,116 @@

+---
+language:
+- pt
+- en
+license: mit
+base_model:
+- google/bert_uncased_L-4_H-256_A-4
+---
+```python
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+model_id = "cnmoro/BertMini-Reranker-EnPt"
+model = AutoModelForSequenceClassification.from_pretrained(
+    model_id,
+    num_labels=2
+)
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
+template = "Query: {query}\nSentence: {document}"
+def rank(query, documents, normalize_scores=True):
+    texts = [template.format(query=query, document=document) for document in documents]
+    inputs = tokenizer(
+        texts,
+        add_special_tokens=True,
+        max_length=512,
+        truncation=True,
+        padding=True,
+        return_tensors="pt",
+    )
+    input_ids = inputs["input_ids"].to(device)
+    attention_mask = inputs["attention_mask"].to(device)
+    model.eval()
+    with torch.no_grad():
+        outputs = model(input_ids, attention_mask=attention_mask)
+        logits = outputs.logits
+        probabilities = torch.softmax(logits, dim=1)
+        # Get the predicted classes and confidence scores
+        predicted_classes = torch.argmax(probabilities, dim=1).tolist()
+        confidences = probabilities.max(dim=1).values.tolist()
+    # Construct the results
+    results = [
+        {"prediction": pred, "confidence": conf}
+        for pred, conf in zip(predicted_classes, confidences)
+    ]
+    final_results = []
+    for document, result in zip(documents, results):
+        # If the prediction is 0, then get the score as 1 - confidence
+        if result['prediction'] == 0:
+            result['confidence'] = 1 - result['confidence']
+        final_results.append((document, result['confidence']))
+    # Sort by the confidence score, descending
+    sorted_results = sorted(final_results, key=lambda x: x[1], reverse=True)
+    if normalize_scores:
+        total_score = sum([result[1] for result in sorted_results])
+        sorted_results = [(result[0], result[1] / total_score) for result in sorted_results]
+    return sorted_results
+# Sample 1: a Portuguese query ranked against six candidate passages (expected output is listed in the comment that follows the call)
+query = "O que é o Pantanal?"
+documents = [
+    "É um dos ecossistemas mais ricos em biodiversidade do mundo, abrigando uma grande variedade de espécies animais e vegetais.",
+    "Sua beleza natural, com rios e lagos interligados, atrai turistas de todo o mundo.",
+    "O Pantanal sofre com impactos ambientais, como a exploração mineral e o desmatamento.",
+    "O Pantanal é uma extensa planície alagável localizada na América do Sul, principalmente no Brasil, mas também em partes da Bolívia e Paraguai.",
+    "É um local com importância histórica e cultural para as populações locais.",
+    "O Pantanal é um importante habitat para diversas espécies de animais, inclusive aves migratórias."
+]
+rank(query, documents)
+# [('O Pantanal é uma extensa planície alagável localizada na América do Sul, principalmente no Brasil, mas também em partes da Bolívia e Paraguai.',
+#   0.36703487634136817),
+#  ('O Pantanal é um importante habitat para diversas espécies de animais, inclusive aves migratórias.',
+#   0.36591911362645174),
+#  ('O Pantanal sofre com impactos ambientais, como a exploração mineral e o desmatamento.',
+#   0.13708830048931145),
+#  ('É um local com importância histórica e cultural para as populações locais.',
+#   0.0718928987255767),
+#  ('Sua beleza natural, com rios e lagos interligados, atrai turistas de todo o mundo.',
+#   0.02968024567026795),
+#  ('É um dos ecossistemas mais ricos em biodiversidade do mundo, abrigando uma grande variedade de espécies animais e vegetais.',
+#   0.02838456514702401)]
+# Sample 2: an English query ranked against five candidate passages (expected output is listed in the comment that follows the call)
+query = "What is the speed of light?"
+documents = [
+    "Isaac Newton's laws of motion and gravity laid the groundwork for classical mechanics.",
+    "The theory of relativity, proposed by Albert Einstein, has revolutionized our understanding of space, time, and gravity.",
+    "The Earth orbits the Sun at an average distance of about 93 million miles, taking roughly 365.25 days to complete one revolution.",
+    "The speed of light in a vacuum is approximately 299,792 kilometers per second (km/s), or about 186,282 miles per second.",
+    "Light can be described as both a wave and a particle, a concept known as wave-particle duality."
+]
+rank(query, documents)
+# [('The speed of light in a vacuum is approximately 299,792 kilometers per second (km/s), or about 186,282 miles per second.',
+#   0.33902196713184685),
+#  ("Isaac Newton's laws of motion and gravity laid the groundwork for classical mechanics.",
+#   0.2309855191720416),
+#  ('The Earth orbits the Sun at an average distance of about 93 million miles, taking roughly 365.25 days to complete one revolution.',
+#   0.20293087063400417),
+#  ('Light can be described as both a wave and a particle, a concept known as wave-particle duality.',
+#   0.188980879354878),
+#  ('The theory of relativity, proposed by Albert Einstein, has revolutionized our understanding of space, time, and gravity.',
+#   0.03808076370722937)]
+```