from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Load the reranker: a cross-encoder that scores a (query, document) pair jointly.
model_name = "BAAI/bge-reranker-v2-m3"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

query = "What is the capital of France?"
candidates = [
    "Paris is the capital of France.",
    "Berlin is the capital of Germany.",
    "Madrid is the capital of Spain.",
]

# Score each (query, candidate) pair; a higher logit means higher relevance.
scores = []
for candidate in candidates:
    inputs = tokenizer(query, candidate, return_tensors="pt", truncation=True)
    with torch.no_grad():
        logits = model(**inputs).logits
    scores.append(logits.item())

# Sort scores and candidates together so each rank is printed with its own score
# (indexing the unsorted `scores` list after sorting would mismatch them).
ranked = sorted(zip(scores, candidates), key=lambda pair: pair[0], reverse=True)

for i, (score, candidate) in enumerate(ranked):
    print(f"Rank {i + 1}: {candidate} (Score: {score:.4f})")
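
Looping one pair at a time works for a handful of candidates, but the Hugging Face tokenizer also accepts parallel lists of first and second sequences, so the same reranking can run as a single padded batch. A minimal sketch, assuming the `tokenizer`, `model`, `query`, and `candidates` defined above are in scope:

# Batched variant: tokenize all (query, candidate) pairs at once and score
# them in one forward pass instead of one model call per candidate.
batch = tokenizer(
    [query] * len(candidates),
    candidates,
    padding=True,
    truncation=True,
    return_tensors="pt",
)
with torch.no_grad():
    batch_scores = model(**batch).logits.squeeze(-1).tolist()

for i, (score, candidate) in enumerate(
    sorted(zip(batch_scores, candidates), key=lambda pair: pair[0], reverse=True)
):
    print(f"Rank {i + 1}: {candidate} (Score: {score:.4f})")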
|