GVAL-leaderboard / evaluation.py
Peiyan's picture
Upload 2 files
085ecee verified
raw
history blame
1.25 kB
# evaluation.py
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from sklearn.metrics import accuracy_score
def evaluate_model(model_path, test_data):
    """
    Evaluate a sequence-classification model and return its accuracy.

    The model and its tokenizer are both loaded from ``model_path`` via
    ``from_pretrained``. Each example is tokenized and classified on its
    own (one forward pass per example); the predicted class is the argmax
    over the model's logits.

    Args:
    - model_path: Identifier or path handed to ``from_pretrained`` for both
      the model and the tokenizer.
    - test_data: An iterable of (text, label) pairs. Labels are compared
      against the predicted class indices, so they are presumably integer
      class ids matching the model's label mapping -- verify against caller.

    Returns:
    - score: The value returned by ``sklearn.metrics.accuracy_score`` for the
      collected labels and predictions.

    Note:
    - An empty ``test_data`` is passed through to ``accuracy_score`` as two
      empty lists; the result in that case is whatever scikit-learn defines.
    """
    # Load model and tokenizer
    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    # eval() switches layers such as dropout to inference behaviour; it does
    # NOT disable gradient tracking, hence the no_grad() block below.
    model.eval()

    all_preds = []
    all_labels = []

    # Inference only: skip building the autograd graph so each forward pass
    # does not retain activations for a backward pass that never happens.
    with torch.no_grad():
        for text, label in test_data:
            inputs = tokenizer(
                text, return_tensors="pt", padding=True, truncation=True
            )
            outputs = model(**inputs)
            logits = outputs.logits
            # Single example per call, so argmax yields one element.
            prediction = torch.argmax(logits, dim=-1).item()
            all_preds.append(prediction)
            all_labels.append(label)

    # Calculate accuracy (or any other metric)
    accuracy = accuracy_score(all_labels, all_preds)
    return accuracy