import wandb
from datasets import load_metric  # requires datasets<3.0; metrics later moved to the separate `evaluate` package
from transformers import pipeline
import yaml
# Load the configuration
with open('config/config.yaml', 'r') as f:
    config = yaml.safe_load(f)
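# Assumed structure of config/config.yaml (only the keys used below; the
# actual names are hypothetical and should match your wandb account):
#
#   wandb:
#     project: genai-distillation
#     entity: my-team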
# Load the fine-tuned model
model_name = "results_student"  # Replace with the path to the student model
tokenizer_name = "distilbert-base-uncased"
# Set up the evaluation metrics
bleu = load_metric("bleu")
rouge = load_metric("rouge")
# Initialize wandb
wandb.init(project=config['wandb']['project'], entity=config['wandb']['entity'])

def evaluate_model(model_name, tokenizer_name):
    nlp = pipeline("text-classification", model=model_name, tokenizer=tokenizer_name)

    # Simulated examples for the evaluation
    examples = [
        {"reference": "This is a great movie.", "candidate": "This is a fantastic movie."},
        {"reference": "I love this film.", "candidate": "I enjoy this movie."}
    ]
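    # In a real run these pairs would come from a held-out evaluation set
    # rather than hard-coded strings.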
    references = [e["reference"] for e in examples]
    candidates = [e["candidate"] for e in examples]
    # The classifier's output is a class label, not text, so it cannot be
    # scored with BLEU/ROUGE; keep it as a separate diagnostic instead.
    predicted_labels = [nlp(text)[0]["label"] for text in candidates]

    # Compute BLEU and ROUGE on the candidate texts against the references
    # (the datasets BLEU metric expects pre-tokenized input)
    bleu_score = bleu.compute(predictions=[c.split() for c in candidates],
                              references=[[r.split()] for r in references])
    rouge_score = rouge.compute(predictions=candidates, references=references)
# Enregistrer les scores sur wandb
wandb.log({
"bleu_score": bleu_score,
"rouge_score": rouge_score
})

# Evaluate the model
evaluate_model(model_name, tokenizer_name)
wandb.finish()
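# Assumed invocation (from the repo root, so that config/config.yaml resolves):
#   python scripts/evaluate.py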