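# Spaces: Sleeping
# NOTE(review): the line above looks like page-status residue picked up during
# extraction; it is not part of the script.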
import wandb
import yaml
from datasets import load_metric
from transformers import pipeline
# Load the run configuration.
# The encoding is given explicitly so that decoding the YAML file does not
# depend on the platform's default locale encoding.
with open('config/config.yaml', 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

# Fine-tuned model to evaluate.
model_name = "results_student"  # Replace with the path to the student model
tokenizer_name = "distilbert-base-uncased"

# Evaluation metrics.
bleu = load_metric("bleu")
rouge = load_metric("rouge")

# Initialize wandb with the project/entity read from the configuration.
wandb.init(project=config['wandb']['project'], entity=config['wandb']['entity'])
def evaluate_model(model_name, tokenizer_name):
    """Run a text-classification pipeline on sample texts and log BLEU/ROUGE to wandb.

    Builds a ``text-classification`` pipeline from ``model_name`` and
    ``tokenizer_name``, classifies the ``candidate`` text of each hard-coded
    example, scores the predicted labels against the ``reference`` texts with
    the module-level ``bleu`` and ``rouge`` metrics, and logs the results
    with ``wandb.log``.

    Args:
        model_name: Model identifier or path passed to ``pipeline(model=...)``.
        tokenizer_name: Tokenizer identifier or path passed to
            ``pipeline(tokenizer=...)``.
    """
    nlp = pipeline("text-classification", model=model_name, tokenizer=tokenizer_name)

    # Hard-coded sample pairs standing in for a real evaluation set.
    examples = [
        {"reference": "This is a great movie.", "candidate": "This is a fantastic movie."},
        {"reference": "I love this film.", "candidate": "I enjoy this movie."},
    ]
    references = [example["reference"] for example in examples]
    # NOTE(review): the predictions are the classifier's label strings, not
    # generated text, so BLEU/ROUGE against the reference sentences is of
    # doubtful meaning -- confirm the intended metric for this model.
    candidates = [nlp(example["candidate"])[0]["label"] for example in examples]

    # The BLEU metric works on tokens: each prediction is a list of tokens and
    # each example carries a list of tokenized references. Raw strings do not
    # match that input schema, so both sides are whitespace-tokenized here.
    bleu_score = bleu.compute(
        predictions=[candidate.split() for candidate in candidates],
        references=[[reference.split()] for reference in references],
    )
    # ROUGE accepts plain strings for predictions and references.
    rouge_score = rouge.compute(predictions=candidates, references=references)

    # ROUGE returns aggregate (low/mid/high) score tuples per variant; log the
    # mid f-measure so every logged ROUGE value is a plain number.
    wandb.log({
        "bleu_score": bleu_score,
        "rouge_score": {
            variant: aggregate.mid.fmeasure
            for variant, aggregate in rouge_score.items()
        },
    })
# Evaluate the model configured above.
evaluate_model(model_name=model_name, tokenizer_name=tokenizer_name)