File size: 1,491 Bytes
fa64206
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import wandb
from datasets import load_metric
from transformers import pipeline
import yaml

# Load the run configuration. The file is decoded explicitly as UTF-8 so the
# result does not depend on the platform's default text encoding.
with open('config/config.yaml', 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

# Fine-tuned model to evaluate
model_name = "results_student"  # Replace with the path to the student model
tokenizer_name = "distilbert-base-uncased"

# Evaluation metrics
bleu = load_metric("bleu")
rouge = load_metric("rouge")

# Start the wandb run; project and entity come from the `wandb` config section
wandb.init(project=config['wandb']['project'], entity=config['wandb']['entity'])

def evaluate_model(model_name, tokenizer_name):
    """Score a classification pipeline's outputs with BLEU and ROUGE and log them.

    Builds a ``text-classification`` pipeline, runs it on the candidate
    sentence of each hard-coded example, compares the predicted labels with
    the reference sentences using the module-level ``bleu`` and ``rouge``
    metrics, and sends both results to wandb.

    Args:
        model_name: Model identifier or path handed to ``pipeline``.
        tokenizer_name: Tokenizer identifier or path handed to ``pipeline``.
    """
    nlp = pipeline("text-classification", model=model_name, tokenizer=tokenizer_name)

    # Placeholder examples used for the evaluation
    examples = [
        {"reference": "This is a great movie.", "candidate": "This is a fantastic movie."},
        {"reference": "I love this film.", "candidate": "I enjoy this movie."}
    ]

    references = [e["reference"] for e in examples]
    # NOTE(review): each prediction is the classifier's label for the candidate
    # sentence, not generated text, so overlap with the reference sentences is
    # unlikely to be meaningful -- confirm this is the intended evaluation.
    candidates = [nlp(e["candidate"])[0]["label"] for e in examples]

    # BLEU works on tokens: every prediction is a list of tokens and every
    # example carries a list of tokenized references. ROUGE takes raw strings.
    bleu_score = bleu.compute(
        predictions=[candidate.split() for candidate in candidates],
        references=[[reference.split()] for reference in references],
    )
    rouge_score = rouge.compute(predictions=candidates, references=references)

    # Record the scores on wandb
    wandb.log({
        "bleu_score": bleu_score,
        "rouge_score": rouge_score
    })

# Run the evaluation for the configured model and tokenizer
evaluate_model(model_name=model_name, tokenizer_name=tokenizer_name)