George-Ogden committed on
Commit
1f99ce2
1 Parent(s): 202717b

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +54 -0
README.md ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ datasets:
4
+ - bookcorpus
5
+ - wikipedia
6
+ language:
7
+ - en
8
+ metrics:
9
+ - glue
10
+ pipeline_tag: text-classification
11
+ ---
"""Evaluate the fine-tuned MNLI checkpoint on both GLUE MNLI validation splits.

Loads the model and tokenizer from the Hub, preprocesses the GLUE "mnli"
dataset with the project-local ``preprocess_function``, and logs evaluation
metrics for the matched and mismatched validation sets.
"""

import functools

from datasets import load_dataset
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    default_data_collator,
)

from utils import compute_metrics, preprocess_function

model_name = "George-Ogden/bert-large-cased-finetuned-mnli"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# No default eval_dataset is configured: each evaluate() call below passes its
# split explicitly, and a bare string such as "mnli" is not a dataset.
trainer = Trainer(
    model=model,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    data_collator=default_data_collator,
)

# preprocess_function comes from the local utils module; the tokenizer is bound
# as its first positional argument and the examples arrive in batches.
raw_datasets = load_dataset("glue", "mnli").map(
    functools.partial(preprocess_function, tokenizer),
    batched=True,
)

tasks = ["mnli", "mnli-mm"]
eval_datasets = [
    raw_datasets["validation_matched"],
    raw_datasets["validation_mismatched"],
]

# Evaluate each split once. The previous outer loop over `model.num_layers`
# referenced an attribute the loaded model does not define and never used the
# loop variable, so it has been removed.
for eval_dataset, task in zip(eval_datasets, tasks):
    metrics = trainer.evaluate(eval_dataset=eval_dataset)
    metrics["eval_samples"] = len(eval_dataset)

    if task == "mnli-mm":
        # Suffix the mismatched-split keys so they stay distinguishable from
        # the matched-split metrics.
        metrics = {k + "_mm": v for k, v in metrics.items()}

    # log_metrics takes the split name as its first argument.
    trainer.log_metrics("eval", metrics)