NorGLM committed on
Commit
192a496
1 Parent(s): 7cf0ad9

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +92 -0
README.md CHANGED
@@ -1,3 +1,95 @@
1
  ---
2
  license: cc-by-nc-sa-4.0
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: cc-by-nc-sa-4.0
3
+ language:
4
+ - 'no'
5
  ---
6
+
7
+ # Model Card
8
+
9
+ NorGPT-369M-NO-BoolQ-peft is trained on top of the [NorGPT-369M](https://huggingface.co/NorGLM/NorGPT-369M) model on the [NO-BoolQ](https://huggingface.co/datasets/NorGLM/NO-BoolQ) dataset.
10
+
11
+ Data format:
12
+ ```
13
+ input: {passage}[SEP]{question}
14
+ label: {True, False} -> {1,0}
15
+ ```
16
+
17
+ ## Run the Model
18
+ ```python
19
+ from peft import PeftModel, PeftConfig
20
+ from transformers import AutoModelForCausalLM, AutoTokenizer
21
+ import torch
22
+
23
+ torch_device = "cuda" if torch.cuda.is_available() else "cpu"
24
+
25
+ source_model_id = "NorGLM/NorGPT-369M"
26
+ peft_model_id = "NorGLM/NorGPT-369M-NO-BoolQ-peft"
27
+
28
+ config = PeftConfig.from_pretrained(peft_model_id)
29
+ model = AutoModelForCausalLM.from_pretrained(source_model_id, device_map='balanced')
30
+
31
+ tokenizer_max_len = 2048
32
+ tokenizer_config = {'pretrained_model_name_or_path': source_model_id,
33
+ 'max_len': tokenizer_max_len}
34
+ tokenizer = tokenizer = AutoTokenizer.from_pretrained(**tokenizer_config)
35
+ tokenizer.pad_token = tokenizer.eos_token
36
+
37
+ model = PeftModel.from_pretrained(model, peft_model_id)
38
+ ```
39
+
40
+ ## Inference Example
41
+ Load the model to evaluate on the validation set:
42
+ ```python
43
+
44
+ def getDataSetFromFiles(df):
45
+ # convert dataset
46
+ df["text"] = df[["passage", "question"]].apply(lambda x: " [SEP] ".join(x.astype(str)), axis =1)
47
+ df = df.drop(["idx", "passage", "question"], axis=1)
48
+ #df['label'] = df['label'].replace({1:'contradiction', -1:'entailment', 0:'neutral'})
49
+ df["label"] = df.label.map({True: 1, False: 0})
50
+ return Dataset.from_pandas(df)
51
+
52
+ print("--LOADING EVAL DATAS---")
53
+ eval_data = load_dataset("NorGLM/NO-BoolQ", data_files="val.jsonl")
54
+ eval_data = getDataSetFromFiles(eval_data["train"].to_pandas())
55
+
56
+ print("--MAKING PREDICTIONS---")
57
+ model.eval()
58
+
59
+ y_true = []
60
+ y_pred = []
61
+ count = 0
62
+
63
+ for data in eval_data:
64
+ count = count + 1
65
+ if count % 100 == 0:
66
+ print(count)
67
+ inputs = tokenizer(data['text'], return_tensors="pt").to(torch_device)
68
+
69
+ with torch.no_grad():
70
+ logits = model(**inputs).logits
71
+ #print(logits)
72
+
73
+ predicted_class_id = logits.argmax().item()
74
+
75
+ y_true.append(data['label'])
76
+ y_pred.append(predicted_class_id)
77
+
78
+ print(y_pred)
79
+
80
+ print(f"Length of true_values: {len(y_true)}")
81
+ print(f"Length of predicted_values: {len(y_pred)}")
82
+
83
+ y_true = np.array(y_true)
84
+ y_pred = np.array(y_pred)
85
+
86
+ F_score = f1_score(y_true, y_pred, average="macro")
87
+ print(f"F1 score: {F_score}")
88
+
89
+ accuracy = accuracy_score(y_true, y_pred)
90
+ print(f"Accuracy: {accuracy}")
91
+
92
+ ```
93
+
94
+ ## Note
95
+ More training details will be released soon!