OnurSahh committed on
Commit
ecda826
1 Parent(s): cffc53f

Upload NLPEvaluation_SIGMOID.py

Browse files
Files changed (1) hide show
  1. NLPEvaluation_SIGMOID.py +88 -0
NLPEvaluation_SIGMOID.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+
4
+
5
def auth(username, password):
    """Return True only when both the username and the password match.

    Passed as the ``auth`` callback to ``iface.launch`` at the bottom of this
    file, so both arguments come from whoever is trying to log in.

    Args:
        username: Login name supplied by the client.
        password: Password supplied by the client.

    Returns:
        bool: True if both values equal the expected credentials, else False.
        Non-string input is rejected with False instead of raising.
    """
    import hmac  # local import so this block does not depend on file-level imports

    # NOTE(review): the credentials are hard-coded in source. They are kept
    # as-is to preserve behaviour; consider loading them from the environment
    # or a secret store and rotating the password.
    expected_username = b"SIGMOID"
    expected_password = b"2A4S39H7E7GR1172"

    if not isinstance(username, str) or not isinstance(password, str):
        return False

    # compare_digest runs in time independent of where the inputs differ, so
    # response timing does not leak how much of a guess was correct. Bytes are
    # used because compare_digest rejects non-ASCII str arguments.
    username_ok = hmac.compare_digest(
        username.encode("utf-8", "surrogatepass"), expected_username
    )
    password_ok = hmac.compare_digest(
        password.encode("utf-8", "surrogatepass"), expected_password
    )
    # Both comparisons are evaluated above, so there is no short-circuit here.
    return username_ok and password_ok
10
+
11
+
12
# Maps the classifier's arg-max class index (0-5) to the pair written into the
# output columns: (value for 'offansive', value for 'target').
_CLASS_OUTPUTS = (
    (1, 'INSULT'),
    (1, 'RACIST'),
    (1, 'SEXIST'),
    (1, 'PROFANITY'),
    (0, 'OTHER'),
    (1, 'OTHER'),
)


def _predict_class_ids(df):
    """Tokenize the ``text`` column of ``df`` and return one class index per row."""
    # The heavy third-party imports live here so they are only paid for when
    # there is actually something to classify.
    from transformers import AutoTokenizer, BertForSequenceClassification, Trainer
    from datasets import Dataset
    import numpy as np

    model = BertForSequenceClassification.from_pretrained("sentiment_model", num_labels=6)
    tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-turkish-cased")

    test_dataset = Dataset.from_dict(df)

    def tokenize_function(examples):
        return tokenizer(examples["text"], padding="max_length", truncation=True)

    tokenized_test_datasets = test_dataset.map(tokenize_function, batched=True)

    # Trainer is used purely as an inference driver; no training arguments are given.
    trainer = Trainer(model=model)

    # preds[0] holds the per-class scores; the highest-scoring class wins.
    preds = trainer.predict(tokenized_test_datasets)
    return np.argmax(preds[0], axis=1)


def predict(df):
    """Classify every row of ``df`` and add the ``offansive`` / ``target`` columns.

    ``df`` is modified in place: the ``id`` column is removed before inference
    and re-attached afterwards (so it ends up as the last column), and the two
    result columns are filled from the predicted class of each row's ``text``.

    Args:
        df: pandas DataFrame with at least an ``id`` and a ``text`` column.

    Returns:
        The same DataFrame object, with ``offansive`` (1 or 0) and ``target``
        (one of INSULT, RACIST, SEXIST, PROFANITY, OTHER) added.

    Raises:
        KeyError: if ``df`` has no ``id`` column.
    """
    # The id column is not a model input; set it aside during inference.
    df_ids = df.pop('id')

    # Nothing to classify: skip loading the model and just produce empty columns.
    if len(df) == 0:
        max_indices = []
    else:
        max_indices = _predict_class_ids(df)

    outputs = [_CLASS_OUTPUTS[int(class_id)] for class_id in max_indices]

    # Whole-column assignment replaces the former per-cell chained assignment
    # (df[col][i] = ...), which warns and stops working under pandas
    # Copy-on-Write, and which depended on the index being 0..n-1.
    df['offansive'] = [flag for flag, _ in outputs]
    df['target'] = [label for _, label in outputs]

    df['id'] = df_ids
    return df
69
+
70
def get_file(file):
    """Run the classifier over an uploaded pipe-separated CSV and return the result path.

    Args:
        file: The uploaded input. Either a filesystem path (``str`` or
            ``os.PathLike``) or an object exposing the path through a ``name``
            attribute, such as a temporary-file wrapper.

    Returns:
        str: Path of the written output file, ``"output_SIGMOID.csv"``, which
        is pipe-separated like the input.

    Raises:
        ValueError: if ``file`` is None (nothing was uploaded).
    """
    import os  # local import so this block does not depend on file-level imports

    if file is None:
        raise ValueError("No input file was provided.")

    output_file = "output_SIGMOID.csv"

    # Accept plain paths as well as file-like objects that carry their path
    # in ``.name``. Path-likes are checked first because pathlib paths also
    # have a ``.name``, which is only the base name.
    if isinstance(file, (str, os.PathLike)):
        source_path = os.fspath(file)
    else:
        source_path = file.name

    # For Windows users, replace the path separator.
    file_name = source_path.replace("\\", "/")

    df = pd.read_csv(file_name, sep="|")

    # predict() returns the frame it annotated; use that result explicitly
    # rather than relying on its in-place side effect.
    df = predict(df)
    df.to_csv(output_file, index=False, sep="|")
    return output_file
81
+
82
+
83
+
84
# Build the file-in / file-out Gradio interface around get_file.
iface = gr.Interface(fn=get_file, inputs="file", outputs="file")

if __name__ == "__main__":
    # Serve the interface through a share link, with logins checked by auth().
    iface.launch(auth=auth, share=True)