lunadebruyne committed
Commit 9a74e03 · 1 Parent(s): f625e51

Update app.py

Files changed (1)
  1. app.py +33 -0
app.py CHANGED
@@ -2,6 +2,9 @@ import gradio as gr
 import torch
 import numpy as np
 
+import pandas as pd
+from tqdm import tqdm
+
 from transformers import AutoTokenizer, AutoConfig, AutoModel, AutoModelForSequenceClassification
 from transformers import TrainingArguments, Trainer
 
@@ -12,6 +15,8 @@ description_sentence = "<h3>Demo EmotioNL</h3>\nThis demo allows you to analyse
 description2 = "<h3>Demo EmotioNL</h3>\nThis demo allows you to analyse the emotions in a dataset.\nThe data should be in tsv-format with two named columns: the first column (id) should contain the sentence IDs, and the second column (text) should contain the actual texts. Optionally, there is a third column named 'date', which specifies the date associated with the text (e.g., tweet date). This column is necessary when the options 'emotion distribution over time' and 'peaks' are selected."
 
 inference_modelpath = "model/checkpoint-128"
+
+"""
 output_dir = "model"
 model_config = {
     "model_weights": "pdelobelle/robbert-v2-dutch-base",
@@ -24,6 +29,7 @@ model_config = {
 tokenizer = AutoTokenizer.from_pretrained(model_config["model_weights"])
 model = AutoModelForSequenceClassification.from_pretrained(inference_modelpath)
 
+
 # Function for encoding (tokenizing) data
 def encode_data(data):
     text = data["text"]
@@ -52,6 +58,7 @@ trainer = Trainer(
     model = model,
     args = test_args)
 
+
 def inference_dataset(file_object):
     #input_file = open(file_object.name, 'r')
     input_file = file_object
@@ -76,6 +83,32 @@ def inference_dataset(file_object):
         f.write(str(line[0]) + '\t' + str(line[1]) + '\t' + str(line[2]) + '\n')
     f.close()
     return output
+"""
+
+def inference_dataset(file_object):
+    tokenizer = AutoTokenizer.from_pretrained(inference_modelpath)
+    model = AutoModelForSequenceClassification.from_pretrained(inference_modelpath)
+    data_path = open(file_object, 'r')
+    df = pd.read_csv(data_path, delimiter='\t', header=0, names=['id', 'text'])
+    ids = df["id"].tolist()
+    texts = df["text"].tolist()
+    preds = []
+    for text in tqdm(texts): # progressbar
+        inputs = tokenizer(text, return_tensors="pt")
+        with torch.no_grad(): # run model
+            logits = model(**inputs).logits
+        predicted_class_id = logits.argmax().item()
+        prediction = model.config.id2label[predicted_class_id]
+        preds.append(prediction)
+    predictions_content = list(zip(ids, texts, preds))
+    # write predictions to file
+    output = "output.txt"
+    f = open(output, 'w')
+    f.write("id\ttext\tprediction\n")
+    for line in predictions_content:
+        f.write(str(line[0]) + '\t' + str(line[1]) + '\t' + str(line[2]) + '\n')
+    f.close()
+    return output
 
 def what_happened(text, file_object, option_list):
     if file_object:
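
For reference, below is a minimal sketch (not part of the commit) of how the rewritten inference_dataset could be tried out locally, assuming it runs in the same Python session as this app.py and that the fine-tuned checkpoint at inference_modelpath ("model/checkpoint-128") is available. The file name sample.tsv and the two Dutch example sentences are illustrative assumptions; the TSV layout follows the id/text columns that description2 asks for.

# Sketch only: exercise the new inference_dataset() on a tiny TSV.
# Assumes inference_dataset from app.py and the checkpoint at
# inference_modelpath are available locally.
import csv

sample_path = "sample.tsv"  # hypothetical input file, not from the commit
rows = [
    ("1", "Wat een prachtige dag!"),   # illustrative Dutch sentences
    ("2", "Ik ben hier echt boos over."),
]

with open(sample_path, "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f, delimiter="\t")
    writer.writerow(["id", "text"])  # named columns expected by the demo
    writer.writerows(rows)

output_path = inference_dataset(sample_path)  # writes and returns "output.txt"
print(open(output_path, encoding="utf-8").read())

Note that, unlike the commented-out Trainer-based version, the new function loads the tokenizer and model from inference_modelpath on every call, so repeated calls will reload the checkpoint.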