j-hartmann committed on
Commit
7a7f7dc
1 Parent(s): 5409c41

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -23
app.py CHANGED
@@ -3,11 +3,6 @@ import pandas as pd
3
  import numpy as np
4
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer
5
 
6
- # load tokenizer and model, create trainer
7
- model_name = "j-hartmann/emotion-english-distilroberta-base"
8
- tokenizer = AutoTokenizer.from_pretrained(model_name)
9
- model = AutoModelForSequenceClassification.from_pretrained(model_name)
10
- trainer = Trainer(model=model)
11
 
12
  # summary function - test for single gradio function interface
13
  def bulk_function(filename):
@@ -22,11 +17,47 @@ def bulk_function(filename):
22
  def __getitem__(self, idx):
23
  return {k: v[idx] for k, v in self.tokenized_texts.items()}
24
 
25
- # read file lines
26
- with open(filename.name, "r") as f:
27
- lines = f.readlines()
28
- # expects unnamed:0 or index, col name -> strip both
29
- lines_s = [item.split("\n")[0].split(",")[-1] for item in lines][1:]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
  # Tokenize texts and create prediction data set
32
  tokenized_texts = tokenizer(lines_s,truncation=True,padding=True)
@@ -39,10 +70,13 @@ def bulk_function(filename):
39
  preds = predictions.predictions.argmax(-1)
40
  labels = pd.Series(preds).map(model.config.id2label)
41
  scores = (np.exp(predictions[0])/np.exp(predictions[0]).sum(-1,keepdims=True)).max(1)
 
 
 
 
42
  # scores raw
43
  temp = (np.exp(predictions[0])/np.exp(predictions[0]).sum(-1,keepdims=True))
44
 
45
- # work in progress
46
  # container
47
  anger = []
48
  disgust = []
@@ -54,24 +88,27 @@ def bulk_function(filename):
54
 
55
  # extract scores (as many entries as exist in pred_texts)
56
  for i in range(len(lines_s)):
57
- anger.append(temp[i][0])
58
- disgust.append(temp[i][1])
59
- fear.append(temp[i][2])
60
- joy.append(temp[i][3])
61
- neutral.append(temp[i][4])
62
- sadness.append(temp[i][5])
63
- surprise.append(temp[i][6])
64
 
65
  # define df
66
- df = pd.DataFrame(list(zip(lines_s,preds,labels,scores, anger, disgust, fear, joy, neutral, sadness, surprise)), columns=['text','pred','label','score', 'anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness', 'surprise'])
67
-
68
  # save results to csv
69
  YOUR_FILENAME = filename.name.split(".")[0] + "_emotion_predictions" + ".csv" # name your output file
70
- df.to_csv(YOUR_FILENAME)
71
 
72
  # return path of the saved csv file for space output
73
  return YOUR_FILENAME
74
 
75
- gr.Interface(bulk_function, [gr.inputs.File(file_count="single", type="file", label="csv", optional=False),],["file"],
76
- examples=[['emotion_examples.csv'],],
 
 
 
77
  ).launch(debug=True)
 
3
  import numpy as np
4
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer
5
 
 
 
 
 
 
6
 
7
  # summary function - test for single gradio function interface
8
  def bulk_function(filename):
 
17
  def __getitem__(self, idx):
18
  return {k: v[idx] for k, v in self.tokenized_texts.items()}
19
 
20
+ # load tokenizer and model, create trainer
21
+ model_name = "j-hartmann/emotion-english-distilroberta-base"
22
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
23
+ model = AutoModelForSequenceClassification.from_pretrained(model_name)
24
+ trainer = Trainer(model=model)
25
+ print(filename, type(filename))
26
+ print(filename.name)
27
+
28
+
29
+ # check type of input file
30
+ if filename.name.split(".")[1] == "csv":
31
+ print("entered")
32
+ # read file, drop index if exists
33
+ df_input = pd.read_csv(filename.name, index_col=False)
34
+ if df_input.columns[0] == "Unnamed: 0":
35
+ df_input = df_input.drop("Unnamed: 0", axis=1)
36
+ elif filename.name.split(".")[1] == "xlsx":
37
+ df_input = pd.read_excel(filename.name, index_col=False)
38
+ # handle Unnamed
39
+ if df_input.columns[0] == "Unnamed: 0":
40
+ df_input = df_input.drop("Unnamed: 0", axis=1)
41
+ else:
42
+ return
43
+
44
+
45
+ # read csv
46
+ # even if index given, drop it
47
+ #df_input = pd.read_csv(filename.name, index_col=False)
48
+ #print("df_input", df_input)
49
+
50
+ # expect csv format to be in:
51
+ # 1: ID
52
+ # 2: Texts
53
+ # no index
54
+ # store ids in ordered list
55
+ ids = df_input[df_input.columns[0]].to_list()
56
+
57
+ # store sentences in ordered list
58
+ # expects sentences to be in second col
59
+ # of csv with two cols
60
+ lines_s = df_input[df_input.columns[1]].to_list()
61
 
62
  # Tokenize texts and create prediction data set
63
  tokenized_texts = tokenizer(lines_s,truncation=True,padding=True)
 
70
  preds = predictions.predictions.argmax(-1)
71
  labels = pd.Series(preds).map(model.config.id2label)
72
  scores = (np.exp(predictions[0])/np.exp(predictions[0]).sum(-1,keepdims=True)).max(1)
73
+
74
+ # round scores
75
+ scores_rounded = [round(score, 2) for score in scores]
76
+
77
  # scores raw
78
  temp = (np.exp(predictions[0])/np.exp(predictions[0]).sum(-1,keepdims=True))
79
 
 
80
  # container
81
  anger = []
82
  disgust = []
 
88
 
89
  # extract scores (as many entries as exist in pred_texts)
90
  for i in range(len(lines_s)):
91
+ anger.append(round(temp[i][0], 3))
92
+ disgust.append(round(temp[i][1], 3))
93
+ fear.append(round(temp[i][2], 3))
94
+ joy.append(round(temp[i][3], 3))
95
+ neutral.append(round(temp[i][4], 3))
96
+ sadness.append(round(temp[i][5], 3))
97
+ surprise.append(round(temp[i][6], 3))
98
 
99
  # define df
100
+ df = pd.DataFrame(list(zip(ids,lines_s,labels,scores_rounded, anger, disgust, fear, joy, neutral, sadness, surprise)), columns=[df_input.columns[0], df_input.columns[1],'label','score', 'anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness', 'surprise'])
101
+ print(df)
102
  # save results to csv
103
  YOUR_FILENAME = filename.name.split(".")[0] + "_emotion_predictions" + ".csv" # name your output file
104
+ df.to_csv(YOUR_FILENAME, index=False)
105
 
106
  # return path of the saved csv file for space output
107
  return YOUR_FILENAME
108
 
109
+ gr.Interface(bulk_function, inputs=[gr.inputs.File(file_count="single", type="file", label="Upload file", optional=False),],
110
+ outputs=[gr.outputs.File(label="Output file")],
111
+ # examples=[["YOUR_FILENAME.csv"]], # computes, doesn't export df so far
112
+ theme="huggingface",
113
+ allow_flagging=False,
114
  ).launch(debug=True)