Spaces:
Running
Running
Commit
·
9a74e03
1
Parent(s):
f625e51
Update app.py
Browse files
app.py
CHANGED
@@ -2,6 +2,9 @@ import gradio as gr
|
|
2 |
import torch
|
3 |
import numpy as np
|
4 |
|
|
|
|
|
|
|
5 |
from transformers import AutoTokenizer, AutoConfig, AutoModel, AutoModelForSequenceClassification
|
6 |
from transformers import TrainingArguments, Trainer
|
7 |
|
@@ -12,6 +15,8 @@ description_sentence = "<h3>Demo EmotioNL</h3>\nThis demo allows you to analyse
|
|
12 |
description2 = "<h3>Demo EmotioNL</h3>\nThis demo allows you to analyse the emotions in a dataset.\nThe data should be in tsv-format with two named columns: the first column (id) should contain the sentence IDs, and the second column (text) should contain the actual texts. Optionally, there is a third column named 'date', which specifies the date associated with the text (e.g., tweet date). This column is necessary when the options 'emotion distribution over time' and 'peaks' are selected."
|
13 |
|
14 |
inference_modelpath = "model/checkpoint-128"
|
|
|
|
|
15 |
output_dir = "model"
|
16 |
model_config = {
|
17 |
"model_weights": "pdelobelle/robbert-v2-dutch-base",
|
@@ -24,6 +29,7 @@ model_config = {
|
|
24 |
tokenizer = AutoTokenizer.from_pretrained(model_config["model_weights"])
|
25 |
model = AutoModelForSequenceClassification.from_pretrained(inference_modelpath)
|
26 |
|
|
|
27 |
# Function for encoding (tokenizing) data
|
28 |
def encode_data(data):
|
29 |
text = data["text"]
|
@@ -52,6 +58,7 @@ trainer = Trainer(
|
|
52 |
model = model,
|
53 |
args = test_args)
|
54 |
|
|
|
55 |
def inference_dataset(file_object):
|
56 |
#input_file = open(file_object.name, 'r')
|
57 |
input_file = file_object
|
@@ -76,6 +83,32 @@ def inference_dataset(file_object):
|
|
76 |
f.write(str(line[0]) + '\t' + str(line[1]) + '\t' + str(line[2]) + '\n')
|
77 |
f.close()
|
78 |
return output
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
|
80 |
def what_happened(text, file_object, option_list):
|
81 |
if file_object:
|
|
|
2 |
import torch
|
3 |
import numpy as np
|
4 |
|
5 |
+
import pandas as pd
|
6 |
+
from tqdm import tqdm
|
7 |
+
|
8 |
from transformers import AutoTokenizer, AutoConfig, AutoModel, AutoModelForSequenceClassification
|
9 |
from transformers import TrainingArguments, Trainer
|
10 |
|
|
|
15 |
description2 = "<h3>Demo EmotioNL</h3>\nThis demo allows you to analyse the emotions in a dataset.\nThe data should be in tsv-format with two named columns: the first column (id) should contain the sentence IDs, and the second column (text) should contain the actual texts. Optionally, there is a third column named 'date', which specifies the date associated with the text (e.g., tweet date). This column is necessary when the options 'emotion distribution over time' and 'peaks' are selected."
|
16 |
|
17 |
inference_modelpath = "model/checkpoint-128"
|
18 |
+
|
19 |
+
"""
|
20 |
output_dir = "model"
|
21 |
model_config = {
|
22 |
"model_weights": "pdelobelle/robbert-v2-dutch-base",
|
|
|
29 |
tokenizer = AutoTokenizer.from_pretrained(model_config["model_weights"])
|
30 |
model = AutoModelForSequenceClassification.from_pretrained(inference_modelpath)
|
31 |
|
32 |
+
|
33 |
# Function for encoding (tokenizing) data
|
34 |
def encode_data(data):
|
35 |
text = data["text"]
|
|
|
58 |
model = model,
|
59 |
args = test_args)
|
60 |
|
61 |
+
|
62 |
def inference_dataset(file_object):
|
63 |
#input_file = open(file_object.name, 'r')
|
64 |
input_file = file_object
|
|
|
83 |
f.write(str(line[0]) + '\t' + str(line[1]) + '\t' + str(line[2]) + '\n')
|
84 |
f.close()
|
85 |
return output
|
86 |
+
"""
|
87 |
+
|
88 |
+
def inference_dataset(file_object):
    """Classify the emotion of every text in a TSV dataset and write results to a file.

    The input is expected to be a tab-separated file with two columns
    (sentence id, text); a header row is consumed by ``header=0`` and the
    columns are renamed to 'id' and 'text'.

    Args:
        file_object: Path (or gradio file path) to the input TSV file.

    Returns:
        str: Path of the written predictions file ("output.txt"), a TSV with
        columns id / text / prediction.
    """
    # Model and tokenizer are (re)loaded per call from the local checkpoint.
    tokenizer = AutoTokenizer.from_pretrained(inference_modelpath)
    model = AutoModelForSequenceClassification.from_pretrained(inference_modelpath)
    # Context manager closes the input handle (the original leaked it).
    with open(file_object, 'r') as data_path:
        df = pd.read_csv(data_path, delimiter='\t', header=0, names=['id', 'text'])
    ids = df["id"].tolist()
    texts = df["text"].tolist()
    preds = []
    for text in tqdm(texts):  # progressbar over the dataset rows
        inputs = tokenizer(text, return_tensors="pt")
        with torch.no_grad():  # inference only — no gradients needed
            logits = model(**inputs).logits
        predicted_class_id = logits.argmax().item()
        # Map the argmax class index to its human-readable label.
        preds.append(model.config.id2label[predicted_class_id])
    # write predictions to file; `with` guarantees the handle is closed
    # even if a write fails mid-way.
    output = "output.txt"
    with open(output, 'w') as f:
        f.write("id\ttext\tprediction\n")
        for row_id, row_text, row_pred in zip(ids, texts, preds):
            f.write(f"{row_id}\t{row_text}\t{row_pred}\n")
    return output
|
112 |
|
113 |
def what_happened(text, file_object, option_list):
|
114 |
if file_object:
|