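# Gradio demo for EmotioNL: analyse the emotion in a single sentence or in an
# uploaded TSV dataset, exposed as two tabs ("Sentence" and "Dataset").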
import gradio as gr
import torch
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
from transformers import AutoTokenizer, AutoModelForSequenceClassification

description_sentence = "<h3>Demo EmotioNL</h3>\nThis demo allows you to analyse the emotion in a sentence."
description_dataset = "<h3>Demo EmotioNL</h3>\nThis demo allows you to analyse the emotions in a dataset.\nThe data should be in TSV format with two named columns: the first column (id) should contain the sentence IDs, and the second column (text) should contain the actual texts. Optionally, there can be a third column named 'date', which specifies the date associated with each text (e.g., tweet date). This column is required when the options 'emotion distribution over time' and 'peaks' are selected."
inference_modelpath = "model/checkpoint-128"
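
# Illustrative sketch of the TSV layout the dataset tab expects (column names
# as described in description_dataset; the 'date' column is optional and the
# example values below are made up):
#
#   id<TAB>text<TAB>date
#   1<TAB>first sentence to analyse<TAB>2023-01-01
#   2<TAB>second sentence to analyse<TAB>2023-01-02
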
def inference_sentence(text):
    tokenizer = AutoTokenizer.from_pretrained(inference_modelpath)
    model = AutoModelForSequenceClassification.from_pretrained(inference_modelpath)
    inputs = tokenizer(text, return_tensors="pt")
    with torch.no_grad():  # run model
        logits = model(**inputs).logits
    predicted_class_id = logits.argmax().item()
    output = model.config.id2label[predicted_class_id]
    return output

def frequencies(preds):
    preds_dict = {"neutral": 0, "anger": 0, "fear": 0, "joy": 0, "love": 0, "sadness": 0}
    for pred in preds:
        preds_dict[pred] = preds_dict[pred] + 1
    bars = list(preds_dict.keys())
    height = list(preds_dict.values())
    x_pos = np.arange(len(bars))
    fig = plt.figure()  # fresh figure so repeated runs do not stack bars
    plt.bar(x_pos, height, color=['lightgrey', 'firebrick', 'rebeccapurple', 'orange', 'palevioletred', 'cornflowerblue'])
    plt.xticks(x_pos, bars)
    return fig

def inference_dataset(file_object, option_list):
    tokenizer = AutoTokenizer.from_pretrained(inference_modelpath)
    model = AutoModelForSequenceClassification.from_pretrained(inference_modelpath)
    df = pd.read_csv(file_object.name, delimiter='\t', header=0, names=['id', 'text'])
    ids = df["id"].tolist()
    texts = df["text"].tolist()
    preds = []
    for text in tqdm(texts):  # progress bar
        inputs = tokenizer(text, return_tensors="pt")
        with torch.no_grad():  # run model
            logits = model(**inputs).logits
        predicted_class_id = logits.argmax().item()
        prediction = model.config.id2label[predicted_class_id]
        preds.append(prediction)
    predictions_content = list(zip(ids, texts, preds))
    # write predictions to file
    output = "output.txt"
    with open(output, 'w') as f:
        f.write("id\ttext\tprediction\n")
        for line in predictions_content:
            f.write(str(line[0]) + '\t' + str(line[1]) + '\t' + str(line[2]) + '\n')
    output1 = output
    output3 = output4 = output5 = "This option was not selected."
    output2 = frequencies(preds) if "emotion frequencies" in option_list else None
    if "emotion distribution over time" in option_list:
        output3 = "This option was selected."
    if "peaks" in option_list:
        output4 = "This option was selected."
    if "topics" in option_list:
        output5 = "This option was selected."
    return [output1, output2, output3, output4, output5]

iface_sentence = gr.Interface(
    fn=inference_sentence,
    description=description_sentence,
    inputs=gr.Textbox(
        label="Enter a sentence",
        lines=1),
    outputs="text")

inputs = [gr.File(
        label="Upload a dataset"),
    gr.CheckboxGroup(
        ["emotion frequencies", "emotion distribution over time", "peaks", "topics"],
        label="Select options")]

outputs = [gr.File(),
    gr.Plot(label="Emotion frequencies"),
    gr.Textbox(label="Emotion distribution over time"),
    gr.Textbox(label="Peaks"),
    gr.Textbox(label="Topics")]

iface_dataset = gr.Interface(
    fn=inference_dataset,
    description=description_dataset,
    inputs=inputs,
    outputs=outputs)

iface = gr.TabbedInterface([iface_sentence, iface_dataset], ["Sentence", "Dataset"])

# queue() lets long-running dataset inference complete without the request timing out
iface.queue().launch()