Spaces:
Running
Running
File size: 22,733 Bytes
ab937a1 9f90723 ab937a1 9f90723 ab937a1 eed1c43 ab937a1 ee4947a ab937a1 199582b 99d3569 199582b a657537 c50fdc0 a657537 199582b ae832b8 efb03fe 08fd507 efb03fe 66f07c2 efb03fe 66f07c2 efb03fe 8c09604 efb03fe 199582b 823fbc6 199582b 823fbc6 199582b 749c3f4 199582b 7f79a98 199582b 7f79a98 199582b 7f79a98 199582b 7f79a98 199582b 823fbc6 ab937a1 199582b f291543 199582b f291543 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 |
import pickle
from datetime import date, timedelta
from functools import lru_cache

import altair as alt
import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from tqdm import tqdm
from transformers import AutoTokenizer, AutoConfig, AutoModel, AutoModelForSequenceClassification
"""
description_sentence = "<h3>Demo EmotioNL</h3>\nThis demo allows you to analyse the emotion in a sentence."
description_dataset = "<h3>Demo EmotioNL</h3>\nThis demo allows you to analyse the emotions in a dataset.\nThe data should be in tsv-format with two named columns: the first column (id) should contain the sentence IDs, and the second column (text) should contain the actual texts. Optionally, there is a third column named 'date', which specifies the date associated with the text (e.g., tweet date). This column is necessary when the options 'emotion distribution over time' and 'peaks' are selected."
inference_modelpath = "model/checkpoint-128"
def inference_sentence(text):
tokenizer = AutoTokenizer.from_pretrained(inference_modelpath)
model = AutoModelForSequenceClassification.from_pretrained(inference_modelpath)
for text in tqdm([text]):
inputs = tokenizer(text, return_tensors="pt")
with torch.no_grad(): # run model
logits = model(**inputs).logits
predicted_class_id = logits.argmax().item()
output = model.config.id2label[predicted_class_id]
return output
def frequencies(preds):
preds_dict = {"neutral": 0, "anger": 0, "fear": 0, "joy": 0, "love": 0, "sadness": 0}
for pred in preds:
preds_dict[pred] = preds_dict[pred] + 1
bars = list(preds_dict.keys())
height = list(preds_dict.values())
x_pos = np.arange(len(bars))
plt.bar(x_pos, height, color=['lightgrey', 'firebrick', 'rebeccapurple', 'orange', 'palevioletred', 'cornflowerblue'])
plt.xticks(x_pos, bars)
return plt
def inference_dataset(file_object, option_list):
tokenizer = AutoTokenizer.from_pretrained(inference_modelpath)
model = AutoModelForSequenceClassification.from_pretrained(inference_modelpath)
data_path = open(file_object.name, 'r')
df = pd.read_csv(data_path, delimiter='\t', header=0, names=['id', 'text'])
ids = df["id"].tolist()
texts = df["text"].tolist()
preds = []
for text in tqdm(texts): # progressbar
inputs = tokenizer(text, return_tensors="pt")
with torch.no_grad(): # run model
logits = model(**inputs).logits
predicted_class_id = logits.argmax().item()
prediction = model.config.id2label[predicted_class_id]
preds.append(prediction)
predictions_content = list(zip(ids, texts, preds))
# write predictions to file
output = "output.txt"
f = open(output, 'w')
f.write("id\ttext\tprediction\n")
for line in predictions_content:
f.write(str(line[0]) + '\t' + str(line[1]) + '\t' + str(line[2]) + '\n')
output1 = output
output2 = output3 = output4 = output5 = "This option was not selected."
if "emotion frequencies" in option_list:
output2 = frequencies(preds)
else:
output2 = None
if "emotion distribution over time" in option_list:
output3 = "This option was selected."
if "peaks" in option_list:
output4 = "This option was selected."
if "topics" in option_list:
output5 = "This option was selected."
return [output1, output2, output3, output4, output5]
iface_sentence = gr.Interface(
fn=inference_sentence,
description = description_sentence,
inputs = gr.Textbox(
label="Enter a sentence",
lines=1),
outputs="text")
inputs = [gr.File(
label="Upload a dataset"),
gr.CheckboxGroup(
["emotion frequencies", "emotion distribution over time", "peaks", "topics"],
label = "Select options")]
outputs = [gr.File(),
gr.Plot(label="Emotion frequencies"),
gr.Textbox(label="Emotion distribution over time"),
gr.Textbox(label="Peaks"),
gr.Textbox(label="Topics")]
iface_dataset = gr.Interface(
fn = inference_dataset,
description = description_dataset,
inputs=inputs,
outputs = outputs)
iface = gr.TabbedInterface([iface_sentence, iface_dataset], ["Sentence", "Dataset"])
iface.queue().launch()
"""
inference_modelpath = "model/checkpoint-128"


@lru_cache(maxsize=1)
def _load_inference_model():
    """Load the tokenizer and classifier from ``inference_modelpath`` once.

    The result is cached so that repeated requests reuse the same objects
    instead of re-reading the checkpoint from disk on every call.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(inference_modelpath)
    model = AutoModelForSequenceClassification.from_pretrained(inference_modelpath)
    return tokenizer, model


def inference_sentence(text):
    """Predict the emotion label for a single sentence.

    Args:
        text: the sentence to classify.

    Returns:
        The string ``"Predicted emotion:\\n"`` followed by the label that
        ``model.config.id2label`` assigns to the highest-scoring class.
    """
    tokenizer, model = _load_inference_model()
    # Truncate over-long input to the tokenizer's maximum length rather than
    # letting the forward pass fail on it.
    inputs = tokenizer(text, return_tensors="pt", truncation=True)
    with torch.no_grad():  # inference only, no gradients needed
        logits = model(**inputs).logits
    predicted_class_id = logits.argmax().item()
    return "Predicted emotion:\n" + model.config.id2label[predicted_class_id]
"""
def inference_sentence(text):
output = "This sentence will be processed:\n" + text
return output
"""
def unavailable(input_file, input_checks):
    """Tell the user that uploading their own data is currently disabled.

    Args:
        input_file: the uploaded file (ignored).
        input_checks: the selected options (ignored).

    Returns:
        Two Gradio updates: the first only sets ``visible=True``; the second
        sets the notice text as value, relabels the component "Oops!" and
        makes it visible.
    """
    # NOTE(review): the message previously ended in a mis-encoded character
    # where an emoji was evidently intended; confirm the exact emoji wanted.
    notice = "As we are currently updating this demo, submitting your own data is unavailable for the moment. However, you can try out the showcase mode 🙂"
    show_heading = gr.update(visible=True)
    show_notice = gr.update(value=notice, label="Oops!", visible=True)
    return show_heading, show_notice
def showcase(input_file):
    """Start showcase mode using the bundled example predictions.

    Args:
        input_file: the uploaded file (ignored).

    Returns:
        Seven Gradio updates, in order: show, hide, show with the example
        predictions path as value, show (this is the update that makes
        next_button_freq available), then three hides.
    """
    predictions_path = "showcase/example_predictions.txt"
    # The last three outputs are all hidden.
    hidden_tail = [gr.update(visible=False) for _ in range(3)]
    return (
        gr.update(visible=True),
        gr.update(visible=False),
        gr.update(value=predictions_path, visible=True),
        gr.update(visible=True),  # next_button_freq becomes available
        *hidden_tail,
    )
def file(input_file, input_checks):
    """Return the predictions file and reveal the button for the first selected option.

    The uploaded file is not read here: the path of the bundled example
    predictions is always returned as the file value.

    Args:
        input_file: the uploaded file (ignored).
        input_checks: collection of selected option names.

    Returns:
        Five Gradio updates: the predictions file (with value, visible),
        followed by one visibility update per option in the order
        frequencies, distribution over time, peaks, topics. Only the first
        option found in ``input_checks`` gets ``visible=True``; if none is
        selected, all four are hidden.
    """
    predictions_path = "showcase/example_predictions.txt"
    steps = (
        "emotion frequencies",
        "emotion distribution over time",
        "peaks",
        "topics",
    )
    # First option, in pipeline order, that the user ticked (None if nothing ticked).
    first_selected = next((step for step in steps if step in input_checks), None)

    file_update = gr.update(value=predictions_path, visible=True)
    button_updates = [gr.update(visible=(step == first_selected)) for step in steps]
    return (file_update, *button_updates)
def freq(output_file, input_checks):
    """Plot how often each emotion label occurs in the showcase predictions.

    Counts are read from ``showcase/example_predictions.txt``; the file passed
    in is not parsed. The first line is skipped as a header and the second
    tab-separated column of every following row is taken as the emotion label.
    Emotions that never occur are plotted with frequency 0, and rows with
    fewer than two columns (e.g. blank lines) are ignored.

    Args:
        output_file: value of the predictions file component; only its
            ``name`` attribute is inspected.
        input_checks: collection of selected option names.

    Returns:
        Four Gradio updates: the bar chart (visible), then visibility updates
        for the "distribution over time", "peaks" and "topics" buttons. At
        most one of the three buttons is made visible.
    """
    emotions = ['neutral', 'anger', 'fear', 'joy', 'love', 'sadness']
    palette = ['#999999', '#b22222', '#663399', '#ffcc00', '#db7093', '#6495ed']

    # Start every known emotion at 0 so an absent category does not raise.
    counts = dict.fromkeys(emotions, 0)
    with open("showcase/example_predictions.txt", 'r') as f:
        next(f, None)  # skip the header line
        for raw_line in f:
            fields = raw_line.rstrip("\n").split("\t")
            if len(fields) < 2:
                continue
            label = fields[1]
            counts[label] = counts.get(label, 0) + 1

    simple = pd.DataFrame({
        'Emotion category': emotions,
        'Frequency': [counts[emotion] for emotion in emotions]})

    # Round the y-axis limit up to the next multiple of 10 (at least 10, so
    # the axis is not degenerate when every count is 0).
    n = max(counts[emotion] for emotion in emotions)
    y_max = max(10, (n + 9) // 10 * 10)

    plot = alt.Chart(simple).mark_bar().encode(
        x=alt.X("Emotion category", sort=emotions),
        y=alt.Y("Frequency", axis=alt.Axis(grid=False), scale=alt.Scale(domain=[0, y_max])),
        color=alt.Color("Emotion category", scale=alt.Scale(domain=emotions, range=palette), legend=None),
        tooltip=['Emotion category', 'Frequency']).properties(
        width=600).configure_axis(
        grid=False).interactive()

    # A file name starting with /tmp/example_predictions is treated like the
    # "distribution over time" option being ticked (presumably this marks
    # showcase mode -- confirm).
    file_name = getattr(output_file, "name", None) or ""
    if "emotion distribution over time" in input_checks or file_name.startswith('/tmp/example_predictions'):
        next_step = "dist"  # next_button_dist becomes available
    elif "peaks" in input_checks:
        next_step = "peaks"  # next_button_peaks becomes available
    elif "topics" in input_checks:
        next_step = "topics"  # next_button_topics becomes available
    else:
        next_step = None  # no next_button becomes available

    return (
        gr.update(value=plot, visible=True),
        gr.update(visible=next_step == "dist"),
        gr.update(visible=next_step == "peaks"),
        gr.update(visible=next_step == "topics"),
    )
def dist(output_file, input_checks):
    """Plot the per-day frequency of each emotion as an interactive line chart.

    Data is read from ``showcase/data.txt``; the file passed in is not parsed.
    The first line is skipped as a header. In every following row the first
    tab-separated column is parsed as ``day/month/year`` (2000 is added to the
    year) and the second column is taken as the emotion label. Every calendar
    day between the earliest and latest date gets one point per emotion, with
    frequency 0 where there are no rows. Rows with fewer than two columns
    (e.g. blank lines) are ignored.

    Args:
        output_file: value of the predictions file component; only its
            ``name`` attribute is inspected.
        input_checks: collection of selected option names.

    Returns:
        Three Gradio updates: the chart (visible), then visibility updates
        for the "peaks" and "topics" buttons. At most one button is made
        visible.
    """
    emotions = ['neutral', 'anger', 'fear', 'joy', 'love', 'sadness']
    palette = ['#999999', '#b22222', '#663399', '#ffcc00', '#db7093', '#6495ed']

    # counts[day][label] -> number of rows with that date and label
    counts = {}
    with open("showcase/data.txt", 'r') as f:
        next(f, None)  # skip the header line
        for raw_line in f:
            fields = raw_line.rstrip("\n").split("\t")
            if len(fields) < 2:
                continue
            day, month, year = (int(part) for part in fields[0].split("/")[:3])
            per_day = counts.setdefault(date(2000 + year, month, day), {})
            per_day[fields[1]] = per_day.get(fields[1], 0) + 1

    # Span the full observed period, independent of the row order in the file.
    if counts:
        start_date, end_date = min(counts), max(counts)
        span = (end_date - start_date).days
        date_range = [start_date + timedelta(days=i) for i in range(span + 1)]
    else:
        date_range = []

    rows = [
        (str(day), counts.get(day, {}).get(emotion, 0), emotion)
        for day in date_range
        for emotion in emotions
    ]
    data = pd.DataFrame(rows, columns=['Date', 'Frequency', 'Emotion category'])

    # Round the y-axis limit up to the next multiple of 10 (at least 10).
    n = max((row[1] for row in rows), default=0)
    y_max = max(10, (n + 9) // 10 * 10)

    highlight = alt.selection(
        type='single', on='mouseover', fields=["Emotion category"], nearest=True)
    base = alt.Chart(data).encode(
        x="Date:T",
        y=alt.Y("Frequency", scale=alt.Scale(domain=[0, y_max])),
        color=alt.Color("Emotion category", scale=alt.Scale(domain=emotions, range=palette), legend=alt.Legend(orient='bottom', direction='horizontal')))
    # Fully transparent points: they carry the tooltip and the hover selection.
    points = base.mark_circle().encode(
        opacity=alt.value(0),
        tooltip=[
            alt.Tooltip('Emotion category', title='Emotion category'),
            alt.Tooltip('Date:T', title='Date'),
            alt.Tooltip('Frequency', title='Frequency')
        ]).add_selection(highlight)
    # Lines are drawn at size 3 for the highlighted emotion and 1 otherwise.
    lines = base.mark_line().encode(
        size=alt.condition(~highlight, alt.value(1), alt.value(3)))
    plot = (points + lines).properties(width=600, height=350).interactive()

    # A file name starting with /tmp/example_predictions is treated like the
    # "peaks" option being ticked (presumably this marks showcase mode -- confirm).
    file_name = getattr(output_file, "name", None) or ""
    if "peaks" in input_checks or file_name.startswith('/tmp/example_predictions'):
        next_step = "peaks"  # next_button_peaks becomes available
    elif "topics" in input_checks:
        next_step = "topics"  # next_button_topics becomes available
    else:
        next_step = None  # no next_button becomes available

    return (
        gr.Plot.update(value=plot, visible=True),
        gr.update(visible=next_step == "peaks"),
        gr.update(visible=next_step == "topics"),
    )
def peaks(output_file, input_checks):
    """Show the pre-computed peaks figure stored in ``showcase/peaks_covid.p``.

    Args:
        output_file: value of the predictions file component; only its
            ``name`` attribute is inspected.
        input_checks: collection of selected option names.

    Returns:
        Two Gradio updates: the figure (visible) and a visibility update for
        the "topics" button.
    """
    # Only ever unpickle files shipped with the app: pickle can execute
    # arbitrary code while loading.
    with open('showcase/peaks_covid.p', 'rb') as f:
        plot = pickle.load(f)

    # A file name starting with /tmp/example_predictions is treated like the
    # "topics" option being ticked (presumably this marks showcase mode -- confirm).
    file_name = getattr(output_file, "name", None) or ""
    show_topics = "topics" in input_checks or file_name.startswith('/tmp/example_predictions')
    # next_button_topics becomes available only when show_topics is True
    return gr.Plot.update(value=plot, visible=True), gr.update(visible=show_topics)
def topics(output_file, input_checks):
    """Show the pre-computed topics figure stored in ``showcase/vis_classes_covid.p``.

    Args:
        output_file: value of the predictions file component (ignored).
        input_checks: the selected options (ignored).

    Returns:
        A single Gradio update that displays the figure, resized to 600x400.
    """
    # Only ever unpickle files shipped with the app: pickle can execute
    # arbitrary code while loading.
    with open('showcase/vis_classes_covid.p', 'rb') as f:
        plot = pickle.load(f)
    plot.update_layout(width=600, height=400)
    return gr.Plot.update(value=plot, visible=True)  # no next_button becomes available
# This demo was made to demonstrate the EmotioNL model, a transformer-based classification model that analyses emotions in Dutch texts. The model uses [RobBERT](https://github.com/iPieter/RobBERT), which was further fine-tuned on the [EmotioNL dataset](https://lt3.ugent.be/resources/emotionl/). The resulting model is a classifier that, given a sentence, predicts one of the following emotion categories: _anger_, _fear_, _joy_, _love_, _sadness_ or _neutral_. The demo can be used either in **sentence mode**, which allows you to enter a sentence for which an emotion will be predicted; or in **dataset mode**, which allows you to upload a dataset or see the full functionality with example data.
# Page layout and event wiring for the demo.
# Markdown bodies are kept flush-left so no indentation ends up inside the
# rendered text.
# NOTE(review): confirm which widgets sit in which Row/Column against the
# rendered page; the container nesting below is the most plausible reading.
with gr.Blocks() as demo:
    # Empty side column (presumably a margin).
    with gr.Column(scale=1, min_width=50):
        gr.Markdown("""
""")
    with gr.Column(scale=5):
        gr.Markdown("""
<div style="text-align: center"><h1>EmotioNL: A framework for Dutch emotion detection</h1></div>
<div style="display: block;margin-left: auto;margin-right: auto;width: 60%;"><img alt="EmotioNL logo" src="https://users.ugent.be/~lundbruy/EmotioNL.png" width="100%"></div>
<div style="display: block;margin-left: auto;margin-right: auto;width: 75%;">This demo was made to demonstrate the EmotioNL model, a transformer-based classification model that analyses emotions in Dutch texts. The model uses <a href="https://github.com/iPieter/RobBERT">RobBERT</a>, which was further fine-tuned on the <a href="https://lt3.ugent.be/resources/emotionl/">EmotioNL dataset</a>. The resulting model is a classifier that, given a sentence, predicts one of the following emotion categories: <i>anger</i>, <i>fear</i>, <i>joy</i>, <i>love</i>, <i>sadness</i> or <i>neutral</i>. The demo can be used either in <b>sentence mode</b>, which allows you to enter a sentence for which an emotion will be predicted; or in <b>dataset mode</b>, which allows you to upload a dataset or see the full functionality with example data.</div>
""")

        # ---- Sentence mode: classify one sentence --------------------------
        with gr.Tab("Sentence"):
            gr.Markdown("""
""")
            with gr.Row():
                with gr.Column():
                    sentence_input = gr.Textbox(
                        label="Enter a sentence",
                        value="Jaaah! Volgende vakantie Barcelona en na het zomerseizoen naar de Algarve",
                        lines=1)
                    sentence_btn = gr.Button("Send")
                sentence_output = gr.Textbox()
            sentence_btn.click(fn=inference_sentence, inputs=sentence_input, outputs=sentence_output)

        # ---- Dataset mode: showcase data or (currently disabled) upload ----
        with gr.Tab("Dataset"):
            gr.Markdown("""
_As we are currently updating this demo, submitting your own data is unavailable for the moment._
_Try out the showcase mode._
""")
            with gr.Row():
                with gr.Column():
                    demo_btn = gr.Button("Showcase with example data", variant="primary")
                with gr.Column():
                    gr.Markdown("""
**<font size="4">Run in showcase mode or use your own data</font>**
Try out the demo in showcase mode, which uses example data (609,206 tweets about the COVID-19 pandemic) with all the options provided by the demo, or upload your own dataset.
""")
            with gr.Row():
                with gr.Column():
                    input_file = gr.File(
                        label="Upload a dataset")
                    input_checks = gr.CheckboxGroup(
                        ["emotion frequencies", "emotion distribution over time", "peaks", "topics"],
                        label="Select options")
                    submit_btn = gr.Button("Submit data")
                with gr.Column():
                    gr.Markdown("""
**<font size="4">Data format</font>**
The data should be in tsv-format with two named columns: the first column (id) should contain the sentence IDs, and the second column (text) should contain the actual texts. Optionally, there is a third column named 'date', which specifies the date associated with the text (e.g., tweet date). This column is necessary when the options 'emotion distribution over time' and 'peaks' are selected. For now, we only accept files with maximum 400 sentences and a limit of 300 tokens per sentence.
**<font size="4">Options</font>**
**Emotion frequencies** outputs a bar plot with the prediction frequencies of each emotion category (anger, fear, joy, love, sadness or neutral).
**Emotion distribution over time** outputs a line plot that visualises the frequency of predicted emotions over time for each emotion category.
**Peaks** outputs a step graph that only shows the significant fluctuations (upwards and downwards) in emotion frequencies over time.
**Topics** uses [BERTopic](https://maartengr.github.io/BERTopic/index.html) to find topics in the datasets, and outputs a bar plot that shows the emotion distribution per topic.
""")
            with gr.Row():
                gr.Markdown("""
___
""")
            # Output area: every component starts hidden and is revealed by
            # the handlers one step at a time.
            with gr.Row():
                with gr.Column():
                    output_markdown = gr.Markdown("""
**<font size="4">Output</font>**
""", visible=False)
                    message = gr.Textbox(label="Message", visible=False)
                    output_file = gr.File(label="Predictions", visible=False)
                    next_button_freq = gr.Button("Show emotion frequencies", visible=False)
                    output_plot = gr.Plot(show_label=False, visible=False).style(container=True)
                    next_button_dist = gr.Button("Show emotion distribution over time", visible=False)
                    output_dist = gr.Plot(show_label=False, visible=False)
                    next_button_peaks = gr.Button("Show peaks", visible=False)
                    output_peaks = gr.Plot(show_label=False, visible=False)
                    next_button_topics = gr.Button("Show topics", visible=False)
                    output_topics = gr.Plot(show_label=False, visible=False)

            # Each "next" button renders its plot and may reveal a later button.
            next_button_freq.click(fn=freq, inputs=[output_file, input_checks], outputs=[output_plot, next_button_dist, next_button_peaks, next_button_topics])
            next_button_dist.click(fn=dist, inputs=[output_file, input_checks], outputs=[output_dist, next_button_peaks, next_button_topics])
            next_button_peaks.click(fn=peaks, inputs=[output_file, input_checks], outputs=[output_peaks, next_button_topics])
            next_button_topics.click(fn=topics, inputs=[output_file, input_checks], outputs=output_topics)
            # Submitting own data currently only shows the "unavailable" notice.
            # The handler for real uploads is not wired in:
            #   submit_btn.click(fn=file, inputs=[input_file, input_checks], outputs=[output_file, next_button_freq, next_button_dist, next_button_peaks, next_button_topics])
            submit_btn.click(fn=unavailable, inputs=[input_file, input_checks], outputs=[output_markdown, message])
            demo_btn.click(fn=showcase, inputs=[input_file], outputs=[output_markdown, message, output_file, next_button_freq, next_button_dist, next_button_peaks, next_button_topics])

        # Footer with credits and logos.
        with gr.Row():
            with gr.Column():
                gr.Markdown("""
<font size="2">Both this demo and the dataset have been created by [LT3](https://lt3.ugent.be/), the Language and Translation Technology Team of Ghent University. The EmotioNL project has been carried out with support from the Research Foundation – Flanders (FWO). For any questions, please contact luna.debruyne@ugent.be.</font>
<div style="display: grid;grid-template-columns:150px auto;"> <img style="margin-right: 1em" alt="LT3 logo" src="https://lt3.ugent.be/static/images/logo_v2_single.png" width="136" height="58"> <img style="margin-right: 1em" alt="FWO logo" src="https://www.fwo.be/images/logo_desktop.png" height="58"></div>
""")
    # Empty side column (presumably a margin).
    with gr.Column(scale=1, min_width=50):
        gr.Markdown("""
""")

demo.launch()
# <div style="display: grid;grid-template-columns:80px 150px auto;"><img style="margin-right: 1em" alt="UGent logo" src="https://lt3.ugent.be/static/images/logo_ugent_en.svg" height="58"> <img style="margin-right: 1em" alt="LT3 logo" src="https://lt3.ugent.be/static/images/logo_v2_single.png" width="136" height="58"> <img style="margin-right: 1em" alt="FWO logo" src="https://www.fwo.be/images/logo_desktop.png" height="58"></div> |