"""Gradio demo for the EmotioNL Dutch emotion classifier.

The app has two tabs:

* "Sentence" runs the classifier on one sentence typed by the user.
* "Showcase" displays pre-computed predictions, plots, peaks and topics
  for one of four bundled datasets.
"""
import functools
import pickle
from collections import Counter
from datetime import date, timedelta

import altair as alt
import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from tqdm import tqdm
from transformers import AutoTokenizer, AutoConfig, AutoModel, AutoModelForSequenceClassification

# Checkpoint used by the "Sentence" tab.
inference_modelpath = "model/checkpoint-128"

# Emotion categories in label-id order: label "0" is 'neutral', "5" is 'sadness'.
EMOTIONS = ['neutral', 'anger', 'fear', 'joy', 'love', 'sadness']
# Bar/line colours, aligned index-by-index with EMOTIONS.
EMOTION_COLOURS = ['#999999', '#b22222', '#663399', '#ffcc00', '#db7093', '#6495ed']
# Label id as it appears in the predictions files -> emotion name.
LABEL_TO_EMOTION = {str(index): emotion for index, emotion in enumerate(EMOTIONS)}

# Dropdown choice -> file-name stem used under ``output/``.
DATASET_SLUGS = {
    "The Voice of Holland": "tvoh",
    "Floodings": "floodings",
    "COVID-19": "covid",
    "Childcare Benefits": "toeslagen",
}


@functools.lru_cache(maxsize=1)
def _load_classifier(model_path):
    """Load the tokenizer and classification model once and reuse them."""
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    return tokenizer, model


def inference_sentence(text):
    """Predict the emotion of a single sentence.

    Args:
        text: The sentence to classify.

    Returns:
        The string ``"Predicted emotion:\\n"`` followed by the label the
        model's ``id2label`` mapping gives for the highest-scoring class.
    """
    # Loading the checkpoint on every click is slow; the loader is cached.
    tokenizer, model = _load_classifier(inference_modelpath)
    inputs = tokenizer(text, return_tensors="pt")
    with torch.no_grad():
        logits = model(**inputs).logits
    predicted_class_id = logits.argmax().item()
    return "Predicted emotion:\n" + model.config.id2label[predicted_class_id]


def _read_predictions(file_output):
    """Return the rows of a predictions file as lists of comma-split fields.

    The first line is skipped (presumably a header -- confirm against the
    files in ``output/``). Blank lines are ignored, so the last record is
    kept whether or not the file ends with a newline.
    """
    with open(file_output, 'r') as handle:
        lines = handle.read().split("\n")
    return [line.split(",") for line in lines[1:] if line.strip()]


def _parse_day(stamp):
    """Convert a string starting with ``YYYYMMDD`` into a ``date``."""
    return date(int(stamp[:4]), int(stamp[4:6]), int(stamp[6:8]))


def _axis_upper_bound(highest):
    """Round *highest* up to the next multiple of ten (never below 10)."""
    return max(10, (int(highest) + 9) // 10 * 10)


def _load_pickle(path):
    """Unpickle *path*, closing the file afterwards.

    NOTE: ``pickle.load`` can execute arbitrary code. This is only safe
    because the files are artefacts shipped with the app; never point it
    at user-supplied data.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


def freq(file_output):
    """Build a bar chart of how often each emotion was predicted.

    Args:
        file_output: Path to a predictions file; the second comma-separated
            field of each row is the label id (``"0"`` .. ``"5"``).

    Returns:
        An interactive Altair bar chart with one bar per emotion category.
        Categories that never occur are shown with frequency 0.
    """
    rows = _read_predictions(file_output)
    # Counter yields 0 for labels that never occur instead of raising KeyError.
    label_counts = Counter(row[1] for row in rows)

    simple = pd.DataFrame({
        'Emotion category': EMOTIONS,
        'Frequency': [label_counts[label] for label in LABEL_TO_EMOTION],
    })
    upper = _axis_upper_bound(max(simple['Frequency']))

    plot = alt.Chart(simple).mark_bar().encode(
        x=alt.X("Emotion category", sort=EMOTIONS),
        y=alt.Y("Frequency", axis=alt.Axis(grid=False), scale=alt.Scale(domain=[0, upper])),
        color=alt.Color(
            "Emotion category",
            scale=alt.Scale(domain=EMOTIONS, range=EMOTION_COLOURS),
            legend=None),
        tooltip=['Emotion category', 'Frequency']).properties(
            width=600).configure_axis(
            grid=False).interactive()
    return plot


def dist(file_output):
    """Build a line chart of predicted emotions per day.

    Args:
        file_output: Path to a predictions file; the first field of each row
            starts with the date as ``YYYYMMDD`` and the second field is the
            label id (``"0"`` .. ``"5"``).

    Returns:
        An interactive Altair chart with one line per emotion category,
        covering every day between the earliest and latest date in the file
        (days without predictions are plotted as 0).

    Raises:
        ValueError: If the file contains no prediction rows.
        KeyError: If a row carries a label id outside ``"0"`` .. ``"5"``.
    """
    rows = _read_predictions(file_output)
    if not rows:
        raise ValueError(f"No predictions found in {file_output!r}")

    # (day, emotion) -> number of predictions.
    day_counts = Counter(
        (_parse_day(row[0]), LABEL_TO_EMOTION[row[1]]) for row in rows)

    # Use min/max rather than first/last row so unsorted files still work.
    observed_days = [day for day, _ in day_counts]
    start_date = min(observed_days)
    end_date = max(observed_days)
    delta = end_date - start_date
    date_range = [start_date + timedelta(days=offset) for offset in range(delta.days + 1)]

    # One record per (day, emotion) pair so every line has a point on every day.
    records = [
        (str(day), day_counts[(day, emotion)], emotion)
        for day in date_range
        for emotion in EMOTIONS
    ]
    data = pd.DataFrame(records, columns=['Date', 'Frequency', 'Emotion category'])
    upper = _axis_upper_bound(max(data['Frequency']))

    # Hovering near a line selects its emotion category.
    highlight = alt.selection(
        type='single', on='mouseover', fields=["Emotion category"], nearest=True)

    base = alt.Chart(data).encode(
        x="Date:T",
        y=alt.Y("Frequency", scale=alt.Scale(domain=[0, upper])),
        color=alt.Color(
            "Emotion category",
            scale=alt.Scale(domain=EMOTIONS, range=EMOTION_COLOURS),
            legend=alt.Legend(orient='bottom', direction='horizontal')))

    # Invisible points carry the tooltip and the hover selection.
    points = base.mark_circle().encode(
        opacity=alt.value(0),
        tooltip=[
            alt.Tooltip('Emotion category', title='Emotion category'),
            alt.Tooltip('Date:T', title='Date'),
            alt.Tooltip('Frequency', title='Frequency')
        ]).add_selection(highlight)

    # The highlighted line is drawn thicker (3) than the others (1).
    lines = base.mark_line().encode(
        size=alt.condition(~highlight, alt.value(1), alt.value(3)))

    plot = (points + lines).properties(width=600, height=350).interactive()
    return plot


def showcase(dataset):
    """Produce all showcase outputs for the selected dataset.

    Args:
        dataset: One of the dropdown choices listed in ``DATASET_SLUGS``.

    Returns:
        A tuple of six ``gr.update`` objects, in the order expected by the
        "Run" button: output heading, predictions file, frequency plot,
        distribution plot, peaks plot, topics plot.

    Raises:
        gr.Error: If no (or an unknown) dataset was selected.
    """
    slug = DATASET_SLUGS.get(dataset)
    if slug is None:
        # The dropdown starts empty; without this guard the function would
        # crash with an UnboundLocalError.
        raise gr.Error("Please select a dataset before clicking Run.")

    # predictions file
    file_output = f"output/predictions_{slug}.txt"

    # freq bar plot
    freq_output = freq(file_output)

    # dist plot
    dist_output = dist(file_output)

    # peaks
    peaks_output = _load_pickle(f"output/peaks_{slug}.p")

    # topics
    topics_output = _load_pickle(f"output/topics_{slug}.p")

    return (
        gr.update(visible=True),
        gr.update(value=file_output, visible=True),
        gr.update(value=freq_output, visible=True),
        gr.update(value=dist_output, visible=True),
        gr.update(value=peaks_output, visible=True),
        gr.update(value=topics_output, visible=True),
    )


# NOTE(review): the layout nesting below was reconstructed from a source whose
# indentation was lost; confirm against the deployed app.
with gr.Blocks() as demo:
    # Empty side column used as a left margin.
    with gr.Column(scale=1, min_width=50):
        gr.Markdown(""" """)

    with gr.Column(scale=5):
        # NOTE(review): "EmotioNL logo" looks like the alt text of an image
        # tag whose markup is missing here -- confirm.
        gr.Markdown("""

EmotioNL: A framework for Dutch emotion detection

EmotioNL logo
This demo was made to demonstrate the EmotioNL model, a transformer-based classification model that analyses emotions in Dutch texts. The model uses RobBERT, which was further fine-tuned on the EmotioNL dataset. The resulting model is a classifier that, given a sentence, predicts one of the following emotion categories: anger, fear, joy, love, sadness or neutral. The demo can be used either in sentence mode, which allows you to enter a sentence for which an emotion will be predicted; or in showcase mode, which allows you to see the full functionality with example data.
""")

        with gr.Tab("Sentence"):
            gr.Markdown(""" """)
            with gr.Row():
                with gr.Column():
                    sentence_input = gr.Textbox(
                        label="Enter a sentence",
                        value="Jaaah! Volgende vakantie Barcelona en na het zomerseizoen naar de Algarve",
                        lines=1)
                    send_btn = gr.Button("Send")
                # NOTE(review): assumed to sit beside the input column.
                sentence_output = gr.Textbox()
            send_btn.click(fn=inference_sentence, inputs=sentence_input, outputs=sentence_output)

        with gr.Tab("Showcase"):
            with gr.Row():
                with gr.Column():
                    gr.Markdown(
                        " **Run the demo on the data of a specific crisis case** "
                        "Select the desired dataset and click the button to run the demo. "
                    )
                with gr.Column():
                    gr.Markdown(""" """)
                with gr.Column():
                    gr.Markdown(
                        " **Output** After having clicked on the run button, "
                        "scroll down to see the output (running may take a while): "
                    )

            with gr.Row():
                with gr.Column():
                    dataset = gr.Dropdown(
                        ["The Voice of Holland", "Floodings", "COVID-19", "Childcare Benefits"],
                        show_label=False)
                    run_btn = gr.Button("Run", variant="primary")
                with gr.Column():
                    gr.Markdown(
                        " **The Voice of Holland:** 18,502 tweets about a scandal about sexual "
                        "misconduct in the Dutch reality TV singing competition "
                        "'The Voice of Holland'. "
                        "**Floodings:** 9,923 tweets about the floodings that affected Belgium "
                        "and the Netherlands in the Summer of 2021. "
                        "**COVID-19:** 609,206 tweets about the COVID-19 pandemic, posted in "
                        "the first eight months of the crisis. "
                        "**Childcare Benefits:** 66,961 tweets about the political scandal "
                        "concerning false allegations of fraud regarding childcare allowance "
                        "in the Netherlands. "
                    )
                with gr.Column():
                    gr.Markdown(
                        " **Predictions:** file with the predicted emotion label for each "
                        "instance in the dataset. "
                        "**Emotion frequencies:** bar plot with the prediction frequencies of "
                        "each emotion category (anger, fear, joy, love, sadness or neutral). \n"
                        "**Emotion distribution over time:** line plot that visualises the "
                        "frequency of predicted emotions over time for each emotion category. "
                        "**Peaks:** step graph that only shows the significant fluctuations "
                        "(upwards and downwards) in emotion frequencies over time. "
                        "**Topics:** a bar plot that shows the emotion distribution for "
                        "different topics in the dataset. Topics are extracted using "
                        "[BERTopic](https://maartengr.github.io/BERTopic/index.html). "
                    )

            with gr.Row():
                gr.Markdown(""" ___ """)

            # Output widgets start hidden; showcase() makes them visible.
            with gr.Row():
                with gr.Column():
                    output_markdown = gr.Markdown(""" **Output** """, visible=False)
                    # Not wired to any callback in this file.
                    message = gr.Textbox(label="Message", visible=False)
                    output_file = gr.File(label="Predictions", visible=False)
                    output_plot = gr.Plot(show_label=False, visible=False).style(container=True)
                    output_dist = gr.Plot(show_label=False, visible=False)
                    output_peaks = gr.Plot(show_label=False, visible=False)
                    output_topics = gr.Plot(show_label=False, visible=False)

            run_btn.click(
                fn=showcase,
                inputs=[dataset],
                outputs=[output_markdown, output_file, output_plot, output_dist,
                         output_peaks, output_topics])

        with gr.Row():
            with gr.Column():
                # NOTE(review): "LT3 logo FWO logo" looks like alt text of
                # image tags whose markup is missing here -- confirm.
                gr.Markdown(""" Both this demo and the dataset have been created by [LT3](https://lt3.ugent.be/), the Language and Translation Technology Team of Ghent University. The EmotioNL project has been carried out with support from the Research Foundation – Flanders (FWO). For any questions, please contact luna.debruyne@ugent.be.
LT3 logo FWO logo
""")

    # Empty side column used as a right margin.
    with gr.Column(scale=1, min_width=50):
        gr.Markdown(""" """)

demo.launch()