import gradio as gr
import torch
import numpy as np

import pandas as pd
from tqdm import tqdm

import matplotlib.pyplot as plt

from transformers import AutoTokenizer, AutoConfig, AutoModel, AutoModelForSequenceClassification


# Description (HTML heading + text) passed as `description` to the single-sentence interface.
description_sentence = "<h3>Demo EmotioNL</h3>\nThis demo allows you to analyse the emotion in a sentence."
# Description passed as `description` to the dataset interface; it spells out the expected
# TSV layout (id, text and an optional date column) for uploaded files.
description_dataset = "<h3>Demo EmotioNL</h3>\nThis demo allows you to analyse the emotions in a dataset.\nThe data should be in tsv-format with two named columns: the first column (id) should contain the sentence IDs, and the second column (text) should contain the actual texts. Optionally, there is a third column named 'date', which specifies the date associated with the text (e.g., tweet date). This column is necessary when the options 'emotion distribution over time' and 'peaks' are selected."

# Path given to `from_pretrained` to load both the tokenizer and the classification model.
inference_modelpath = "model/checkpoint-128"

def inference_sentence(text):
    """Predict the emotion label for a single sentence.

    Loads the tokenizer and the sequence-classification model from
    ``inference_modelpath``, runs one forward pass without gradient
    tracking, and maps the highest-scoring logit to its label through
    ``model.config.id2label``.

    Args:
        text: The sentence to classify.

    Returns:
        The label stored in ``model.config.id2label`` for the predicted class.
    """
    tokenizer = AutoTokenizer.from_pretrained(inference_modelpath)
    model = AutoModelForSequenceClassification.from_pretrained(inference_modelpath)

    # truncation=True asks the tokenizer to cut the input at its configured
    # maximum length, so an overly long sentence does not fail in the
    # forward pass.
    inputs = tokenizer(text, return_tensors="pt", truncation=True)

    with torch.no_grad():  # inference only, no gradients needed
        logits = model(**inputs).logits

    predicted_class_id = logits.argmax().item()
    return model.config.id2label[predicted_class_id]

def frequencies(preds):
    """Draw a bar chart of how often each emotion label occurs in *preds*.

    Args:
        preds: Iterable of predicted labels. Every label must be one of
            "neutral", "anger", "fear", "joy", "love" or "sadness".

    Returns:
        The ``matplotlib.pyplot`` module, whose current figure holds the chart.

    Raises:
        KeyError: If a label outside the six known emotions is encountered.
    """
    # Fixed key order so each bar always gets the same position and colour.
    preds_dict = {"neutral": 0, "anger": 0, "fear": 0, "joy": 0, "love": 0, "sadness": 0}
    for pred in preds:
        preds_dict[pred] += 1
    bars = list(preds_dict)
    height = list(preds_dict.values())

    # pyplot keeps drawing on the same implicit figure between calls; clear it
    # so bars from a previous call do not remain underneath the new ones.
    plt.clf()

    x_pos = np.arange(len(bars))
    plt.bar(x_pos, height, color=['lightgrey', 'firebrick', 'rebeccapurple', 'orange', 'palevioletred', 'cornflowerblue'])
    plt.xticks(x_pos, bars)
    return plt
    
def inference_dataset(file_object, option_list):
    """Classify every text in an uploaded TSV file and write the predictions to disk.

    The file is read with its first row as header. The first column is used
    as the sentence id and the second as the text; any further columns
    (such as the optional date column) are ignored here. Predictions are
    written to ``output.txt`` as ``id<TAB>text<TAB>prediction`` lines.

    Args:
        file_object: Uploaded file; its ``name`` attribute is used as the
            path of the TSV file to read.
        option_list: Collection of selected option strings
            ("emotion frequencies", "emotion distribution over time",
            "peaks", "topics").

    Returns:
        A five-element list: the path of the predictions file, the result of
        ``frequencies(preds)`` (or ``None`` when that option is not
        selected), and three status strings for the remaining options.

    Raises:
        ValueError: If the file has fewer than two columns.
    """
    tokenizer = AutoTokenizer.from_pretrained(inference_modelpath)
    model = AutoModelForSequenceClassification.from_pretrained(inference_modelpath)

    # Hand pandas the path so it opens and closes the file itself. The columns
    # are selected by position rather than by forcing two names onto the
    # file: with a third (date) column, two forced names would make pandas
    # treat the first column as the index and shift id/text by one.
    df = pd.read_csv(file_object.name, delimiter='\t', header=0)
    if df.shape[1] < 2:
        raise ValueError("The dataset needs at least two tab-separated columns: id and text.")
    ids = df.iloc[:, 0].tolist()
    texts = df.iloc[:, 1].tolist()

    preds = []
    for text in tqdm(texts):  # progress bar over the dataset
        # truncation=True asks the tokenizer to cut texts at its configured
        # maximum length so one long text does not abort the whole run.
        inputs = tokenizer(text, return_tensors="pt", truncation=True)
        with torch.no_grad():  # inference only, no gradients needed
            logits = model(**inputs).logits
        predicted_class_id = logits.argmax().item()
        preds.append(model.config.id2label[predicted_class_id])

    # Write the predictions inside a context manager so the file is flushed
    # and closed before its path is returned to the caller.
    output = "output.txt"
    with open(output, 'w', encoding='utf-8') as f:
        f.write("id\ttext\tprediction\n")
        for sentence_id, text, prediction in zip(ids, texts, preds):
            f.write(f"{sentence_id}\t{text}\t{prediction}\n")

    selected = "This option was selected."
    not_selected = "This option was not selected."

    output1 = output
    output2 = frequencies(preds) if "emotion frequencies" in option_list else None
    output3 = selected if "emotion distribution over time" in option_list else not_selected
    output4 = selected if "peaks" in option_list else not_selected
    output5 = selected if "topics" in option_list else not_selected
    return [output1, output2, output3, output4, output5]

# Single-sentence tab: one text box in, the predicted label out as plain text.
iface_sentence = gr.Interface(
            fn=inference_sentence,
            description = description_sentence,
            inputs = gr.Textbox(
                    label="Enter a sentence",
                    lines=1),
            outputs="text")

# Dataset tab inputs, in the order of inference_dataset's parameters
# (file_object, option_list). The checkbox strings are compared literally
# inside inference_dataset, so they must stay in sync with it.
inputs = [gr.File(
            label="Upload a dataset"),
          gr.CheckboxGroup(
            ["emotion frequencies", "emotion distribution over time", "peaks", "topics"],
            label = "Select options")]

# Dataset tab outputs, in the order of the list returned by inference_dataset:
# predictions file, frequency plot, then one text box per remaining option.
outputs = [gr.File(),
           gr.Plot(label="Emotion frequencies"),
           gr.Textbox(label="Emotion distribution over time"),
           gr.Textbox(label="Peaks"),
           gr.Textbox(label="Topics")]

# Dataset tab: wires the components above to inference_dataset.
iface_dataset = gr.Interface(
            fn = inference_dataset,
            description = description_dataset,
            inputs=inputs,
            outputs = outputs)

# Combine both interfaces into one app with a "Sentence" and a "Dataset" tab.
iface = gr.TabbedInterface([iface_sentence, iface_dataset], ["Sentence", "Dataset"])

# Enable the request queue and start the app. There is no __main__ guard,
# so this runs whenever the module is executed or imported.
iface.queue().launch()