import torch
import torch.nn as nn
from transformers import AutoTokenizer
from transformers.configuration_utils import PretrainedConfig
from transformers.models.distilbert.modeling_distilbert import DistilBertPreTrainedModel, DistilBertModel
import gradio as gr


class IMBDModel(DistilBertPreTrainedModel):
    def __init__(self, config: PretrainedConfig):
        super().__init__(config)
        self.distilbert = DistilBertModel(config)
        # # freeze whole model
        # for params in self.distilbert.parameters():
        #     params.requires_grad = False
        # layers = self.distilbert.transformer.layer
        # print("Total Layers:", len(layers))
        # # Make only a few layers trainable.
        # for layer_num in [5]:
        #     for params in layers[layer_num].parameters():
        #         params.requires_grad = True
        self.fc = nn.Linear(config.dim, 1)
        self.post_init()

    def forward(self, x):
        output = self.distilbert(**x)
        # Use the hidden state of the [CLS] token as the pooled representation.
        pooled_output = output.last_hidden_state[:, 0]
        logits = self.fc(pooled_output)
        return logits, output.attentions


infer_path = "./model/fold0_epoch01_loss0.1403_val_loss0.1994_roc_auc0.9779/"
pretrained_tokenizer = AutoTokenizer.from_pretrained(infer_path)
pretrained_model = IMBDModel.from_pretrained(infer_path, local_files_only=True, output_attentions=True)
pretrained_model.eval()
print("Model loaded.")


def get_attentions(attentions):
    # Attention weights of the last transformer layer: [batch, heads, seq_len, seq_len].
    last_layer_att = attentions[-1]
    # Attention paid by the [CLS] token to every token in the sequence.
    cls_att = last_layer_att[:, :, 0, :]
    # Average over heads, then take the first (and only) item in the batch.
    cls_att_mean = cls_att.mean(dim=1)[0]
    # Min-max scale to [0, 1] because the scores drive the highlight opacity.
    cls_att_mean = (cls_att_mean - cls_att_mean.min()) / (cls_att_mean.max() - cls_att_mean.min())
    return cls_att_mean


def wrap_text(word, score):
    # Render each token as an HTML span whose background alpha is its scaled
    # attention score. The exact markup was lost in the source; this rgba
    # styling is a reconstruction, and any alpha in [0, 1] works here.
    return f'<span style="background-color: rgba(255, 99, 71, {float(score):.2f})">{word}</span> '


def prediction(text):
    tokens = pretrained_tokenizer(text, truncation=True, max_length=512)
    tokens = {k: torch.tensor([v]) for k, v in tokens.items()}
    word_tokens = pretrained_tokenizer.convert_ids_to_tokens(tokens['input_ids'][0])
    with torch.no_grad():
        scores, attentions = pretrained_model(tokens)
    scores = torch.sigmoid(scores).numpy()
    scores = scores[0][0]
    if scores >= 0.6:
        label = "Positive"
    elif 0.4 <= scores < 0.6:
        label = "Neutral"
    else:
        label = "Negative"
    att_op = get_attentions(attentions)
    html = "".join([wrap_text(w, s) for w, s in zip(word_tokens, att_op)])
    # Wrap the highlighted tokens in a container div for the HTML output.
    html = f"<div>{html}</div>"
    return f"{label} feedback", f"{scores:.2f}", html


examples = [
    """
    Infinity war is one of the best MCU projects. It has a great story, great acting, and awesome looking. If you aren't a Marvel fan or haven't watched most of the previous MCU movies this however, won't be something for you. Let's start with Thanos, definitely one of the best villains, he has a motive, is well played, you can even say that Infinity war tells his story and not the story of a hero. But also most of the other cast members were great in their role and again, if you love Marvel, watch this movie.
    """,
    """
    This is truly bottom of the barrel stuff. Nobody asked for this show but it was shoved on us hapless souls anyway. Walters came across as obnoxious, vacuous and full of herself. There was no effort made at all towards character development in the first episode which looks like a poorly crafted music video from the 90s. The first episode obviously serves as a placeholder for more shlock. This show is trying to be sex and the city with a pg13 rating and female Shrek. Marvel's idea of setting up strong female characters with exposition dumps instead of focusing on having them go through a journey to realize their true potential.
    """,
]

demo = gr.Interface(
    fn=prediction,
    inputs=gr.Textbox(lines=5, placeholder="Text to analyze..."),
    outputs=["text", "text", "html"],
    examples=examples,
)

demo.launch(server_name="0.0.0.0")