import torch
import torch.nn as nn
from transformers import AutoTokenizer
from transformers.configuration_utils import PretrainedConfig
from transformers.models.distilbert.modeling_distilbert import DistilBertPreTrainedModel, DistilBertModel
import gradio as gr


class IMBDModel(DistilBertPreTrainedModel):
    def __init__(self, config: PretrainedConfig):
        super().__init__(config)
        self.distilbert = DistilBertModel(config)
        # # freeze whole model
        # for params in self.distilbert.parameters():
        #     params.requires_grad = False
        # layers = self.distilbert.transformer.layer
        # print("Total Layers:", len(layers))
        # # Make only a few layers trainable.
        # for layer_num in [5]:
        #     for params in layers[layer_num].parameters():
        #         params.requires_grad = True
        self.fc = nn.Linear(config.dim, 1)
        self.post_init()

    def forward(self, x):
        output = self.distilbert(**x)
        # Use the hidden state of the [CLS] token as the pooled representation.
        pooled_output = output.last_hidden_state[:, 0]
        logits = self.fc(pooled_output)
        return logits, output.attentions


infer_path = "./model/fold0_epoch01_loss0.1403_val_loss0.1994_roc_auc0.9779/"
pretrained_tokenizer = AutoTokenizer.from_pretrained(infer_path)
pretrained_model = IMBDModel.from_pretrained(infer_path, local_files_only=True, output_attentions=True)
pretrained_model.eval()
print("Model loaded.")


def get_attentions(attentions):
    # Attention weights of the last transformer layer: [batch, heads, seq_len, seq_len].
    last_layer_att = attentions[-1]
    # Attention paid by the [CLS] token to every token in the sequence.
    cls_att = last_layer_att[:, :, 0, :]
    # Average over heads, then take the first (and only) item in the batch.
    cls_att_mean = cls_att.mean(dim=1)[0]
    # Min-max scale to [0, 1] because the scores drive the highlight opacity.
    cls_att_mean = (cls_att_mean - cls_att_mean.min()) / (cls_att_mean.max() - cls_att_mean.min())
    return cls_att_mean


def wrap_text(word, score):
    # Render each token as an HTML span whose background alpha is its scaled
    # attention score. The exact markup was lost in the source; this rgba
    # styling is a reconstruction, and any alpha in [0, 1] works here.
    return f'<span style="background-color: rgba(255, 99, 71, {float(score):.2f})">{word}</span> '


def prediction(text):
    tokens = pretrained_tokenizer(text, truncation=True, max_length=512)
    tokens = {k: torch.tensor([v]) for k, v in tokens.items()}
    word_tokens = pretrained_tokenizer.convert_ids_to_tokens(tokens['input_ids'][0])
    with torch.no_grad():
        scores, attentions = pretrained_model(tokens)
    scores = torch.sigmoid(scores).numpy()
    scores = scores[0][0]
    if scores >= 0.6:
        label = "Positive"
    elif 0.4 <= scores < 0.6:
        label = "Neutral"
    else:
        label = "Negative"
    att_op = get_attentions(attentions)
    html = "".join([wrap_text(w, s) for w, s in zip(word_tokens, att_op)])
    # Wrap the highlighted tokens in a container div for the HTML output.
    html = f"<div>{html}</div>"
    return f"{label} feedback", f"{scores:.2f}", html


examples = [
    """
    Infinity war is one of the best MCU projects. It has a great story, great acting, and awesome looking. If you aren't a Marvel fan or haven't watched most of the previous MCU movies this however, won't be something for you. Let's start with Thanos, definitely one of the best villains, he has a motive, is well played, you can even say that Infinity war tells his story and not the story of a hero. But also most of the other cast members were great in their role and again, if you love Marvel, watch this movie.
    """,
    """
    This is truly bottom of the barrel stuff. Nobody asked for this show but it was shoved on us hapless souls anyway. Walters came across as obnoxious, vacuous and full of herself. There was no effort made at all towards character development in the first episode which looks like a poorly crafted music video from the 90s. The first episode obviously serves as a placeholder for more shlock. This show is trying to be sex and the city with a pg13 rating and female Shrek. Marvel's idea of setting up strong female characters with exposition dumps instead of focusing on having them go through a journey to realize their true potential.
    """,
]

demo = gr.Interface(
    fn=prediction,
    inputs=gr.Textbox(lines=5, placeholder="Text to analyze..."),
    outputs=["text", "text", "html"],
    examples=examples,
)

demo.launch(server_name="0.0.0.0")