# hate_classifier / app.py
# nebiyu29's picture
# added one dimension into my tokenized data
# 1994400 verified
from transformers import AutoModelForSequenceClassification,AutoTokenizer
from torch.nn.functional import softmax
import torch
import gradio as gr
import json
# Hub identifier of the checkpoint; the tokenizer and the model are both
# loaded from this same repository so their vocabularies match.
model_name = "nebiyu29/hate_classifier"

# Loaded once at import time and reused by every request.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
#this is where the model runs; gradients are disabled because we only do inference
def model_classifier(text):
    """Classify ``text`` and return the per-label probabilities as JSON.

    Args:
        text: Raw input string coming from the UI textbox.

    Returns:
        For empty (or ``None``) input, the plain string
        ``"the input text is {text}"``. Otherwise a JSON-encoded list of
        ``[label, probability]`` pairs, where the labels come from
        ``model.config.id2label`` and the probabilities are the softmax of
        the model's logits for the first (only) item in the batch.
    """
    # Guard first so empty input never reaches the tokenizer or the model.
    if not text:
        return f"the input text is {text}"

    model.eval()  # evaluation mode (e.g. dropout is switched off)
    with torch.no_grad():  # inference only: no autograd bookkeeping needed
        # return_tensors="pt" already yields batched tensors, so they are
        # passed through as-is instead of being re-wrapped and unsqueezed.
        encoded_input = tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=512,
        )
        # Keyword arguments avoid relying on the positional order of the
        # model's forward() signature.
        logits = model(
            input_ids=encoded_input["input_ids"],
            attention_mask=encoded_input["attention_mask"],
        ).logits
        # Normalize the logits into a probability distribution over labels.
        probs_label = softmax(logits, dim=-1)

    id2label = model.config.id2label
    return_probs = {
        id2label[i]: prob for i, prob in enumerate(probs_label[0].tolist())
    }
    return json.dumps(list(return_probs.items()))
# Output widget: the classifier returns a JSON string, so a plain textbox
# is enough to display it.
# Alternative tabular output, kept for reference:
# output_format=gr.Dataframe(row_count=(3,"dynamic"),col_count=(2,"dynamic"),label="label probabilities",headers=["label","probabilities"])
output_format = gr.Textbox(label="label probabilities")

# Wire the classifier into a simple text-in / text-out interface.
demo = gr.Interface(
    fn=model_classifier,
    inputs=gr.Textbox(lines=5, label="Enter you text"),
    outputs=output_format,
    title="Hate Classifier Demo App",
)

# Start the web server; share=True asks Gradio for a public link.
demo.launch(share=True)