# Alex Martin
# Update app.py
# 5a9eb5a unverified
import streamlit as st
import pandas as pd
import numpy as np
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from PIL import Image
# Page header: a markdown link to the hosted app, followed by the page title.
app_link_markdown = "Link to the app - [milestone2-app](https://huggingface.co/spaces/aim9061/sentiment-analysis)"
page_title = "Toxic Tweets Sentiment Analysis"
st.markdown(app_link_markdown)
st.title(page_title)
def create_dict(prob, text, labels=None):
    """Summarise the two highest-scoring categories for one piece of text.

    Args:
        prob: Array-like of per-category scores, indexed in the same order
            as ``labels``. It must hold at least two entries, otherwise the
            lookup of the top entry raises ``IndexError``.
        text: The analysed text; echoed back under the ``"text"`` key.
        labels: Optional sequence of category names. When omitted, the
            module-level ``toxic_tweet_cats`` list is used, which keeps
            existing two-argument callers working unchanged.

    Returns:
        A dict with the keys ``text``, ``label1``/``percentage1`` (highest
        score) and ``label2``/``percentage2`` (second-highest score). Scores
        are rounded to three decimals and stored as strings.
    """
    if labels is None:
        # Resolved at call time: the list is defined further down the script.
        labels = toxic_tweet_cats

    # argsort is ascending, so the last two indices are (runner-up, winner).
    top_two = np.argsort(prob)[-2:]
    best, runner_up = top_two[1], top_two[0]

    return {
        "text": text,
        "label1": labels[best],
        "percentage1": str(round(prob[best], 3)),
        "label2": labels[runner_up],
        "percentage2": str(round(prob[runner_up], 3)),
    }
def get_cats(text):
    """Score ``text`` with the fine-tuned toxic-tweet model and render a table.

    Loads the tokenizer and sequence-classification model for the
    ``aim9061/fine-tuned-toxic-tweet-dilbert`` checkpoint, applies a sigmoid
    to the model's logits, summarises the two highest-scoring categories
    via ``create_dict`` and displays the result with ``st.table``.

    Args:
        text: The text to analyse.

    Returns:
        None. The result is written to the Streamlit page.
    """
    checkpoint = "aim9061/fine-tuned-toxic-tweet-dilbert"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

    # truncation=True caps the encoded input at the tokenizer's maximum
    # length, so very long text from the text area cannot exceed what the
    # model accepts.
    encoded = tokenizer(text, return_tensors="pt", truncation=True)

    # Inference only: skip building an autograd graph.
    with torch.no_grad():
        logits = model(**encoded).logits

    # One sigmoid per category; [0] selects the row for the single input.
    prob = torch.sigmoid(logits).numpy()[0]

    summary = create_dict(prob, text)
    st.table(pd.DataFrame([summary]))
# --- User inputs -------------------------------------------------------------
# Sample text pre-filled in the text area.
words = "Take that, you funking cat-dragon! You smell really bad!"
text = st.text_area("Insert text for analysis below.", words)

# Category names looked up by create_dict when labelling scores.
# NOTE(review): presumably ordered to match the fine-tuned model's output
# indices -- confirm against the model configuration.
toxic_tweet_cats = [
    "Toxic",
    "Severe Toxic",
    "Obscene",
    "Threat",
    "Insult",
    "Identity Hate",
    "Not Toxic",
]

# Model identifiers offered in the select box.
model_list = [
    "aim9061/fine-tuned-toxic-tweet-dilbert",
    "distilbert-base-uncased-finetuned-sst-2-english",
    "bert-base-cased",
    "openai/clip-vit-base-patch32",
    "emilyalsentzer/Bio_ClinicalBERT",
    "sentence-transformers/all-mpnet-base-v2",
    "facebook/bart-large-cnn",
    "openai/clip-vit-base-patch16",
    "speechbrain/spkrec-ecapa-voxceleb",
    "albert-base-v2",
]
model = st.selectbox("", model_list)
st.write("Pick the model to use for analyzing the text!")
button = st.button("Analyze!")

# --- Analysis ----------------------------------------------------------------
# The pipeline is built only after the button is pressed, for the selected
# model; no default pipeline is constructed up front.
if button:
    if model == "aim9061/fine-tuned-toxic-tweet-dilbert":
        # The fine-tuned model gets its own top-two category table.
        get_cats(text)
    # NOTE(review): there is no `else`, so the generic pipeline below also
    # runs for the fine-tuned model -- confirm this is intended.
    pipe = pipeline("text-classification", model)
    results = pipe(text)
    st.write(results)
#TODO: DOCUMENT CODE