import streamlit as st
import pandas as pd
import numpy as np
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification

st.markdown("Link to the app - [milestone2-app](https://huggingface.co/spaces/aim9061/sentiment-analysis)")
st.title("Toxic Tweets Sentiment Analysis")

# Labels predicted by the fine-tuned toxicity model, in the order of its output logits.
toxic_tweet_cats = ["Toxic", "Severe Toxic", "Obscene", "Threat", "Insult", "Identity Hate", "Not Toxic"]


def create_dict(prob, text):
    """Build a result row containing the two highest-scoring toxicity labels for the text."""
    sorted_indices = np.argsort(prob)[-2:]  # indices of the two largest probabilities (ascending)
    info = {
        "text": text,
        "label1": toxic_tweet_cats[sorted_indices[1]],
        "percentage1": str(round(prob[sorted_indices[1]], 3)),
        "label2": toxic_tweet_cats[sorted_indices[0]],
        "percentage2": str(round(prob[sorted_indices[0]], 3)),
    }
    return info


def get_cats(text):
    """Run the fine-tuned DistilBERT toxicity model and display its top two labels as a table."""
    tokenizer = AutoTokenizer.from_pretrained("aim9061/fine-tuned-toxic-tweet-dilbert")
    model = AutoModelForSequenceClassification.from_pretrained("aim9061/fine-tuned-toxic-tweet-dilbert")
    tokens = tokenizer(text, return_tensors="pt")
    outputs = model(**tokens)
    # Multi-label classification: apply a sigmoid to each logit independently.
    prob = torch.sigmoid(outputs.logits).detach().numpy()[0]
    data = create_dict(prob, text)
    res = pd.DataFrame([data])
    st.table(res)


# Default text to analyze; the user can edit it before pressing the button.
words = "Take that, you funking cat-dragon! You smell really bad!"
text = st.text_area("Insert text for analysis below.", words)

# Models the user can choose from.
model_list = [
    "aim9061/fine-tuned-toxic-tweet-dilbert",
    "distilbert-base-uncased-finetuned-sst-2-english",
    "bert-base-cased",
    "openai/clip-vit-base-patch32",
    "emilyalsentzer/Bio_ClinicalBERT",
    "sentence-transformers/all-mpnet-base-v2",
    "facebook/bart-large-cnn",
    "openai/clip-vit-base-patch16",
    "speechbrain/spkrec-ecapa-voxceleb",
    "albert-base-v2",
]

st.write("Pick the model to use for analyzing the text!")
model = st.selectbox("Model", model_list)
button = st.button("Analyze!")

if button:
    if model == "aim9061/fine-tuned-toxic-tweet-dilbert":
        # Use the custom multi-label toxicity model and show its top two categories.
        get_cats(text)
    else:
        # Fall back to a generic text-classification pipeline for the selected model.
        pipe = pipeline("text-classification", model)
        results = pipe(text)
        st.write(results)