import streamlit as st
import pandas as pd
import numpy as np
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from PIL import Image
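
# Streamlit app for toxic-tweet sentiment analysis: the user enters text,
# picks a Hugging Face model, and presses "Analyze!". The fine-tuned
# DistilBERT model ("aim9061/fine-tuned-toxic-tweet-dilbert") additionally
# reports its two most likely toxicity categories; every selected model is
# run through a generic text-classification pipeline.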

st.markdown("Link to the app - [milestone2-app](https://huggingface.co/spaces/aim9061/sentiment-analysis)")
st.title("Toxic Tweets Sentiment Analysis")

def create_dict(prob, text):
    """Build a one-row summary with the two highest-probability toxicity labels."""
    # Indices of the two largest probabilities, in ascending order.
    sorted_indices = np.argsort(prob)[-2:]
    info = {
        "text": text,
        "label1": toxic_tweet_cats[sorted_indices[1]],
        "percentage1": str(round(prob[sorted_indices[1]], 3)),
        "label2": toxic_tweet_cats[sorted_indices[0]],
        "percentage2": str(round(prob[sorted_indices[0]], 3)),
    }
    return info

def get_cats(text):
    """Classify `text` with the fine-tuned toxic-tweet model and show a table."""
    tokenizer = AutoTokenizer.from_pretrained("aim9061/fine-tuned-toxic-tweet-dilbert")
    model = AutoModelForSequenceClassification.from_pretrained("aim9061/fine-tuned-toxic-tweet-dilbert")
    tokens = tokenizer(text, return_tensors="pt")
    outputs = model(**tokens)

    # Multi-label head: apply a sigmoid to each logit independently.
    prob = torch.sigmoid(outputs.logits).detach().numpy()[0]

    # Display the top-two labels and their probabilities as a table.
    data = create_dict(prob, text)
    res = pd.DataFrame([data])
    st.table(res)

words = "Take that, you funking cat-dragon! You smell really bad!"
text = st.text_area("Insert text for analysis below.", words)

toxic_tweet_cats = ["Toxic", "Severe Toxic", "Obscene", "Threat", "Insult", "Identity Hate", "Not Toxic"]

model_list = ["aim9061/fine-tuned-toxic-tweet-dilbert", "distilbert-base-uncased-finetuned-sst-2-english",
              "bert-base-cased", "openai/clip-vit-base-patch32", "emilyalsentzer/Bio_ClinicalBERT",
              "sentence-transformers/all-mpnet-base-v2", "facebook/bart-large-cnn",
              "openai/clip-vit-base-patch16", "speechbrain/spkrec-ecapa-voxceleb", "albert-base-v2"]
# Model selection and analysis trigger.
model = st.selectbox("Pick the model to use for analyzing the text!", model_list)
button = st.button("Analyze!")

if button:
    # The fine-tuned multi-label model also gets the custom two-label summary table.
    if model == "aim9061/fine-tuned-toxic-tweet-dilbert":
        get_cats(text)
    # Run the selected model through a generic text-classification pipeline
    # and display the raw pipeline output.
    pipe = pipeline("text-classification", model=model)
    results = pipe(text)
    st.write(results)

  
#TODO: DOCUMENT CODE
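
# To run this app locally (assuming the file is saved as app.py):
#   streamlit run app.py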