File size: 4,480 Bytes
33dbef6
29e4959
228c4b8
29e4959
33dbef6
 
 
 
 
 
 
 
f2a478c
8585ad0
d60cca6
 
f2a478c
e57eba3
 
 
 
c8575c8
 
e57eba3
 
25894ff
53ce71f
 
 
 
 
 
 
f2a478c
33dbef6
ea50b02
228c4b8
 
 
a07ca53
f2a478c
 
93f97ce
f2a478c
 
c8575c8
 
 
 
 
 
 
 
 
 
 
29e4959
33dbef6
ea50b02
 
 
53ce71f
 
 
 
 
 
 
 
 
 
 
 
 
 
ea50b02
33dbef6
636ed81
c8575c8
 
cedf911
25894ff
c8575c8
 
 
 
 
 
 
ea50b02
8ac658c
f936c34
33dbef6
d60cca6
 
0b11182
c8575c8
3cbd9e3
 
 
0b11182
 
 
 
 
 
 
 
d60cca6
e57eba3
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import streamlit as st  #Web App
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForSequenceClassification


# Page heading rendered at the top of the Streamlit app.
st.title("Sentiment Analysis")


def analyze(input, model):
    """Placeholder hook: ignore both arguments and return a fixed string.

    Args:
        input: Unused.
        model: Unused.

    Returns:
        The constant sample message.
    """
    placeholder = "This is a sample output"
    return placeholder

# Identifier of the fine-tuned model; it is passed to from_pretrained() when
# the classifiers are initialised further down.
fine_tuned = "res"

# Translate the generic pipeline label ids (LABEL_0 .. LABEL_5) into readable
# category names. The position in the tuple is the numeric label id.
labels = {
    f"LABEL_{index}": category
    for index, category in enumerate(
        ('toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate')
    )
}


# make a dictionary of the labels and values
def unpack(result):
    """Convert classifier records into a ``{category: score}`` dictionary.

    Args:
        result: Iterable of mappings, each with a ``'label'`` key (looked up
            in the module-level ``labels`` table) and a ``'score'`` key.

    Returns:
        Dict keyed by the readable category name, holding each record's score.

    Raises:
        KeyError: If a record's label is not present in ``labels``.
    """
    return {labels[entry['label']]: entry['score'] for entry in result}

def add_to_table(input, result, output):
    """Append a row with the two highest-scoring labels of ``result``.

    Args:
        input: The analysed text; stored in the first column of the row.
        result: Mapping of label name to score. It is only read; the caller's
            dictionary is left unmodified.
        output: List of table rows. It is extended in place with
            ``[input, best_label, best_score, second_label, second_score]``.

    Raises:
        ValueError: If ``result`` is empty.

    When ``result`` holds a single label, the two runner-up columns are
    ``None`` instead of raising.
    """
    if not result:
        raise ValueError("result must contain at least one label score")

    # sorted() stays stable with reverse=True, so labels with equal scores
    # keep the mapping's insertion order -- the same pick max() would make.
    ranked = sorted(result.items(), key=lambda pair: pair[1], reverse=True)[:2]

    best_label, best_score = ranked[0]
    second_label, second_score = ranked[1] if len(ranked) > 1 else (None, None)
    output.append([input, best_label, best_score, second_label, second_score])




# Text box holding the text to classify; pre-filled with a short example.
input = st.text_area(
    "Insert text to be analyzed",
    value="you stink",
    height=None,
    max_chars=None,
    key=None,
    help=None,
    on_change=None,
    args=None,
    kwargs=None,
    placeholder=None,
    disabled=False,
    label_visibility="visible",
)

# Drop-down choosing which classifier the Analyze button runs.
option = st.selectbox(
    'Choose a transformer model:',
    ('Default', 'Fine-Tuned', 'Roberta'),
)


# init classifiers
#
# Streamlit re-executes this script on every widget interaction, so building
# the pipelines inline would reload all three models on each rerun. They are
# built inside a cached loader instead.

def _cache_resource(loader):
    """Return ``loader`` wrapped in Streamlit's resource cache when available.

    ``st.cache_resource`` is used if the installed Streamlit provides it,
    otherwise ``st.experimental_singleton``. If neither attribute exists the
    loader is returned unchanged, i.e. the models are rebuilt on every run.
    """
    decorator = getattr(st, "cache_resource", None) or getattr(
        st, "experimental_singleton", None
    )
    return loader if decorator is None else decorator(loader)


@_cache_resource
def _load_classifiers():
    """Build the fine-tuned, RoBERTa and default pipelines.

    Returns:
        Tuple ``(fine_tuned_pipeline, roberta_pipeline, default_pipeline)``.
    """
    # Fine-tuned weights paired with the bert-base-uncased tokenizer;
    # top_k=None makes the pipeline return a score for every label.
    ft_model = AutoModelForSequenceClassification.from_pretrained(fine_tuned)
    ft_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    fine_tuned_pipeline = pipeline('sentiment-analysis', model=ft_model,
                                   tokenizer=ft_tokenizer, top_k=None)

    roberta_name = "cardiffnlp/twitter-roberta-base-sentiment"
    roberta_model = AutoModelForSequenceClassification.from_pretrained(roberta_name)
    roberta_tokenizer = AutoTokenizer.from_pretrained(roberta_name)
    roberta_pipeline = pipeline('sentiment-analysis', model=roberta_model,
                                tokenizer=roberta_tokenizer)

    # No model given: transformers picks its default for this task.
    default_pipeline = pipeline('sentiment-analysis')

    return fine_tuned_pipeline, roberta_pipeline, default_pipeline


ft_classifier, rob_classifier, def_classifier = _load_classifiers()



# Rows of the results table; the first row is the column header.
output = [['Tweet', 'Highest', 'Score', 'Second Highest', 'Score']]

# Fixed sample texts that are run through the fine-tuned classifier below.
# NOTE: the third entry uses a backslash line continuation inside the string
# literal, so the leading whitespace of its second line is part of the text.
strings = [
    "D'aww! He matches this background colour I'm seemingly stuck with. Thanks.  (talk) 21:51, January 11, 2016 (UTC)",
    "Hey man, I'm really not trying to edit war. It's just that this guy is constantly removing relevant information and talking to me through edits instead of my talk page. He seems to care more about the formatting than the actual info.",
    "Why can't you believe how fat Artie is? Did you see him on his recent appearence on the Tonight Show with Jay Leno? He looks absolutely AWFUL! If I had to put money on it, I'd say that Artie Lange is a can't miss candidate for the 2007 Dead pool!  \
                Kindly keep your malicious fingers off of my above comment, . Everytime you remove it, I will repost it!!!",
    "Thank you. This would make my life complete.  ",
    "Would you both shut up, you don't run wikipedia, especially a stupid kid.",
    "Please stop. If you continue to vandalize Wikipedia, as you did to Homosexuality, you will be blocked from editing.",
    "== Arabs are committing genocide in Iraq, but no protests in Europe. ==  May Europe also burn in hell.",
    ":yeah, thanks for reviving the tradition of pissing all over articles because you want to live out your ethnic essentialism. Why let mere facts get into the way of enjoying that.",
    "==Category:Italian Jews==  :Category:Italian Jews, which you created, has been nominated for possible deletion, merging, or renaming. If you would like to participate in the discussion, you are invited to add your comments at the category's entry on the Categories for discussion page. Thank you.",
    "KSchwartz is an annoying person who often smells of rotten fish and burnt animal hair.  He has a passing interest in Wikipedia.",
]



# Run the selected classifier on the text box contents when the button is
# pressed; otherwise show a short idle message.
if st.button('Analyze'):
    if option == 'Fine-Tuned':
        # Fine-tuned scores are not printed here; they become a table row.
        result = unpack(ft_classifier(input)[0])
        add_to_table(input, result, output)
    elif option in ('Roberta', 'Default'):
        # Both remaining models just write their first raw result.
        classifier = rob_classifier if option == 'Roberta' else def_classifier
        result = classifier(input)[0]
        st.write(result)
else:
    st.write('Excited to analyze!')



# Score every sample text with the fine-tuned classifier and add one table
# row per sample, then render all collected rows.
for sample in strings:
    sample_scores = unpack(ft_classifier(sample)[0])
    add_to_table(sample, sample_scores, output)

st.table(output)