File size: 3,677 Bytes
8cdbe40
 
 
f9cf78c
51c5019
c45446a
8cdbe40
2f1aee4
8cdbe40
747d201
123b344
4b44a9b
 
123b344
 
8cdbe40
2f1aee4
f541c0d
747d201
41ee349
 
 
2f1aee4
e38acc3
 
 
 
 
 
 
41ee349
c45446a
2f1aee4
b56d254
 
 
 
 
 
b7ec123
b56d254
2f1aee4
b7ec123
2f1aee4
f9cf78c
 
 
 
 
2f1aee4
 
f9cf78c
 
2f1aee4
f9cf78c
 
 
b7ec123
2f1aee4
f9cf78c
 
 
 
f42d3ef
f9cf78c
2d41505
2f1aee4
8cdbe40
 
123b344
2f1aee4
123b344
 
 
b5d21b7
f541c0d
2f1aee4
41ee349
 
 
b56d254
 
2f1aee4
b56d254
 
 
b7ec123
f42d3ef
2f1aee4
31c8161
 
f42d3ef
b7ec123
f42d3ef
 
8cdbe40
 
 
745ea4a
8cdbe40
2f1aee4
8cdbe40
 
b7ec123
8cdbe40
 
2f1aee4
8cdbe40
 
728a13e
8cdbe40
51c5019
2f1aee4
51c5019
 
8cdbe40
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import numpy as np
import streamlit as st
from transformers import pipeline
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification
from PIL import Image
import torch

#Bertweet obtain label and score
def bertweet(data):
    """Classify *data* with the Bertweet sentiment pipeline.

    Builds a pipeline for "finiteautomata/bertweet-base-sentiment-analysis",
    runs it on *data* and returns ``(label, score)`` taken from the first
    entry of the pipeline output.
    """
    classifier = pipeline(model="finiteautomata/bertweet-base-sentiment-analysis")
    #Only the first prediction is reported
    top = classifier(data)[0]
    return top['label'], top['score']

#Roberta obtain labels and score
def roberta(data):
    """Classify *data* with the Twitter-RoBERTa sentiment pipeline.

    Builds a pipeline for "cardiffnlp/twitter-roberta-base-sentiment", runs
    it on *data* and returns ``(label, score)`` for the first entry of the
    output, with the raw label replaced by a readable name.
    """
    classifier = pipeline(model="cardiffnlp/twitter-roberta-base-sentiment")
    top = classifier(data)[0]

    #Readable names for the raw labels; anything else is reported as Positive
    readable = {'LABEL_0': 'Negative', 'LABEL_1': 'Neutral'}
    return readable.get(top['label'], 'Positive'), top['score']

#Siebert obtain labels and score
def siebert(data):
    """Classify *data* with the Siebert sentiment pipeline.

    Builds a pipeline for 'siebert/sentiment-roberta-large-english', runs it
    on *data* and returns ``(label, score)`` taken from the first entry of
    the pipeline output.
    """
    classifier = pipeline(model='siebert/sentiment-roberta-large-english')
    #Only the first prediction is reported
    top = classifier(data)[0]
    return top['label'], top['score']

#Finetuned model obtain max and second highest labels and scores
def finetuned(data):
    """Score *data* with the fine-tuned toxicity model and return its top two labels.

    Loads the tokenizer and model for "dahongj/finetuned_toxictweets",
    applies a sigmoid to the logits of the first (only) input row and ranks
    the six labels by the resulting score.

    Args:
        data: Text handed to the tokenizer.

    Returns:
        A 4-tuple ``(label, score, second_label, second_score)`` where
        *label* has the highest score and *second_label* the next highest.
        Scores are Python floats. On equal scores the label with the lower
        index is ranked first.
    """
    #Access finetune model
    model_name = "dahongj/finetuned_toxictweets"
    tokenizer = DistilBertTokenizerFast.from_pretrained(model_name)
    model = DistilBertForSequenceClassification.from_pretrained(model_name)
    #Truncate so that text longer than the model's maximum input length
    #is shortened instead of failing inside the model
    tokenized_text = tokenizer(data, return_tensors="pt", truncation=True)

    #Inference only, so skip gradient tracking
    with torch.no_grad():
        res = model(**tokenized_text)

    #Obtain score values for the first row as plain floats
    scores = torch.sigmoid(res.logits)[0].tolist()

    #Labels corresponding to the array index
    labels = {0: "toxic", 1: "severe_toxic", 2: "obscene", 3: "threat", 4: "insult", 5: "identity_hate"}

    #Rank every label, not just indices 1-5, so the reported "highest" is
    #the true maximum even when another label outscores "toxic".
    #sorted() is stable, so ties keep the lower index first.
    ranked = sorted(labels, key=lambda idx: scores[idx], reverse=True)
    best, runner_up = ranked[0], ranked[1]

    return labels[best], scores[best], labels[runner_up], scores[runner_up]

#Run model based on selection box
def getSent(data, model):
    """Run the selected model on *data* and show the result as Streamlit metrics.

    'Bertweet', 'Roberta' and 'Siebert' each show one label/score pair in a
    two-column row. 'Finetuned' shows two rows: the highest and the second
    highest label with their scores. Any other *model* value shows nothing.
    """
    #Models that return a single (label, score) pair share one layout
    single_pair_models = (
        ('Bertweet', bertweet),
        ('Roberta', roberta),
        ('Siebert', siebert),
    )
    for name, classify in single_pair_models:
        if model == name:
            feeling, confidence = classify(data)
            #Create visual columns
            left, right = st.columns(2)
            left.metric("Feeling", feeling, None)
            right.metric("Score", confidence, None)
            return

    if model == 'Finetuned':
        top_label, top_score, next_label, next_score = finetuned(data)
        #Create visual columns: one row per reported label
        top_left, top_right = st.columns(2)
        next_left, next_right = st.columns(2)
        top_left.metric("Highest", top_label, None)
        top_right.metric("Score", top_score, None)
        next_left.metric("Second Highest", next_label, None)
        next_right.metric("Score", next_score, None)

def rendPage():
    """Lay out the page: title, text input, model picker, button and sample image.

    When the 'Calculate' button is pressed and the text area is not empty,
    the chosen model is run on the text through ``getSent``. The sample
    image is drawn on every call regardless of the button.
    """
    st.title("Sentiment Analysis")
    user_text = st.text_area('User Input', "Hope you are having a great day!")
    st.text("")

    #Selection box
    model_options = ('Bertweet','Roberta','Siebert','Finetuned')
    chosen_model = st.selectbox('Choose your model', model_options)
    st.text("")

    #Create button; analysis runs only on a press with text and a model chosen
    if st.button('Calculate') and user_text != "" and chosen_model is not None:
        st.text("")
        getSent(user_text, chosen_model)

    #Image for sample 10 texts
    st.image(Image.open("milestone3.jpg"), caption="10 Example Texts")

#Module-level entry point: build the page whenever this script is executed
rendPage()