Spaces:
Runtime error
Runtime error
edited code documentation
Browse files- app.py +18 -1
- finetune.py +2 -0
app.py
CHANGED
@@ -5,6 +5,7 @@ from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassific
|
|
5 |
from PIL import Image
|
6 |
import torch
|
7 |
|
|
|
8 |
def bertweet(data):
|
9 |
specific_model = pipeline(model="finiteautomata/bertweet-base-sentiment-analysis")
|
10 |
result = specific_model(data)
|
@@ -13,12 +14,13 @@ def bertweet(data):
|
|
13 |
|
14 |
return label, score
|
15 |
|
|
|
16 |
def roberta(data):
|
17 |
specific_model = pipeline(model="cardiffnlp/twitter-roberta-base-sentiment")
|
18 |
result = specific_model(data)
|
19 |
label = result[0]['label']
|
20 |
score = result[0]['score']
|
21 |
-
|
22 |
if(label == 'LABEL_0'):
|
23 |
label = 'Negative'
|
24 |
elif(label == 'LABEL_1'):
|
@@ -28,6 +30,7 @@ def roberta(data):
|
|
28 |
|
29 |
return label, score
|
30 |
|
|
|
31 |
def siebert(data):
|
32 |
specific_model = pipeline(model='siebert/sentiment-roberta-large-english')
|
33 |
result = specific_model(data)
|
@@ -36,18 +39,24 @@ def siebert(data):
|
|
36 |
|
37 |
return label, score
|
38 |
|
|
|
39 |
def finetuned(data):
|
|
|
40 |
model_name = "dahongj/finetuned_toxictweets"
|
41 |
tokenizer = DistilBertTokenizerFast.from_pretrained(model_name)
|
42 |
model = DistilBertForSequenceClassification.from_pretrained(model_name)
|
43 |
tokenized_text = tokenizer(data, return_tensors="pt")
|
44 |
res = model(**tokenized_text)
|
|
|
|
|
45 |
mes = torch.sigmoid(res.logits)
|
46 |
|
|
|
47 |
Dict = {0: "toxic", 1: "severe_toxic", 2: "obscene", 3: "threat", 4: "insult", 5: "identity_hate"}
|
48 |
|
49 |
maxres, maxscore, sec, secscore = Dict[0], mes[0][0].item(), 0, 0
|
50 |
|
|
|
51 |
for i in range(1,6):
|
52 |
if mes[0][i].item() > secscore:
|
53 |
sec = i
|
@@ -55,24 +64,29 @@ def finetuned(data):
|
|
55 |
|
56 |
return maxres, maxscore, Dict[sec], secscore
|
57 |
|
|
|
58 |
def getSent(data, model):
|
59 |
if(model == 'Bertweet'):
|
60 |
label,score = bertweet(data)
|
|
|
61 |
col1, col2 = st.columns(2)
|
62 |
col1.metric("Feeling",label,None)
|
63 |
col2.metric("Score",score,None)
|
64 |
elif(model == 'Roberta'):
|
65 |
label,score = roberta(data)
|
|
|
66 |
col1, col2 = st.columns(2)
|
67 |
col1.metric("Feeling",label,None)
|
68 |
col2.metric("Score",score,None)
|
69 |
elif(model == 'Siebert'):
|
70 |
label,score = siebert(data)
|
|
|
71 |
col1, col2 = st.columns(2)
|
72 |
col1.metric("Feeling",label,None)
|
73 |
col2.metric("Score",score,None)
|
74 |
elif(model == 'Finetuned'):
|
75 |
label, score, sec, secsc = finetuned(data)
|
|
|
76 |
col1, col2 = st.columns(2)
|
77 |
col3, col4 = st.columns(2)
|
78 |
col1.metric("Highest",label,None)
|
@@ -84,16 +98,19 @@ def rendPage():
|
|
84 |
st.title("Sentiment Analysis")
|
85 |
userText = st.text_area('User Input', "Hope you are having a great day!")
|
86 |
st.text("")
|
|
|
87 |
type = st.selectbox(
|
88 |
'Choose your model',
|
89 |
('Bertweet','Roberta','Siebert','Finetuned'))
|
90 |
st.text("")
|
91 |
|
|
|
92 |
if st.button('Calculate'):
|
93 |
if(userText!="" and type != None):
|
94 |
st.text("")
|
95 |
getSent(userText,type)
|
96 |
|
|
|
97 |
image = Image.open("milestone3.jpg")
|
98 |
st.image(image, caption="10 Example Texts")
|
99 |
|
|
|
5 |
from PIL import Image
|
6 |
import torch
|
7 |
|
8 |
+
#Bertweet: obtain the sentiment label and score
|
9 |
def bertweet(data):
|
10 |
specific_model = pipeline(model="finiteautomata/bertweet-base-sentiment-analysis")
|
11 |
result = specific_model(data)
|
|
|
14 |
|
15 |
return label, score
|
16 |
|
17 |
+
#Roberta: obtain the sentiment label and score
|
18 |
def roberta(data):
|
19 |
specific_model = pipeline(model="cardiffnlp/twitter-roberta-base-sentiment")
|
20 |
result = specific_model(data)
|
21 |
label = result[0]['label']
|
22 |
score = result[0]['score']
|
23 |
+
#Map the model's raw LABEL_n names to readable sentiment names
|
24 |
if(label == 'LABEL_0'):
|
25 |
label = 'Negative'
|
26 |
elif(label == 'LABEL_1'):
|
|
|
30 |
|
31 |
return label, score
|
32 |
|
33 |
+
#Siebert: obtain the sentiment label and score
|
34 |
def siebert(data):
|
35 |
specific_model = pipeline(model='siebert/sentiment-roberta-large-english')
|
36 |
result = specific_model(data)
|
|
|
39 |
|
40 |
return label, score
|
41 |
|
42 |
+
#Finetuned model obtain max and second highest labels and scores
|
43 |
def finetuned(data):
|
44 |
+
#Load the finetuned model and its tokenizer
|
45 |
model_name = "dahongj/finetuned_toxictweets"
|
46 |
tokenizer = DistilBertTokenizerFast.from_pretrained(model_name)
|
47 |
model = DistilBertForSequenceClassification.from_pretrained(model_name)
|
48 |
tokenized_text = tokenizer(data, return_tensors="pt")
|
49 |
res = model(**tokenized_text)
|
50 |
+
|
51 |
+
#Apply a sigmoid to the logits to obtain the score values
|
52 |
mes = torch.sigmoid(res.logits)
|
53 |
|
54 |
+
#Labels corresponding to the array index
|
55 |
Dict = {0: "toxic", 1: "severe_toxic", 2: "obscene", 3: "threat", 4: "insult", 5: "identity_hate"}
|
56 |
|
57 |
maxres, maxscore, sec, secscore = Dict[0], mes[0][0].item(), 0, 0
|
58 |
|
59 |
+
#Search labels 1-5 for the highest score; label 0 ("toxic") is already taken as the top label above
|
60 |
for i in range(1,6):
|
61 |
if mes[0][i].item() > secscore:
|
62 |
sec = i
|
|
|
64 |
|
65 |
return maxres, maxscore, Dict[sec], secscore
|
66 |
|
67 |
+
#Run model based on selection box
|
68 |
def getSent(data, model):
|
69 |
if(model == 'Bertweet'):
|
70 |
label,score = bertweet(data)
|
71 |
+
#Create visual columns
|
72 |
col1, col2 = st.columns(2)
|
73 |
col1.metric("Feeling",label,None)
|
74 |
col2.metric("Score",score,None)
|
75 |
elif(model == 'Roberta'):
|
76 |
label,score = roberta(data)
|
77 |
+
#Create visual columns
|
78 |
col1, col2 = st.columns(2)
|
79 |
col1.metric("Feeling",label,None)
|
80 |
col2.metric("Score",score,None)
|
81 |
elif(model == 'Siebert'):
|
82 |
label,score = siebert(data)
|
83 |
+
#Create visual columns
|
84 |
col1, col2 = st.columns(2)
|
85 |
col1.metric("Feeling",label,None)
|
86 |
col2.metric("Score",score,None)
|
87 |
elif(model == 'Finetuned'):
|
88 |
label, score, sec, secsc = finetuned(data)
|
89 |
+
#Create visual columns
|
90 |
col1, col2 = st.columns(2)
|
91 |
col3, col4 = st.columns(2)
|
92 |
col1.metric("Highest",label,None)
|
|
|
98 |
st.title("Sentiment Analysis")
|
99 |
userText = st.text_area('User Input', "Hope you are having a great day!")
|
100 |
st.text("")
|
101 |
+
#Selection box
|
102 |
type = st.selectbox(
|
103 |
'Choose your model',
|
104 |
('Bertweet','Roberta','Siebert','Finetuned'))
|
105 |
st.text("")
|
106 |
|
107 |
+
#Create button
|
108 |
if st.button('Calculate'):
|
109 |
if(userText!="" and type != None):
|
110 |
st.text("")
|
111 |
getSent(userText,type)
|
112 |
|
113 |
+
#Image showing 10 sample texts
|
114 |
image = Image.open("milestone3.jpg")
|
115 |
st.image(image, caption="10 Example Texts")
|
116 |
|
finetune.py
CHANGED
@@ -23,9 +23,11 @@ class TextDataset(Dataset):
|
|
23 |
self.labels = labels
|
24 |
|
25 |
def __getitem__(self,idx):
|
|
|
26 |
encodings = tokenizer(self.texts[idx], truncation=True, padding="max_length")
|
27 |
item = {key: torch.tensor(val) for key, val in encodings.items()}
|
28 |
item['labels'] = torch.tensor(self.labels[idx],dtype=torch.float32)
|
|
|
29 |
del encodings
|
30 |
return item
|
31 |
|
|
|
23 |
self.labels = labels
|
24 |
|
25 |
def __getitem__(self,idx):
|
26 |
+
#Tokenize the text at this index
|
27 |
encodings = tokenizer(self.texts[idx], truncation=True, padding="max_length")
|
28 |
item = {key: torch.tensor(val) for key, val in encodings.items()}
|
29 |
item['labels'] = torch.tensor(self.labels[idx],dtype=torch.float32)
|
30 |
+
#Delete the encodings once they are converted to tensors, to free memory
|
31 |
del encodings
|
32 |
return item
|
33 |
|