dahongj committed on
Commit
2f1aee4
1 Parent(s): 724abf8

edited code documentation

Browse files
Files changed (2) hide show
  1. app.py +18 -1
  2. finetune.py +2 -0
app.py CHANGED
@@ -5,6 +5,7 @@ from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassific
5
  from PIL import Image
6
  import torch
7
 
 
8
  def bertweet(data):
9
  specific_model = pipeline(model="finiteautomata/bertweet-base-sentiment-analysis")
10
  result = specific_model(data)
@@ -13,12 +14,13 @@ def bertweet(data):
13
 
14
  return label, score
15
 
 
16
  def roberta(data):
17
  specific_model = pipeline(model="cardiffnlp/twitter-roberta-base-sentiment")
18
  result = specific_model(data)
19
  label = result[0]['label']
20
  score = result[0]['score']
21
-
22
  if(label == 'LABEL_0'):
23
  label = 'Negative'
24
  elif(label == 'LABEL_1'):
@@ -28,6 +30,7 @@ def roberta(data):
28
 
29
  return label, score
30
 
 
31
  def siebert(data):
32
  specific_model = pipeline(model='siebert/sentiment-roberta-large-english')
33
  result = specific_model(data)
@@ -36,18 +39,24 @@ def siebert(data):
36
 
37
  return label, score
38
 
 
39
  def finetuned(data):
 
40
  model_name = "dahongj/finetuned_toxictweets"
41
  tokenizer = DistilBertTokenizerFast.from_pretrained(model_name)
42
  model = DistilBertForSequenceClassification.from_pretrained(model_name)
43
  tokenized_text = tokenizer(data, return_tensors="pt")
44
  res = model(**tokenized_text)
 
 
45
  mes = torch.sigmoid(res.logits)
46
 
 
47
  Dict = {0: "toxic", 1: "severe_toxic", 2: "obscene", 3: "threat", 4: "insult", 5: "identity_hate"}
48
 
49
  maxres, maxscore, sec, secscore = Dict[0], mes[0][0].item(), 0, 0
50
 
 
51
  for i in range(1,6):
52
  if mes[0][i].item() > secscore:
53
  sec = i
@@ -55,24 +64,29 @@ def finetuned(data):
55
 
56
  return maxres, maxscore, Dict[sec], secscore
57
 
 
58
  def getSent(data, model):
59
  if(model == 'Bertweet'):
60
  label,score = bertweet(data)
 
61
  col1, col2 = st.columns(2)
62
  col1.metric("Feeling",label,None)
63
  col2.metric("Score",score,None)
64
  elif(model == 'Roberta'):
65
  label,score = roberta(data)
 
66
  col1, col2 = st.columns(2)
67
  col1.metric("Feeling",label,None)
68
  col2.metric("Score",score,None)
69
  elif(model == 'Siebert'):
70
  label,score = siebert(data)
 
71
  col1, col2 = st.columns(2)
72
  col1.metric("Feeling",label,None)
73
  col2.metric("Score",score,None)
74
  elif(model == 'Finetuned'):
75
  label, score, sec, secsc = finetuned(data)
 
76
  col1, col2 = st.columns(2)
77
  col3, col4 = st.columns(2)
78
  col1.metric("Highest",label,None)
@@ -84,16 +98,19 @@ def rendPage():
84
  st.title("Sentiment Analysis")
85
  userText = st.text_area('User Input', "Hope you are having a great day!")
86
  st.text("")
 
87
  type = st.selectbox(
88
  'Choose your model',
89
  ('Bertweet','Roberta','Siebert','Finetuned'))
90
  st.text("")
91
 
 
92
  if st.button('Calculate'):
93
  if(userText!="" and type != None):
94
  st.text("")
95
  getSent(userText,type)
96
 
 
97
  image = Image.open("milestone3.jpg")
98
  st.image(image, caption="10 Example Texts")
99
 
 
5
  from PIL import Image
6
  import torch
7
 
8
+ #Run the Bertweet sentiment pipeline on the input and return its label and score
9
  def bertweet(data):
10
  specific_model = pipeline(model="finiteautomata/bertweet-base-sentiment-analysis")
11
  result = specific_model(data)
 
14
 
15
  return label, score
16
 
17
+ #Run the Roberta sentiment pipeline on the input and return its label and score
18
  def roberta(data):
19
  specific_model = pipeline(model="cardiffnlp/twitter-roberta-base-sentiment")
20
  result = specific_model(data)
21
  label = result[0]['label']
22
  score = result[0]['score']
23
+ #Change name of labels
24
  if(label == 'LABEL_0'):
25
  label = 'Negative'
26
  elif(label == 'LABEL_1'):
 
30
 
31
  return label, score
32
 
33
+ #Run the Siebert sentiment pipeline on the input and return its label and score
34
  def siebert(data):
35
  specific_model = pipeline(model='siebert/sentiment-roberta-large-english')
36
  result = specific_model(data)
 
39
 
40
  return label, score
41
 
42
+ #Finetuned model obtain max and second highest labels and scores
43
  def finetuned(data):
44
+ #Access finetune model
45
  model_name = "dahongj/finetuned_toxictweets"
46
  tokenizer = DistilBertTokenizerFast.from_pretrained(model_name)
47
  model = DistilBertForSequenceClassification.from_pretrained(model_name)
48
  tokenized_text = tokenizer(data, return_tensors="pt")
49
  res = model(**tokenized_text)
50
+
51
+ #Obtain score values
52
  mes = torch.sigmoid(res.logits)
53
 
54
+ #Labels corresponding to the array index
55
  Dict = {0: "toxic", 1: "severe_toxic", 2: "obscene", 3: "threat", 4: "insult", 5: "identity_hate"}
56
 
57
  maxres, maxscore, sec, secscore = Dict[0], mes[0][0].item(), 0, 0
58
 
59
+ #Search for second highest label
60
  for i in range(1,6):
61
  if mes[0][i].item() > secscore:
62
  sec = i
 
64
 
65
  return maxres, maxscore, Dict[sec], secscore
66
 
67
+ #Run model based on selection box
68
  def getSent(data, model):
69
  if(model == 'Bertweet'):
70
  label,score = bertweet(data)
71
+ #Create visual columns
72
  col1, col2 = st.columns(2)
73
  col1.metric("Feeling",label,None)
74
  col2.metric("Score",score,None)
75
  elif(model == 'Roberta'):
76
  label,score = roberta(data)
77
+ #Create visual columns
78
  col1, col2 = st.columns(2)
79
  col1.metric("Feeling",label,None)
80
  col2.metric("Score",score,None)
81
  elif(model == 'Siebert'):
82
  label,score = siebert(data)
83
+ #Create visual columns
84
  col1, col2 = st.columns(2)
85
  col1.metric("Feeling",label,None)
86
  col2.metric("Score",score,None)
87
  elif(model == 'Finetuned'):
88
  label, score, sec, secsc = finetuned(data)
89
+ #Create visual columns
90
  col1, col2 = st.columns(2)
91
  col3, col4 = st.columns(2)
92
  col1.metric("Highest",label,None)
 
98
  st.title("Sentiment Analysis")
99
  userText = st.text_area('User Input', "Hope you are having a great day!")
100
  st.text("")
101
+ #Selection box
102
  type = st.selectbox(
103
  'Choose your model',
104
  ('Bertweet','Roberta','Siebert','Finetuned'))
105
  st.text("")
106
 
107
+ #Create button
108
  if st.button('Calculate'):
109
  if(userText!="" and type != None):
110
  st.text("")
111
  getSent(userText,type)
112
 
113
+ #Image for sample 10 texts
114
  image = Image.open("milestone3.jpg")
115
  st.image(image, caption="10 Example Texts")
116
 
finetune.py CHANGED
@@ -23,9 +23,11 @@ class TextDataset(Dataset):
23
  self.labels = labels
24
 
25
  def __getitem__(self,idx):
 
26
  encodings = tokenizer(self.texts[idx], truncation=True, padding="max_length")
27
  item = {key: torch.tensor(val) for key, val in encodings.items()}
28
  item['labels'] = torch.tensor(self.labels[idx],dtype=torch.float32)
 
29
  del encodings
30
  return item
31
 
 
23
  self.labels = labels
24
 
25
  def __getitem__(self,idx):
26
+ #Tokenize the text at idx (truncated, padded to max length) with the tokenizer defined outside this method
27
  encodings = tokenizer(self.texts[idx], truncation=True, padding="max_length")
28
  item = {key: torch.tensor(val) for key, val in encodings.items()}
29
  item['labels'] = torch.tensor(self.labels[idx],dtype=torch.float32)
30
+ #Drop the local reference to the tokenizer output; only the tensors in item are returned (original note: meant to limit memory use)
31
  del encodings
32
  return item
33