Committed required files
Browse files- app.py +271 -0
- base_model.py +37 -0
- sentiment_model.py +38 -0
app.py
ADDED
@@ -0,0 +1,271 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from sentiment_model import PretrainedSentimentAnalyzer
|
3 |
+
import pandas as pd
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
import plotly.graph_objects as go
|
6 |
+
import pandas as pd
|
7 |
+
import re
|
8 |
+
|
9 |
+
|
10 |
+
# Streamlit re-executes this script from the top on every widget interaction.
# Constructing PretrainedSentimentAnalyzer builds a transformers pipeline, so
# the instance is created behind Streamlit's resource cache and reused across
# reruns. Releases without `st.cache_resource` fall back to a no-op decorator,
# which keeps the original build-on-every-run behaviour.
_cache_resource = getattr(st, "cache_resource", lambda func: func)


@_cache_resource
def _load_analyzer():
    """Build the shared sentiment analyzer (no train/test data is needed for inference)."""
    return PretrainedSentimentAnalyzer(None, None, None, None)


# Module-level instance used by the rest of the app.
analyzer = _load_analyzer()
|
12 |
+
# Define the Streamlit app
|
13 |
+
# Helpline notice shown when a single sentence is flagged.
_SENTENCE_HELPLINES = """
If you or your loved ones are feeling depressed, please reach out to these helplines:

- **Beyond Blue**: 1300 659 467
- **Mental Health Emergency**: 13 14 65
"""

# Helpline notice shown when rows of an uploaded CSV are flagged.
_BATCH_HELPLINES = """
I found these tweets to be depressed, please reach out to the below helplines for help:

- **Beyond Blue**: 1300 659 467
- **Mental Health Emergency**: 13 14 65
"""

# CSS + HTML for the "Predicting..." spinner rendered while the model runs.
_SPINNER_HTML = """
<style>
.spinner-container {
display: flex;
justify-content: center;
align-items: center;
flex-direction: column;
}

.spinner {
border: 16px solid #f3f3f3;
border-radius: 50%;
border-top: 16px solid #3498db;
width: 100px;
height: 100px;
-webkit-animation: spin 2s linear infinite;
animation: spin 2s linear infinite;
}

@-webkit-keyframes spin {
0% { -webkit-transform: rotate(0deg); }
100% { -webkit-transform: rotate(360deg); }
}

@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}

</style>
<div class="spinner-container">
<div class="spinner"></div>
<p>Predicting...</p>
</div>
"""


def _analyze_sentence():
    """Classify one typed sentence and report whether it is flagged as depressed."""
    text = st.text_input("Enter a sentence:")

    if not st.button("Analyze"):
        return

    # Empty or single-character input is rejected before the model is called.
    if not text or len(text) == 1:
        st.write("Enter valid text")
        return

    sentiment = analyzer.predict([text], inverse_transform=True)[0]
    st.write("Sentiment Analysis Results:")
    st.write("Sentiment:", sentiment['label'])

    # Only negative sentences go through the keyword scan.
    if sentiment['label'] == 'negative' and scan(text) == "Depression Detected":
        st.write("Depression Detected")
        st.error(_SENTENCE_HELPLINES)
    else:
        st.success("No Depression Detected")


def _show_sentiment_summary(predictions):
    """Write per-label counts and a pie chart for a column of predicted labels."""
    counts = predictions.value_counts()
    sentiment_dict = {
        'Positive': int(counts.get('positive', 0)),
        'Neutral': int(counts.get('neutral', 0)),
        'Negative': int(counts.get('negative', 0)),
    }

    st.write("Sentiment Counts:")
    for sentiment, count in sentiment_dict.items():
        st.write(sentiment + ":", count)

    labels = list(sentiment_dict.keys())
    sizes = list(sentiment_dict.values())
    colors = ['green', 'white', 'red']  # colors for Positive, Neutral, Negative

    fig = go.Figure(data=[go.Pie(labels=labels, values=sizes, hole=.2,
                                 marker=dict(colors=colors))])
    fig.update_layout(
        title="Sentiment Analysis",
        showlegend=True,
        legend_title="Sentiment",
        uniformtext_minsize=12,
        uniformtext_mode='hide',
    )
    st.plotly_chart(fig)


def _analyze_csv():
    """Classify the second column of an uploaded CSV and list the flagged rows."""
    file = st.file_uploader("Upload a CSV file:")
    if file is None:
        return

    if not file.name.lower().endswith('.csv'):
        st.error("Please upload a file with a .csv extension")
        return

    try:
        df = pd.read_csv(file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError):
        st.error("The uploaded file could not be read as CSV")
        return

    if not st.button("Analyze"):
        return

    # The text to classify is read from the second column; without it (or
    # without any rows) there is nothing to analyse.
    if df.shape[1] < 2 or df.empty:
        st.error("The CSV file must contain at least one row, "
                 "with the text to analyze in its second column")
        return
    column = df.columns[1]

    spinner_placeholder = st.markdown(_SPINNER_HTML, unsafe_allow_html=True)
    try:
        data = df[column].astype(str).tolist()
        sentiments = analyzer.predict(data, inverse_transform=True)
        df['Prediction'] = [s['label'] for s in sentiments]
    finally:
        # Clear the spinner even if prediction fails.
        spinner_placeholder.empty()

    st.write("Sentiment Analysis Results:")
    st.write(df)

    _show_sentiment_summary(df['Prediction'])

    # Scan the same column that was classified (it used to be a hard-coded
    # 'Tweets' column, which raised KeyError for any other header name).
    negative_data = df[df['Prediction'] == 'negative']
    depressed_df = scan(negative_data[column])
    if depressed_df.empty:
        st.success("None of the Comments in the Dataset are depressed")
    else:
        st.write("Depressed Comments")
        st.write(depressed_df)
        st.error(_BATCH_HELPLINES)


def main():
    """Render the Streamlit page and dispatch to the selected input mode."""
    st.title("Early Depression Detection System")

    # Get user input
    option = st.selectbox("Select an option:", ("Enter a sentence", "Upload a CSV file"))

    if option == "Enter a sentence":
        _analyze_sentence()
    elif option == "Upload a CSV file":
        _analyze_csv()
|
150 |
+
|
151 |
+
# Contraction -> expansion table, applied before punctuation is stripped.
_CONTRACTIONS = {
    "ain't": "are not", "'s": " is", "aren't": "are not",
    "can't": "cannot", "can't've": "cannot have",
    "'cause": "because", "could've": "could have", "couldn't": "could not",
    "couldn't've": "could not have", "didn't": "did not", "doesn't": "does not",
    "don't": "do not", "hadn't": "had not", "hadn't've": "had not have",
    "hasn't": "has not", "haven't": "have not", "he'd": "he would",
    "he'd've": "he would have", "he'll": "he will", "he'll've": "he will have",
    "how'd": "how did", "how'd'y": "how do you", "how'll": "how will",
    "I'd": "I would", "I'd've": "I would have", "I'll": "I will",
    "I'll've": "I will have", "I'm": "I am", "I've": "I have", "isn't": "is not",
    "it'd": "it would", "it'd've": "it would have", "it'll": "it will",
    "it'll've": "it will have", "let's": "let us", "ma'am": "madam",
    "mayn't": "may not", "might've": "might have", "mightn't": "might not",
    "mightn't've": "might not have", "must've": "must have", "mustn't": "must not",
    "mustn't've": "must not have", "needn't": "need not",
    "needn't've": "need not have", "o'clock": "of the clock", "oughtn't": "ought not",
    "oughtn't've": "ought not have", "shan't": "shall not", "sha'n't": "shall not",
    "shan't've": "shall not have", "she'd": "she would", "she'd've": "she would have",
    "she'll": "she will", "she'll've": "she will have", "should've": "should have",
    "shouldn't": "should not", "shouldn't've": "should not have", "so've": "so have",
    "that'd": "that would", "that'd've": "that would have", "there'd": "there would",
    "there'd've": "there would have", "they'd": "they would",
    "they'd've": "they would have", "they'll": "they will",
    "they'll've": "they will have", "they're": "they are", "they've": "they have",
    "to've": "to have", "wasn't": "was not", "we'd": "we would",
    "we'd've": "we would have", "we'll": "we will", "we'll've": "we will have",
    "we're": "we are", "we've": "we have", "weren't": "were not", "what'll": "what will",
    "what'll've": "what will have", "what're": "what are", "what've": "what have",
    "when've": "when have", "where'd": "where did", "where've": "where have",
    "who'll": "who will", "who'll've": "who will have", "who've": "who have",
    "why've": "why have", "will've": "will have", "won't": "will not",
    "won't've": "will not have", "would've": "would have", "wouldn't": "would not",
    "wouldn't've": "would not have", "y'all": "you all", "y'all'd": "you all would",
    "y'all'd've": "you all would have", "y'all're": "you all are",
    "y'all've": "you all have", "you'd": "you would", "you'd've": "you would have",
    "you'll": "you will", "you'll've": "you will have", "you're": "you are",
    "you've": "you have",
}

# Regex alternation is tried left to right, so longer contractions must come
# first; otherwise "he'd've" is consumed as "he'd" and leaves a stray "'ve".
_CONTRACTIONS_RE = re.compile(
    '|'.join(re.escape(key) for key in sorted(_CONTRACTIONS, key=len, reverse=True))
)

# Everything that is neither a word character nor whitespace.
_PUNCTUATION_RE = re.compile(r'[^\w\s]')

# Keywords searched for (as substrings) in the preprocessed text. They get the
# same punctuation stripping as the text, so entries such as "self-harm",
# "no-one" and "won’t" can match the stripped text.
# NOTE(review): keywords are deliberately NOT lowercased. "I" therefore never
# matches lowercased text, exactly as before; lowercasing it would make it a
# substring of almost every input. Matching is substring-based, so e.g. "all"
# also hits "really" - switching to whole-word matching needs a product call.
_ABSOLUTE_WORDS = frozenset(_PUNCTUATION_RE.sub('', keyword) for keyword in (
    "I", "feeling", "feels", "always", "never", "completely", "totally",
    "absolutely", "must", "should", "all", "every", "none", "nothing",
    "everyone", "everything", "only", "impossible", "forever", "can not",
    "cannot", "won’t", "will not", "no one", "no-one", "every time",
    "low", "everytime", "difficult", "difficulty", "nightmare", "bored",
    "disaster", "irritate", "broken", "hurt", "lost", "love", "alone",
    "stupid", "disgusted", "stress", "hostile", "reserved", "danger",
    "funeral", "respect", "vomit", "sick", "phase", "suffer", "suffering",
    "betray", "poster", "grief", "safe", "home", "treat",
    "confident", "peace", "lucky", "win",
    "proud", "beautiful", "please", "pleaded", "success", "laughing", "laugh", "party",
    "key", "justice", "sorry", "apologize", "kill", "help", "myself",
    "depress", "depressed", "hopeless", "helpless", "worthless",
    "guilty", "ashamed", "miserable", "regret", "despair", "empty",
    "numb", "withdrawn", "lonely", "fatigued", "fatigue", "cry", "exhausted", "overwhelmed",
    "suicidal", "self-harm", "panic", "anxiety", "trauma", "ptsd",
    "bipolar", "disorder", "breakup", "divorce", "loss", "unemployed",
    "fired", "bullied", "abused", "neglected", "failure", "inferior",
    "insecure", "ugly", "unloved", "unwanted", "loser", "pessimistic",
    "pointless", "meaningless", "purposeless", "directionless",
    "hopelessness", "uninterested", "disinterested", "unmotivated",
    "apathetic", "indifferent", "worthlessness", "guilt", "shame",
    "crying", "cried", "sadness", "sad", "sorrow",
))

# A text is flagged once this many distinct keywords are found in it.
_MIN_KEYWORD_HITS = 2


def _expand_contractions(text):
    """Replace every known contraction in *text* with its expansion."""
    return _CONTRACTIONS_RE.sub(lambda match: _CONTRACTIONS[match.group(0)], text)


def _preprocess_text(text):
    """Expand contractions, strip punctuation and lowercase; None if nothing is left."""
    text = _expand_contractions(str(text))
    text = _PUNCTUATION_RE.sub('', text).lower()
    return text or None


def _is_flagged(text):
    """Return True when *text* contains at least _MIN_KEYWORD_HITS distinct keywords."""
    if not text:
        # Preprocessing yields None for empty / punctuation-only input.
        return False
    hits = 0
    for keyword in _ABSOLUTE_WORDS:
        if keyword in text:
            hits += 1
            if hits >= _MIN_KEYWORD_HITS:
                return True
    return False


def scan(tweets_data):
    """Flag text that contains at least two of the depression keywords.

    Args:
        tweets_data: either a single ``str`` or a pandas Series of texts
            (anything exposing ``.apply`` and iteration over its values).

    Returns:
        For a ``str``: ``"Depression Detected"`` or ``"No Depression Detected"``.
        Otherwise: a DataFrame with one ``'Tweets'`` column holding the
        *preprocessed* text of every flagged entry (empty if none is flagged).
    """
    if isinstance(tweets_data, str):
        if _is_flagged(_preprocess_text(tweets_data)):
            return "Depression Detected"
        return "No Depression Detected"

    preprocessed_tweets = tweets_data.apply(_preprocess_text)
    # Build the result in one go instead of concatenating a frame per hit.
    flagged = [tweet for tweet in preprocessed_tweets if _is_flagged(tweet)]
    return pd.DataFrame({'Tweets': flagged}, dtype=object)
|
267 |
+
|
268 |
+
|
269 |
+
# Script entry point: render the Streamlit page when this file is executed
# directly rather than imported.
if __name__ == "__main__":
    main()
|
base_model.py
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from sklearn.metrics import precision_score, accuracy_score, recall_score, f1_score, classification_report
|
2 |
+
from sklearn.preprocessing import LabelEncoder
|
3 |
+
|
4 |
+
class TextClassifier:
    """Base class for text classifiers.

    Stores the train/test split and the metrics produced by `evaluate`.
    Subclasses implement `train` and `predict`.
    """

    def __init__(self, train_features, train_targets, test_features, test_targets):
        """Store the data split and reset the model and all metrics to None."""
        self.train_features = train_features
        self.train_targets = train_targets
        self.test_features = test_features
        self.test_targets = test_targets

        # Set by subclasses.
        self.model = None

        # Filled in by evaluate().
        self.classification_report = None
        self.accuracy = None
        self.precision = None
        self.recall = None
        self.f1 = None

    def train(self) -> None:
        """Fit the model; must be implemented by subclasses."""
        raise NotImplementedError

    def predict(self, text_samples: list, inverse_transform: bool = True) -> list:
        """Predict labels for *text_samples*; must be implemented by subclasses."""
        raise NotImplementedError

    def evaluate(self) -> dict:
        """Score the model on the stored test split.

        Calls `predict` on `test_features` with ``inverse_transform=False`` and
        compares the result with `test_targets`. Precision, recall and F1 use
        weighted averaging. All metrics, plus the text classification report,
        are also stored on the instance.

        Returns:
            Dict with the keys 'accuracy', 'precision', 'recall' and 'f1'.
        """
        predictions = self.predict(self.test_features, inverse_transform=False)

        self.accuracy = accuracy_score(self.test_targets, predictions)
        self.precision = precision_score(self.test_targets, predictions, average='weighted')
        self.recall = recall_score(self.test_targets, predictions, average='weighted')
        self.f1 = f1_score(self.test_targets, predictions, average='weighted')
        self.classification_report = classification_report(self.test_targets, predictions)

        # F1 used to be computed but left out of the returned summary.
        return {'accuracy': self.accuracy,
                'precision': self.precision,
                'recall': self.recall,
                'f1': self.f1}
|
sentiment_model.py
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from base_model import TextClassifier
|
2 |
+
import torch
|
3 |
+
from transformers import pipeline
|
4 |
+
|
5 |
+
class PretrainedSentimentAnalyzer(TextClassifier):
    """Sentiment classifier backed by a pretrained Hugging Face pipeline.

    Wraps the ``cardiffnlp/twitter-roberta-base-sentiment-latest``
    text-classification pipeline; no training is performed.
    """

    def __init__(self, train_features, train_targets, test_features, test_targets, min_threshold=0.7):
        """Store the data split and load the pipeline.

        Args:
            train_features, train_targets, test_features, test_targets:
                forwarded unchanged to `TextClassifier`.
            min_threshold: score a prediction must exceed to keep its label
                when `predict` is called with ``proba=False``.
        """
        super().__init__(train_features, train_targets, test_features, test_targets)

        # Use the GPU when torch reports one, otherwise the CPU.
        device = "cuda" if torch.cuda.is_available() else "cpu"

        self.model = pipeline("text-classification",
                              model="cardiffnlp/twitter-roberta-base-sentiment-latest",
                              device=device)

        # Pipeline label -> label reported by this class (currently identity).
        self.prediction_map = {'positive': 'positive',
                               'negative': 'negative',
                               'neutral': 'neutral'}

        self.threshold = min_threshold

    def train(self):
        """No-op: the model is pretrained."""
        pass

    def predict(self, text_samples: list, inverse_transform: bool = True,
                proba: bool = True, batch_size: int = 128) -> list:
        """Run the pipeline on *text_samples*.

        Args:
            text_samples: texts to classify.
            inverse_transform: accepted for compatibility with
                `TextClassifier.predict`; this implementation does not use it.
            proba: when True (the default) return the pipeline output as is;
                when False return one label string per sample.
            batch_size: batch size passed to the pipeline.

        Returns:
            With ``proba=True``: whatever the pipeline returns for the batch.
            With ``proba=False``: a list of labels, where any prediction whose
            score does not exceed the threshold is reported as 'neutral'.

        NOTE(review): the inherited `evaluate()` calls this method without
        ``proba=False`` and therefore receives the raw pipeline output rather
        than plain labels - confirm whether that is intended.
        """
        # Nothing to classify; skip the model call.
        if len(text_samples) == 0:
            return []

        predictions = self.model(text_samples, batch_size=batch_size)
        if proba:
            return predictions

        return [
            self.prediction_map[prediction['label']]
            if prediction['score'] > self.threshold else 'neutral'
            for prediction in predictions
        ]
|
38 |
+
|