Committed by AroojImtiaz · verified
Commit 520faf7 · 1 Parent(s): 917736f

Create app.py

Files changed (1):
  1. app.py (+198, -0)

app.py ADDED
import streamlit as st
import pandas as pd
import string
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier

# One-time NLTK data downloads: the 'punkt' tokenizer models and the stopword list
nltk.download('punkt')
nltk.download('stopwords')
sw = stopwords.words("english")

rad = st.sidebar.radio(
    "Navigation",
    ["Home", "Sarcasm Detection", "Sentiment Analysis", "Spam or Ham Detection",
     "Stress Detection", "Hate and Offensive Content Detection"],
)

# Home Page
if rad == "Home":
    st.title("Complete Text Analysis App")
    st.image("SEO-articles-V2_Text-Analysis.png")
    st.text(" ")
    st.text("The following text analysis options are available:")
    st.text(" ")
    st.text("1. Spam or Ham Detection")
    st.text("2. Sentiment Analysis")
    st.text("3. Stress Detection")
    st.text("4. Hate and Offensive Content Detection")
    st.text("5. Sarcasm Detection")

# Clean and normalise raw user input: lowercase, tokenize, keep only
# alphanumeric tokens, drop stopwords and punctuation, then Porter-stem
def transform_text(text):
    text = text.lower()
    text = word_tokenize(text)
    y = []
    for i in text:
        if i.isalnum():
            y.append(i)
    text = y[:]
    y.clear()
    for i in text:
        # reuse the precomputed stopword list rather than rebuilding it per token
        if i not in sw and i not in string.punctuation:
            y.append(i)
    text = y[:]
    y.clear()
    ps = PorterStemmer()
    for i in text:
        y.append(ps.stem(i))
    return " ".join(y)

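# Illustrative example (not in the original commit) of what the cleaning
# pipeline above produces, e.g.:
#   transform_text("Running dogs are running!")  ->  "run dog run"
# ("!" fails isalnum, "are" is a stopword, and the Porter stemmer maps
# "running" -> "run" and "dogs" -> "dog")
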
# Spam Detection Prediction
# Each transformN helper calls fit_transform at import time, so the matching
# tfidfN vectorizer is already fitted when the page code below calls
# tfidfN.transform on user input
tfidf1 = TfidfVectorizer(stop_words=sw, max_features=20)
def transform1(txt1):
    txt2 = tfidf1.fit_transform(txt1)
    return txt2.toarray()

df1 = pd.read_csv("Spam Detection.csv")
df1.columns = ["Label", "Text"]
x = transform1(df1["Text"])
y = df1["Label"]
x_train1, x_test1, y_train1, y_test1 = train_test_split(x, y, test_size=0.1, random_state=0)
model1 = LogisticRegression()
model1.fit(x_train1, y_train1)

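# Optional sanity check (a sketch, not in the original commit): report
# hold-out accuracy on the 10% test split; LogisticRegression.score returns
# mean accuracy. Uncomment to print it during development.
# print("Spam model hold-out accuracy:", model1.score(x_test1, y_test1))
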
# Spam Detection Analysis Page
if rad == "Spam or Ham Detection":
    st.header("Detect Whether A Text Is Spam Or Ham?")
    sent1 = st.text_area("Enter The Text")
    transformed_sent1 = transform_text(sent1)
    vector_sent1 = tfidf1.transform([transformed_sent1])
    prediction1 = model1.predict(vector_sent1)[0]

    if st.button("Predict"):
        if prediction1 == "spam":
            st.warning("Spam Text!!")
        elif prediction1 == "ham":
            st.success("Ham Text!!")

# Sentiment Analysis Prediction
tfidf2 = TfidfVectorizer(stop_words=sw, max_features=20)
def transform2(txt1):
    txt2 = tfidf2.fit_transform(txt1)
    return txt2.toarray()

df2 = pd.read_csv("Sentiment Analysis.csv")
df2.columns = ["Text", "Label"]
x = transform2(df2["Text"])
y = df2["Label"]
x_train2, x_test2, y_train2, y_test2 = train_test_split(x, y, test_size=0.1, random_state=0)
model2 = LogisticRegression()
model2.fit(x_train2, y_train2)

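# Optional sketch (not in the original commit): a confusion matrix gives a
# fuller picture than raw accuracy on the sentiment test split.
# from sklearn.metrics import confusion_matrix
# print(confusion_matrix(y_test2, model2.predict(x_test2)))
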
# Sentiment Analysis Page
if rad == "Sentiment Analysis":
    st.header("Detect The Sentiment Of The Text!!")
    sent2 = st.text_area("Enter The Text")
    transformed_sent2 = transform_text(sent2)
    vector_sent2 = tfidf2.transform([transformed_sent2])
    prediction2 = model2.predict(vector_sent2)[0]

    if st.button("Predict"):
        if prediction2 == 0:
            st.warning("Negative Text!!")
        elif prediction2 == 1:
            st.success("Positive Text!!")

# Stress Detection Prediction
tfidf3 = TfidfVectorizer(stop_words=sw, max_features=20)
def transform3(txt1):
    txt2 = tfidf3.fit_transform(txt1)
    return txt2.toarray()

df3 = pd.read_csv("Stress Detection.csv")
df3 = df3.drop(["subreddit", "post_id", "sentence_range", "syntax_fk_grade"], axis=1)
df3.columns = ["Text", "Sentiment", "Stress Level"]
x = transform3(df3["Text"])
y = df3["Stress Level"].to_numpy()
x_train3, x_test3, y_train3, y_test3 = train_test_split(x, y, test_size=0.1, random_state=0)
model3 = DecisionTreeRegressor(max_leaf_nodes=2000)
model3.fit(x_train3, y_train3)

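# Optional sketch (not in the original commit): for a regressor, .score
# returns R^2 on the held-out split, a rough indicator of fit quality.
# print("Stress model R^2:", model3.score(x_test3, y_test3))
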
# Stress Detection Page
if rad == "Stress Detection":
    st.header("Detect The Amount Of Stress In The Text!!")
    sent3 = st.text_area("Enter The Text")
    transformed_sent3 = transform_text(sent3)
    vector_sent3 = tfidf3.transform([transformed_sent3])
    prediction3 = model3.predict(vector_sent3)[0]

    if st.button("Predict"):
        # Threshold the regressor's output at the 0.5 midpoint (assuming
        # binary 0/1 stress labels); the original >=0 / <0 split could never
        # take the "not stressful" branch, since predictions over
        # non-negative labels are never negative
        if prediction3 >= 0.5:
            st.warning("Stressful Text!!")
        else:
            st.success("Not A Stressful Text!!")

# Hate & Offensive Content Prediction
tfidf4 = TfidfVectorizer(stop_words=sw, max_features=20)
def transform4(txt1):
    txt2 = tfidf4.fit_transform(txt1)
    return txt2.toarray()

df4 = pd.read_csv("Hate Content Detection.csv")
df4 = df4.drop(["Unnamed: 0", "count", "neither"], axis=1)
df4.columns = ["Hate Level", "Offensive Level", "Class Level", "Text"]
x = transform4(df4["Text"])
y = df4["Class Level"]
x_train4, x_test4, y_train4, y_test4 = train_test_split(x, y, test_size=0.1, random_state=0)
model4 = RandomForestClassifier()
model4.fit(x_train4, y_train4)

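# Optional sanity check (not in the original commit): hate/offensive corpora
# are typically heavily imbalanced across classes, which accuracy alone hides.
# print(df4["Class Level"].value_counts())
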
# Hate & Offensive Content Page
if rad == "Hate and Offensive Content Detection":
    st.header("Detect The Level Of Hate & Offensive Content In The Text!!")
    sent4 = st.text_area("Enter The Text")
    transformed_sent4 = transform_text(sent4)
    vector_sent4 = tfidf4.transform([transformed_sent4])
    prediction4 = model4.predict(vector_sent4)[0]

    if st.button("Predict"):
        # st.error replaces the original st.exception, which expects an
        # Exception instance rather than a message string
        if prediction4 == 0:
            st.error("Highly Offensive Text!!")
        elif prediction4 == 1:
            st.warning("Offensive Text!!")
        elif prediction4 == 2:
            st.success("Non Offensive Text!!")

# Sarcasm Detection Prediction
tfidf5 = TfidfVectorizer(stop_words=sw, max_features=20)
def transform5(txt1):
    txt2 = tfidf5.fit_transform(txt1)
    return txt2.toarray()

df5 = pd.read_csv("Sarcasm Detection.csv")
df5.columns = ["Text", "Label"]
x = transform5(df5["Text"])
y = df5["Label"]
x_train5, x_test5, y_train5, y_test5 = train_test_split(x, y, test_size=0.1, random_state=0)
model5 = LogisticRegression()
model5.fit(x_train5, y_train5)

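# Optional sketch (not in the original commit): Streamlit reruns this whole
# script on every interaction, so all five models retrain per click. Wrapping
# the fitting code in a cached trainer along these lines avoids that; assumes
# Streamlit >= 1.18 for st.cache_resource.
# @st.cache_resource
# def train_sarcasm_model():
#     m = LogisticRegression()
#     m.fit(x_train5, y_train5)
#     return m
# model5 = train_sarcasm_model()
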
# Sarcasm Detection Page
if rad == "Sarcasm Detection":
    st.header("Detect Whether The Text Is Sarcastic Or Not!!")
    sent5 = st.text_area("Enter The Text")
    transformed_sent5 = transform_text(sent5)
    vector_sent5 = tfidf5.transform([transformed_sent5])
    prediction5 = model5.predict(vector_sent5)[0]

    if st.button("Predict"):
        # st.warning replaces the original st.exception here as well, since
        # st.exception expects an Exception instance, not a message string
        if prediction5 == 1:
            st.warning("Sarcastic Text!!")
        elif prediction5 == 0:
            st.success("Non Sarcastic Text!!")