Spaces:
Runtime error
Commit · d66e35f
Parent(s): 8b09eed
Delete app
app (DELETED)
@@ -1,80 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Mon Jun 6 20:56:08 2022
-
-@author: User
-"""
-import nltk
-
-nltk.download('punkt')
-nltk.download('stopwords')
-nltk.download('wordnet')
-nltk.download('omw-1.4')
-
-# importing relevant python packages
-import streamlit as st
-import joblib
-# preprocessing
-import re
-import string
-import nltk
-from nltk.corpus import stopwords
-from nltk.stem import WordNetLemmatizer
-from sklearn.feature_extraction.text import TfidfVectorizer
-# modeling
-
-# creating page sections
-site_header = st.container()
-business_context = st.container()
-data_desc = st.container()
-performance = st.container()
-tweet_input = st.container()
-model_results = st.container()
-sentiment_analysis = st.container()
-contact = st.container()
-
-with site_header:
-    st.title('Toxic Comment Detection')
-
-
-with tweet_input:
-    st.header('Is Your Tweet Considered Hate Speech?')
-    st.write("""*Please note that this prediction is based on how the model was trained, so it may not be an accurate representation.*""")
-    # user input here
-    user_text = st.text_input('Enter Tweet', max_chars=280) # setting input as user_text
-
-with model_results:
-    st.subheader('Prediction:')
-    if user_text:
-        # processing user_text
-        # removing punctuation
-        user_text = re.sub('[%s]' % re.escape(string.punctuation), '', user_text)
-        # tokenizing
-        stop_words = set(stopwords.words('english'))
-        tokens = nltk.word_tokenize(user_text)
-        # removing stop words
-        stopwords_removed = [token.lower() for token in tokens if token.lower() not in stop_words]
-        # taking root word
-        lemmatizer = WordNetLemmatizer()
-        lemmatized_output = []
-        for word in stopwords_removed:
-            lemmatized_output.append(lemmatizer.lemmatize(word))
-
-        # instantiating count vectorizor
-        tfidf = TfidfVectorizer(stop_words=stop_words)
-        X_train = joblib.load(open('X_train.pickel', 'rb'))
-        X_test = lemmatized_output
-        X_train_count = tfidf.fit_transform(X_train)
-        X_test_count = tfidf.transform(X_test)
-
-        # loading in model
-        final_model = joblib.load(open('final_bayes.pickle', 'rb'))
-
-        # apply model to make predictions
-        prediction = final_model.predict(X_test_count[0])
-
-        if prediction == 0:
-            st.subheader('**Not Hate Speech**')
-        else:
-            st.subheader('**Hate Speech**')
-        st.text('')
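For reference, the inference block in the deleted file re-fits a TfidfVectorizer on X_train.pickel inside the Streamlit request and then calls transform on lemmatized_output, a bare list of tokens, so each token is vectorized as its own document and only X_test_count[0] (the first token's vector) reaches the model. A minimal sketch of the more conventional split is below; it assumes a hypothetical training step that persists the fitted vectorizer as tfidf_vectorizer.pickle, and it assumes final_bayes.pickle holds a scikit-learn Naive Bayes classifier as its name suggests. Neither assumption is confirmed by this diff, and the sketch is not a drop-in replacement for the deleted app.

# sketch only -- the artifact names below are assumptions, not files from this Space
import joblib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB

def train_and_save(train_texts, train_labels):
    # train_texts: preprocessed document strings; train_labels: 0/1 toxicity labels
    tfidf = TfidfVectorizer(stop_words='english')
    X_train_vec = tfidf.fit_transform(train_texts)
    model = MultinomialNB()
    model.fit(X_train_vec, train_labels)
    joblib.dump(tfidf, 'tfidf_vectorizer.pickle')  # hypothetical filename
    joblib.dump(model, 'final_bayes.pickle')

def predict_single(lemmatized_tokens):
    # lemmatized_tokens: the token list produced by the app's preprocessing
    tfidf = joblib.load('tfidf_vectorizer.pickle')  # load the already-fitted vectorizer
    model = joblib.load('final_bayes.pickle')
    doc = ' '.join(lemmatized_tokens)   # join tokens so transform() sees one document
    X = tfidf.transform([doc])          # transform only; no re-fitting at inference
    return int(model.predict(X)[0])

Persisting the fitted vectorizer also removes the need to ship X_train.pickel with the Space just to rebuild the vocabulary on every prediction.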