azizbarank committed on
Commit
8b09eed
·
1 Parent(s): 9febd82

Create app

Browse files
Files changed (1) hide show
  1. app +80 -0
app ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# -*- coding: utf-8 -*-
"""
Created on Mon Jun 6 20:56:08 2022

@author: User

Streamlit app that classifies a user-entered tweet as hate speech or not.
Pipeline: strip punctuation -> tokenize -> remove stop words -> lemmatize,
then vectorize with a TF-IDF vectorizer re-fitted on the pickled training
corpus and classify with a pickled Naive Bayes model.
"""
import re
import string

import joblib
import nltk
import streamlit as st
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer

# corpora required for tokenization, stop-word removal and lemmatization
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')

# creating page sections
site_header = st.container()
business_context = st.container()
data_desc = st.container()
performance = st.container()
tweet_input = st.container()
model_results = st.container()
sentiment_analysis = st.container()
contact = st.container()

with site_header:
    st.title('Toxic Comment Detection')


with tweet_input:
    st.header('Is Your Tweet Considered Hate Speech?')
    st.write("""*Please note that this prediction is based on how the model was trained, so it may not be an accurate representation.*""")
    # user input here
    user_text = st.text_input('Enter Tweet', max_chars=280)  # setting input as user_text

with model_results:
    st.subheader('Prediction:')
    if user_text:
        # --- preprocessing: mirrors the training pipeline ---
        # removing punctuation
        user_text = re.sub('[%s]' % re.escape(string.punctuation), '', user_text)
        # tokenizing
        stop_words = set(stopwords.words('english'))
        tokens = nltk.word_tokenize(user_text)
        # removing stop words (case-insensitive)
        stopwords_removed = [token.lower() for token in tokens if token.lower() not in stop_words]
        # taking root word
        lemmatizer = WordNetLemmatizer()
        lemmatized_output = [lemmatizer.lemmatize(word) for word in stopwords_removed]

        # Re-fit the vectorizer on the pickled training corpus so its
        # vocabulary matches what the model was trained on.
        # sklearn expects stop_words as 'english' or a list, not a set.
        tfidf = TfidfVectorizer(stop_words=list(stop_words))
        # joblib.load opens and closes the file itself -- no leaked handle.
        # NOTE(review): unpickling executes arbitrary code; only use trusted
        # 'X_train.pickel' / 'final_bayes.pickle' files.
        X_train = joblib.load('X_train.pickel')
        X_train_count = tfidf.fit_transform(X_train)

        # BUGFIX: vectorize the whole tweet as ONE document. The original
        # passed the raw token list, so each token was treated as a separate
        # document and only the first token was ever classified.
        X_test_count = tfidf.transform([' '.join(lemmatized_output)])

        # loading in model
        final_model = joblib.load('final_bayes.pickle')

        # apply model to make predictions
        prediction = final_model.predict(X_test_count)

        if prediction[0] == 0:
            st.subheader('**Not Hate Speech**')
        else:
            st.subheader('**Hate Speech**')
        st.text('')