azizbarank committed on
Commit
dc48776
·
1 Parent(s): 3ec4ce5

Delete resources/app.py

Browse files
Files changed (1) hide show
  1. resources/app.py +0 -80
resources/app.py DELETED
@@ -1,80 +0,0 @@
# -*- coding: utf-8 -*-
"""
Toxic comment / hate-speech detection Streamlit app.

Takes a tweet from the user, preprocesses it (punctuation removal,
tokenization, stop-word removal, lemmatization), vectorizes it with a
TF-IDF vectorizer fitted on the saved training corpus, and classifies it
with a pre-trained Naive Bayes model.

Created on Mon Jun 6 20:56:08 2022
@author: User
"""
# stdlib — text preprocessing helpers
import re
import string

# third-party
import joblib
import nltk
import streamlit as st
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer

# NLTK data required for tokenization, stop words and lemmatization.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')

# creating page sections
site_header = st.container()
business_context = st.container()
data_desc = st.container()
performance = st.container()
tweet_input = st.container()
model_results = st.container()
sentiment_analysis = st.container()
contact = st.container()

with site_header:
    st.title('Toxic Comment Detection')


with tweet_input:
    st.header('Is Your Tweet Considered Hate Speech?')
    st.write("""*Please note that this prediction is based on how the model was trained, so it may not be an accurate representation.*""")
    # user input here
    user_text = st.text_input('Enter Tweet', max_chars=280)  # setting input as user_text

with model_results:
    st.subheader('Prediction:')
    if user_text:
        # --- preprocessing of user_text ---
        # removing punctuation
        user_text = re.sub('[%s]' % re.escape(string.punctuation), '', user_text)
        # tokenizing
        stop_words = set(stopwords.words('english'))
        tokens = nltk.word_tokenize(user_text)
        # removing stop words (case-insensitive)
        stopwords_removed = [token.lower() for token in tokens if token.lower() not in stop_words]
        # taking root word of each remaining token
        lemmatizer = WordNetLemmatizer()
        lemmatized_output = [lemmatizer.lemmatize(word) for word in stopwords_removed]

        # TF-IDF vectorizer fitted on the saved training corpus.
        # FIX: scikit-learn requires stop_words to be a list (or 'english'/None);
        # passing a set raises a parameter-validation error in recent versions.
        tfidf = TfidfVectorizer(stop_words=list(stop_words))
        # FIX: pass the path to joblib.load instead of an unclosed file handle.
        # NOTE(review): filename spelled "pickel" in the original — confirm on disk.
        X_train = joblib.load('X_train.pickel')
        X_train_count = tfidf.fit_transform(X_train)

        # BUG FIX: the original transformed the raw token list, so each token
        # became a separate "document" and predict(X_test_count[0]) classified
        # only the first token. Join the tokens back into one document so the
        # whole tweet is classified.
        X_test_count = tfidf.transform([' '.join(lemmatized_output)])

        # loading in the trained model
        final_model = joblib.load('final_bayes.pickle')

        # apply model to make a prediction for the single input document
        prediction = final_model.predict(X_test_count)[0]

        if prediction == 0:
            st.subheader('**Not Hate Speech**')
        else:
            st.subheader('**Hate Speech**')
        st.text('')