pknayak committed on
Commit
bcd163e
·
1 Parent(s): 54eef16

Adding eng stopwords

Browse files

Adding the English stopwords list under a `# Get English stopwords` comment:
en_stopwords = stopwords.words('english')

Files changed (1) hide show
  1. app.py +11 -14
app.py CHANGED
@@ -9,6 +9,12 @@ from sklearn.pipeline import Pipeline
9
  from sklearn.base import BaseEstimator, TransformerMixin
10
  from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
11
 
 
 
 
 
 
 
12
 
13
  #--------------------------------------------------------------------------------------
14
  #------------------------ NEWS DATA RETRIEVER------------------------------------------
@@ -144,20 +150,6 @@ def call_functions(domain):
144
 
145
 
146
 
147
- #----------------------------GRADIO APP--------------------------------------#
148
- # # GRADIO APP USING INTERFACE
149
- # # Create a Gradio interface
150
- # iface = gr.Interface(
151
- # fn=call_functions,
152
- # inputs=gr.components.Textbox(label="Directory Path"),
153
- # outputs=gr.components.Dataframe(type="pandas")
154
- # )
155
- # # Launch the Gradio app
156
- # iface.launch(debug=True)
157
-
158
- # GRADIO APP USING BLOCKS
159
-
160
-
161
 
162
 
163
  #--------------------------------------------------------------------------------------
@@ -202,6 +194,11 @@ def re_whitespaces(text_list):
202
  return white_spaces_end
203
 
204
  # Class for regular expressions application
 
 
 
 
 
205
  class ApplyRegex(BaseEstimator, TransformerMixin):
206
 
207
  def __init__(self, regex_transformers):
 
9
  from sklearn.base import BaseEstimator, TransformerMixin
10
  from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
11
 
12
+ import nltk
13
+ nltk.download('stopwords')
14
+ from nltk.corpus import stopwords
15
+
16
+ nltk.download('rslp')
17
+ from nltk.stem import RSLPStemmer
18
 
19
  #--------------------------------------------------------------------------------------
20
  #------------------------ NEWS DATA RETRIEVER------------------------------------------
 
150
 
151
 
152
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
 
154
 
155
  #--------------------------------------------------------------------------------------
 
194
  return white_spaces_end
195
 
196
  # Class for regular expressions application
197
+
198
+
199
+ # Get English stopwords
200
+ en_stopwords = stopwords.words('english')
201
+
202
  class ApplyRegex(BaseEstimator, TransformerMixin):
203
 
204
  def __init__(self, regex_transformers):