Spaces:

ruslanruslanruslan
/

nlp_project

Sleeping

ruslanruslanruslan committed on Jul 21, 2023

Commit

186a961

•

1 Parent(s): 60cb352

errors fixed

Files changed (1) hide show

pages/Film reviews classifier.py CHANGED Viewed

@@ -14,7 +14,10 @@ import transformers
 from collections import Counter
 from nltk.corpus import stopwords
 from nltk.stem import WordNetLemmatizer
-# stop_words = set(stopwords.words('english'))
 def preprocess_single_string(input_string: str, seq_len: int, vocab_to_int: dict):
     preprocessed_string = data_preprocessing(input_string)
@@ -46,7 +49,7 @@ def data_preprocessing(text: str):
     text = text.lower()
     text = re.sub('<.*?>', '', text)
     text = ''.join([c for c in text if c not in string.punctuation])
-    text = [wn_lemmatizer.lemmatize(word) for word in text.split()] #if word not in stop_words]
     text = ' '.join(text)
     return text

 from collections import Counter
 from nltk.corpus import stopwords
 from nltk.stem import WordNetLemmatizer
+nltk.download('wordnet')
+nltk.download('stopwords')
+stop_words = set(stopwords.words('english'))
 def preprocess_single_string(input_string: str, seq_len: int, vocab_to_int: dict):
     preprocessed_string = data_preprocessing(input_string)
     text = text.lower()
     text = re.sub('<.*?>', '', text)
     text = ''.join([c for c in text if c not in string.punctuation])
+    text = [wn_lemmatizer.lemmatize(word) for word in text.split() if word not in stop_words]
     text = ' '.join(text)
     return text