Spaces:
Sleeping
Sleeping
ruslanruslanruslan
committed on
Commit
•
186a961
1
Parent(s):
60cb352
errors fixed
Browse files
pages/Film reviews classifier.py
CHANGED
@@ -14,7 +14,10 @@ import transformers
|
|
14 |
from collections import Counter
|
15 |
from nltk.corpus import stopwords
|
16 |
from nltk.stem import WordNetLemmatizer
|
17 |
-
|
|
|
|
|
|
|
18 |
|
19 |
def preprocess_single_string(input_string: str, seq_len: int, vocab_to_int: dict):
|
20 |
preprocessed_string = data_preprocessing(input_string)
|
@@ -46,7 +49,7 @@ def data_preprocessing(text: str):
|
|
46 |
text = text.lower()
|
47 |
text = re.sub('<.*?>', '', text)
|
48 |
text = ''.join([c for c in text if c not in string.punctuation])
|
49 |
-
text = [wn_lemmatizer.lemmatize(word) for word in text.split()
|
50 |
text = ' '.join(text)
|
51 |
return text
|
52 |
|
|
|
14 |
from collections import Counter
|
15 |
from nltk.corpus import stopwords
|
16 |
from nltk.stem import WordNetLemmatizer
|
17 |
+
|
18 |
+
nltk.download('wordnet')
|
19 |
+
nltk.download('stopwords')
|
20 |
+
stop_words = set(stopwords.words('english'))
|
21 |
|
22 |
def preprocess_single_string(input_string: str, seq_len: int, vocab_to_int: dict):
|
23 |
preprocessed_string = data_preprocessing(input_string)
|
|
|
49 |
text = text.lower()
|
50 |
text = re.sub('<.*?>', '', text)
|
51 |
text = ''.join([c for c in text if c not in string.punctuation])
|
52 |
+
text = [wn_lemmatizer.lemmatize(word) for word in text.split() if word not in stop_words]
|
53 |
text = ' '.join(text)
|
54 |
return text
|
55 |
|