Spaces:

Shchushch
/

find_me_book

Runtime error

Shchushch committed on Sep 1, 2023

Commit

a7055be

1 Parent(s): 50872cb

nltk2

Files changed (1) hide show

find.py CHANGED Viewed

@@ -15,9 +15,8 @@ from tqdm import tqdm
 tokenizer =  AutoTokenizer.from_pretrained("cointegrated/rubert-tiny2")
 model = AutoModel.from_pretrained("cointegrated/rubert-tiny2")
-nltk.download('stopwords')
-eng_stop_words = stopwords.words('english')
 with open('russian.txt', 'r') as f:
     ru_stop_words = f.read()
@@ -99,7 +98,7 @@ def clean(text: str)-> str:
     text = ''.join(c for c in text if c in allow)
     text= text.split()
     text = [word for word in text if word.lower() not in ru_stop_words]
-    text = [word for word in text if word.lower() not in eng_stop_words]
     return ' '.join(text)

 tokenizer =  AutoTokenizer.from_pretrained("cointegrated/rubert-tiny2")
 model = AutoModel.from_pretrained("cointegrated/rubert-tiny2")
+# nltk.download('stopwords')
+#eng_stop_words = stopwords.words('english')
 with open('russian.txt', 'r') as f:
     ru_stop_words = f.read()
     text = ''.join(c for c in text if c in allow)
     text= text.split()
     text = [word for word in text if word.lower() not in ru_stop_words]
+    #text = [word for word in text if word.lower() not in eng_stop_words]
     return ' '.join(text)