Spaces:
Build error
Build error
Commit
·
4e0e782
1
Parent(s):
8ccc348
Remove NLTK package
Browse files
app/search/bm25_search.py
CHANGED
@@ -32,9 +32,9 @@ class BM25_search:
|
|
32 |
- perform_lemmatization (bool): Whether to perform lemmatization on tokens.
|
33 |
"""
|
34 |
# Ensure NLTK resources are downloaded only once
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
|
39 |
self.documents: List[str] = []
|
40 |
self.doc_ids: List[str] = []
|
@@ -153,7 +153,7 @@ async def initialize_bm25_search(remove_stopwords: bool = True, perform_lemmatiz
|
|
153 |
Initializes the BM25search with proper NLTK resource downloading.
|
154 |
"""
|
155 |
loop = asyncio.get_running_loop()
|
156 |
-
|
157 |
return BM25_search(remove_stopwords, perform_lemmatization)
|
158 |
|
159 |
|
|
|
32 |
- perform_lemmatization (bool): Whether to perform lemmatization on tokens.
|
33 |
"""
|
34 |
# Ensure NLTK resources are downloaded only once
|
35 |
+
if not BM25_search.nltk_resources_downloaded:
|
36 |
+
download_nltk_resources()
|
37 |
+
BM25_search.nltk_resources_downloaded = True # Mark as downloaded
|
38 |
|
39 |
self.documents: List[str] = []
|
40 |
self.doc_ids: List[str] = []
|
|
|
153 |
Initializes the BM25search with proper NLTK resource downloading.
|
154 |
"""
|
155 |
loop = asyncio.get_running_loop()
|
156 |
+
await loop.run_in_executor(None, download_nltk_resources)
|
157 |
return BM25_search(remove_stopwords, perform_lemmatization)
|
158 |
|
159 |
|