Spaces:
Sleeping
Sleeping
Shakshi3104
committed on
Commit
•
b58c0e3
1
Parent(s):
371a1e6
[fix] Rename and add timer
Browse files
model/search/{bm25.py → surface.py}
RENAMED
@@ -11,6 +11,7 @@ from rank_bm25 import BM25Okapi
|
|
11 |
|
12 |
from model.search.base import BaseSearchClient
|
13 |
from model.utils.tokenizer import MeCabTokenizer
|
|
|
14 |
|
15 |
|
16 |
class BM25Wrapper(BM25Okapi):
|
@@ -105,6 +106,7 @@ class BM25SearchClient(BaseSearchClient):
|
|
105 |
bm25 = BM25Wrapper(_data, "tokenized")
|
106 |
return cls(bm25, corpus_tokenized)
|
107 |
|
|
|
108 |
def search_top_n(self, _query: Union[List[str], str], n: int = 10) -> List[pd.DataFrame]:
|
109 |
"""
|
110 |
γ―γ¨γͺγ«ε―Ύγγζ€η΄’η΅ζγtop-nεεεΎγγ
|
|
|
11 |
|
12 |
from model.search.base import BaseSearchClient
|
13 |
from model.utils.tokenizer import MeCabTokenizer
|
14 |
+
from model.utils.timer import stop_watch
|
15 |
|
16 |
|
17 |
class BM25Wrapper(BM25Okapi):
|
|
|
106 |
bm25 = BM25Wrapper(_data, "tokenized")
|
107 |
return cls(bm25, corpus_tokenized)
|
108 |
|
109 |
+
@stop_watch
|
110 |
def search_top_n(self, _query: Union[List[str], str], n: int = 10) -> List[pd.DataFrame]:
|
111 |
"""
|
112 |
γ―γ¨γͺγ«ε―Ύγγζ€η΄’η΅ζγtop-nεεεΎγγ
|
model/search/{ruri.py → vector.py}
RENAMED
@@ -12,11 +12,11 @@ from loguru import logger
|
|
12 |
from tqdm import tqdm
|
13 |
|
14 |
import sentence_transformers as st
|
15 |
-
from sentence_transformers import util as st_util
|
16 |
|
17 |
import voyager
|
18 |
|
19 |
from model.search.base import BaseSearchClient
|
|
|
20 |
|
21 |
|
22 |
def array_to_string(array: np.ndarray) -> str:
|
@@ -94,6 +94,7 @@ class RuriVoyagerSearchClient(BaseSearchClient):
|
|
94 |
self.index = index
|
95 |
|
96 |
@classmethod
|
|
|
97 |
def from_dataframe(cls, _data: pd.DataFrame, _target: str):
|
98 |
logger.info("π¦ [RuriVoyagerSearchClient] Initialize from DataFrame")
|
99 |
|
@@ -120,6 +121,7 @@ class RuriVoyagerSearchClient(BaseSearchClient):
|
|
120 |
|
121 |
return cls(_data, _target, index, embedder)
|
122 |
|
|
|
123 |
def search_top_n(self, _query: Union[List[str], str], n: int = 10) -> List[pd.DataFrame]:
|
124 |
"""
|
125 |
γ―γ¨γͺγ«ε―Ύγγζ€η΄’η΅ζγtop-nεεεΎγγ
|
|
|
12 |
from tqdm import tqdm
|
13 |
|
14 |
import sentence_transformers as st
|
|
|
15 |
|
16 |
import voyager
|
17 |
|
18 |
from model.search.base import BaseSearchClient
|
19 |
+
from model.utils.timer import stop_watch
|
20 |
|
21 |
|
22 |
def array_to_string(array: np.ndarray) -> str:
|
|
|
94 |
self.index = index
|
95 |
|
96 |
@classmethod
|
97 |
+
@stop_watch
|
98 |
def from_dataframe(cls, _data: pd.DataFrame, _target: str):
|
99 |
logger.info("π¦ [RuriVoyagerSearchClient] Initialize from DataFrame")
|
100 |
|
|
|
121 |
|
122 |
return cls(_data, _target, index, embedder)
|
123 |
|
124 |
+
@stop_watch
|
125 |
def search_top_n(self, _query: Union[List[str], str], n: int = 10) -> List[pd.DataFrame]:
|
126 |
"""
|
127 |
γ―γ¨γͺγ«ε―Ύγγζ€η΄’η΅ζγtop-nεεεΎγγ
|