Spaces:
Running
Running
seanpedrickcase
committed on
Commit
·
8216d8c
1
Parent(s):
9e84863
Debugged reference to random_seed in vectorisation and reference to torch in representation_model.py
Browse files
- funcs/embeddings.py +2 -1
- funcs/topic_core_funcs.py +1 -1
funcs/embeddings.py
CHANGED
@@ -25,7 +25,7 @@ else:
|
|
25 |
|
26 |
|
27 |
@spaces.GPU
|
28 |
-
def make_or_load_embeddings(docs: list, file_list: list, embeddings_out: np.ndarray, embeddings_super_compress: str, high_quality_mode_opt: str, embeddings_name:str="mixedbread-ai/mxbai-embed-xsmall-v1") -> np.ndarray:
|
29 |
"""
|
30 |
Create or load embeddings for the given documents.
|
31 |
|
@@ -35,6 +35,7 @@ def make_or_load_embeddings(docs: list, file_list: list, embeddings_out: np.ndar
|
|
35 |
embeddings_out (np.ndarray): Array to store the embeddings.
|
36 |
embeddings_super_compress (str): Option to super compress embeddings ("Yes" or "No").
|
37 |
high_quality_mode_opt (str): Option for high quality mode ("Yes" or "No").
|
|
|
38 |
|
39 |
Returns:
|
40 |
np.ndarray: The generated or loaded embeddings.
|
|
|
25 |
|
26 |
|
27 |
@spaces.GPU
|
28 |
+
def make_or_load_embeddings(docs: list, file_list: list, embeddings_out: np.ndarray, embeddings_super_compress: str, high_quality_mode_opt: str, embeddings_name:str="mixedbread-ai/mxbai-embed-xsmall-v1", random_seed:int=42) -> np.ndarray:
|
29 |
"""
|
30 |
Create or load embeddings for the given documents.
|
31 |
|
|
|
35 |
embeddings_out (np.ndarray): Array to store the embeddings.
|
36 |
embeddings_super_compress (str): Option to super compress embeddings ("Yes" or "No").
|
37 |
high_quality_mode_opt (str): Option for high quality mode ("Yes" or "No").
|
38 |
+
random_seed (int): Random seed for vectorisation
|
39 |
|
40 |
Returns:
|
41 |
np.ndarray: The generated or loaded embeddings.
|
funcs/topic_core_funcs.py
CHANGED
@@ -326,7 +326,7 @@ def extract_topics(
|
|
326 |
# UMAP model uses Bertopic defaults
|
327 |
umap_model = UMAP(n_neighbors=umap_n_neighbours, n_components=5, min_dist=umap_min_dist, metric=umap_metric, low_memory=True, random_state=random_seed)
|
328 |
|
329 |
-
embeddings_out, embedding_model = make_or_load_embeddings(docs, file_list, embeddings_out, embeddings_super_compress, high_quality_mode, embeddings_name)
|
330 |
|
331 |
# If you want to save your embedding files
|
332 |
if return_intermediate_files == "Yes":
|
|
|
326 |
# UMAP model uses Bertopic defaults
|
327 |
umap_model = UMAP(n_neighbors=umap_n_neighbours, n_components=5, min_dist=umap_min_dist, metric=umap_metric, low_memory=True, random_state=random_seed)
|
328 |
|
329 |
+
embeddings_out, embedding_model = make_or_load_embeddings(docs, file_list, embeddings_out, embeddings_super_compress, high_quality_mode, embeddings_name, random_seed)
|
330 |
|
331 |
# If you want to save your embedding files
|
332 |
if return_intermediate_files == "Yes":
|