seanpedrickcase committed on
Commit
8216d8c
·
1 Parent(s): 9e84863

Debugged reference to random_seed in vectorisation and reference to torch in representation_model.py

Browse files
Files changed (2) hide show
  1. funcs/embeddings.py +2 -1
  2. funcs/topic_core_funcs.py +1 -1
funcs/embeddings.py CHANGED
@@ -25,7 +25,7 @@ else:
25
 
26
 
27
  @spaces.GPU
28
- def make_or_load_embeddings(docs: list, file_list: list, embeddings_out: np.ndarray, embeddings_super_compress: str, high_quality_mode_opt: str, embeddings_name:str="mixedbread-ai/mxbai-embed-xsmall-v1") -> np.ndarray:
29
  """
30
  Create or load embeddings for the given documents.
31
 
@@ -35,6 +35,7 @@ def make_or_load_embeddings(docs: list, file_list: list, embeddings_out: np.ndar
35
  embeddings_out (np.ndarray): Array to store the embeddings.
36
  embeddings_super_compress (str): Option to super compress embeddings ("Yes" or "No").
37
  high_quality_mode_opt (str): Option for high quality mode ("Yes" or "No").
 
38
 
39
  Returns:
40
  np.ndarray: The generated or loaded embeddings.
 
25
 
26
 
27
  @spaces.GPU
28
+ def make_or_load_embeddings(docs: list, file_list: list, embeddings_out: np.ndarray, embeddings_super_compress: str, high_quality_mode_opt: str, embeddings_name:str="mixedbread-ai/mxbai-embed-xsmall-v1", random_seed:int=42) -> np.ndarray:
29
  """
30
  Create or load embeddings for the given documents.
31
 
 
35
  embeddings_out (np.ndarray): Array to store the embeddings.
36
  embeddings_super_compress (str): Option to super compress embeddings ("Yes" or "No").
37
  high_quality_mode_opt (str): Option for high quality mode ("Yes" or "No").
38
+ random_seed (int): Random seed for vectorisation
39
 
40
  Returns:
41
  np.ndarray: The generated or loaded embeddings.
funcs/topic_core_funcs.py CHANGED
@@ -326,7 +326,7 @@ def extract_topics(
326
  # UMAP model uses Bertopic defaults
327
  umap_model = UMAP(n_neighbors=umap_n_neighbours, n_components=5, min_dist=umap_min_dist, metric=umap_metric, low_memory=True, random_state=random_seed)
328
 
329
- embeddings_out, embedding_model = make_or_load_embeddings(docs, file_list, embeddings_out, embeddings_super_compress, high_quality_mode, embeddings_name)
330
 
331
  # If you want to save your embedding files
332
  if return_intermediate_files == "Yes":
 
326
  # UMAP model uses Bertopic defaults
327
  umap_model = UMAP(n_neighbors=umap_n_neighbours, n_components=5, min_dist=umap_min_dist, metric=umap_metric, low_memory=True, random_state=random_seed)
328
 
329
+ embeddings_out, embedding_model = make_or_load_embeddings(docs, file_list, embeddings_out, embeddings_super_compress, high_quality_mode, embeddings_name, random_seed)
330
 
331
  # If you want to save your embedding files
332
  if return_intermediate_files == "Yes":