asoria HF staff committed on
Commit
b45d09c
·
1 Parent(s): f15f0a4

Try to fix cuml error

Browse files
Files changed (1) hide show
  1. app.py +10 -7
app.py CHANGED
@@ -23,13 +23,21 @@ from gradio_huggingfacehub_search import HuggingfaceHubSearch
23
  from bertopic import BERTopic
24
  from bertopic.representation import KeyBERTInspired
25
  from bertopic.representation import TextGeneration
26
- from cuml.manifold import UMAP
27
- from cuml.cluster import HDBSCAN
28
  from huggingface_hub import HfApi
29
  from sklearn.feature_extraction.text import CountVectorizer
30
  from sentence_transformers import SentenceTransformer
31
  from prompts import REPRESENTATION_PROMPT
32
 
 
 
 
 
 
 
 
 
 
 
33
  """
34
  TODOs:
35
  - Improve representation layer (Try with llamacpp or TextGeneration)
@@ -127,11 +135,6 @@ def get_docs_from_parquet(parquet_urls, column, offset, limit):
127
  return df[column].tolist()
128
 
129
 
130
- @spaces.GPU
131
- def calculate_embeddings(docs):
132
- return sentence_model.encode(docs, show_progress_bar=True, batch_size=32)
133
-
134
-
135
  def calculate_n_neighbors_and_components(n_rows):
136
  n_neighbors = min(max(n_rows // 20, 15), 100)
137
  n_components = 10 if n_rows > 1000 else 5 # Higher components for larger datasets
 
23
  from bertopic import BERTopic
24
  from bertopic.representation import KeyBERTInspired
25
  from bertopic.representation import TextGeneration
 
 
26
  from huggingface_hub import HfApi
27
  from sklearn.feature_extraction.text import CountVectorizer
28
  from sentence_transformers import SentenceTransformer
29
  from prompts import REPRESENTATION_PROMPT
30
 
31
+
32
+ @spaces.GPU
33
+ def calculate_embeddings(docs):
34
+ return sentence_model.encode(docs, show_progress_bar=True, batch_size=32)
35
+
36
+
37
+ from cuml.manifold import UMAP
38
+ from cuml.cluster import HDBSCAN
39
+
40
+
41
  """
42
  TODOs:
43
  - Improve representation layer (Try with llamacpp or TextGeneration)
 
135
  return df[column].tolist()
136
 
137
 
 
 
 
 
 
138
  def calculate_n_neighbors_and_components(n_rows):
139
  n_neighbors = min(max(n_rows // 20, 15), 100)
140
  n_components = 10 if n_rows > 1000 else 5 # Higher components for larger datasets