asoria HF staff committed on
Commit
75e3496
1 Parent(s): b45d09c

Try to fix cuml not finding cuda library?

Browse files
Files changed (2) hide show
  1. app.py +7 -10
  2. requirements.txt +3 -1
app.py CHANGED
@@ -23,21 +23,13 @@ from gradio_huggingfacehub_search import HuggingfaceHubSearch
23
  from bertopic import BERTopic
24
  from bertopic.representation import KeyBERTInspired
25
  from bertopic.representation import TextGeneration
 
 
26
  from huggingface_hub import HfApi
27
  from sklearn.feature_extraction.text import CountVectorizer
28
  from sentence_transformers import SentenceTransformer
29
  from prompts import REPRESENTATION_PROMPT
30
 
31
-
32
- @spaces.GPU
33
- def calculate_embeddings(docs):
34
- return sentence_model.encode(docs, show_progress_bar=True, batch_size=32)
35
-
36
-
37
- from cuml.manifold import UMAP
38
- from cuml.cluster import HDBSCAN
39
-
40
-
41
  """
42
  TODOs:
43
  - Improve representation layer (Try with llamacpp or TextGeneration)
@@ -135,6 +127,11 @@ def get_docs_from_parquet(parquet_urls, column, offset, limit):
135
  return df[column].tolist()
136
 
137
 
 
 
 
 
 
138
  def calculate_n_neighbors_and_components(n_rows):
139
  n_neighbors = min(max(n_rows // 20, 15), 100)
140
  n_components = 10 if n_rows > 1000 else 5 # Higher components for larger datasets
 
23
  from bertopic import BERTopic
24
  from bertopic.representation import KeyBERTInspired
25
  from bertopic.representation import TextGeneration
26
+ from cuml.manifold import UMAP
27
+ from cuml.cluster import HDBSCAN
28
  from huggingface_hub import HfApi
29
  from sklearn.feature_extraction.text import CountVectorizer
30
  from sentence_transformers import SentenceTransformer
31
  from prompts import REPRESENTATION_PROMPT
32
 
 
 
 
 
 
 
 
 
 
 
33
  """
34
  TODOs:
35
  - Improve representation layer (Try with llamacpp or TextGeneration)
 
127
  return df[column].tolist()
128
 
129
 
130
+ @spaces.GPU
131
+ def calculate_embeddings(docs):
132
+ return sentence_model.encode(docs, show_progress_bar=True, batch_size=32)
133
+
134
+
135
  def calculate_n_neighbors_and_components(n_rows):
136
  n_neighbors = min(max(n_rows // 20, 15), 100)
137
  n_components = 10 if n_rows > 1000 else 5 # Higher components for larger datasets
requirements.txt CHANGED
@@ -1,3 +1,6 @@
 
 
 
1
  gradio_huggingfacehub_search==0.0.7
2
  duckdb
3
  accelerate
@@ -7,7 +10,6 @@ bitsandbytes
7
  datamapplot==0.3.0
8
  bertopic
9
  pandas
10
- torch
11
  numpy
12
  python-dotenv
13
  kaleido
 
1
+ spaces
2
+ gradio
3
+ torch
4
  gradio_huggingfacehub_search==0.0.7
5
  duckdb
6
  accelerate
 
10
  datamapplot==0.3.0
11
  bertopic
12
  pandas
 
13
  numpy
14
  python-dotenv
15
  kaleido