Try to fix cuml not finding cuda library?
- app.py: +7 -10
- requirements.txt: +3 -1
app.py
@@ -23,21 +23,13 @@ from gradio_huggingfacehub_search import HuggingfaceHubSearch
 from bertopic import BERTopic
 from bertopic.representation import KeyBERTInspired
 from bertopic.representation import TextGeneration
+from cuml.manifold import UMAP
+from cuml.cluster import HDBSCAN
 from huggingface_hub import HfApi
 from sklearn.feature_extraction.text import CountVectorizer
 from sentence_transformers import SentenceTransformer
 from prompts import REPRESENTATION_PROMPT
 
-
-@spaces.GPU
-def calculate_embeddings(docs):
-    return sentence_model.encode(docs, show_progress_bar=True, batch_size=32)
-
-
-from cuml.manifold import UMAP
-from cuml.cluster import HDBSCAN
-
-
 """
 TODOs:
 - Improve representation layer (Try with llamacpp or TextGeneration)
@@ -135,6 +127,11 @@ def get_docs_from_parquet(parquet_urls, column, offset, limit):
     return df[column].tolist()
 
 
+@spaces.GPU
+def calculate_embeddings(docs):
+    return sentence_model.encode(docs, show_progress_bar=True, batch_size=32)
+
+
 def calculate_n_neighbors_and_components(n_rows):
     n_neighbors = min(max(n_rows // 20, 15), 100)
     n_components = 10 if n_rows > 1000 else 5  # Higher components for larger datasets
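Moving the cuml imports up with the other top-level imports means they resolve once at startup, which appears to be the fix being attempted here. For context, below is a minimal sketch of how cuML's UMAP and HDBSCAN are typically plugged into BERTopic; the hyperparameter values, and the wiring itself, are illustrative assumptions rather than code from this Space:

from bertopic import BERTopic
from cuml.cluster import HDBSCAN
from cuml.manifold import UMAP

# Hyperparameter values below are placeholders, not taken from this commit.
umap_model = UMAP(n_neighbors=15, n_components=5, min_dist=0.0, metric="cosine")
hdbscan_model = HDBSCAN(min_cluster_size=15, metric="euclidean", prediction_data=True)

# BERTopic accepts drop-in replacements for its dimensionality-reduction and
# clustering steps; passing the cuML versions moves both onto the GPU.
topic_model = BERTopic(umap_model=umap_model, hdbscan_model=hdbscan_model)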
requirements.txt
@@ -1,3 +1,6 @@
+spaces
+gradio
+torch
 gradio_huggingfacehub_search==0.0.7
 duckdb
 accelerate
@@ -7,7 +10,6 @@ bitsandbytes
 datamapplot==0.3.0
 bertopic
 pandas
-torch
 numpy
 python-dotenv
 kaleido
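Since the commit message suggests cuML could not locate the CUDA libraries, a quick runtime check like the one below (an assumption about the debugging approach, not part of this commit) can confirm which CUDA build torch ships and whether a device is visible:

import torch

# Report the torch build and its bundled CUDA version.
print(torch.__version__, torch.version.cuda)

# Whether a CUDA device is visible; on ZeroGPU Spaces this is typically
# only meaningful inside a function decorated with @spaces.GPU.
print(torch.cuda.is_available())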