Spaces:

datasets-topics
/

topics-generator

Sleeping

asoria HF staff committed on Oct 16, 2024

Commit

b45d09c

1 Parent(s): f15f0a4

Try to fix cuml error

Files changed (1) hide show

app.py CHANGED Viewed

@@ -23,13 +23,21 @@ from gradio_huggingfacehub_search import HuggingfaceHubSearch
 from bertopic import BERTopic
 from bertopic.representation import KeyBERTInspired
 from bertopic.representation import TextGeneration
-from cuml.manifold import UMAP
-from cuml.cluster import HDBSCAN
 from huggingface_hub import HfApi
 from sklearn.feature_extraction.text import CountVectorizer
 from sentence_transformers import SentenceTransformer
 from prompts import REPRESENTATION_PROMPT
 """
 TODOs:
 - Improve representation layer (Try with llamacpp or TextGeneration)
@@ -127,11 +135,6 @@ def get_docs_from_parquet(parquet_urls, column, offset, limit):
     return df[column].tolist()
-@spaces.GPU
-def calculate_embeddings(docs):
-    return sentence_model.encode(docs, show_progress_bar=True, batch_size=32)
 def calculate_n_neighbors_and_components(n_rows):
     n_neighbors = min(max(n_rows // 20, 15), 100)
     n_components = 10 if n_rows > 1000 else 5  # Higher components for larger datasets

 from bertopic import BERTopic
 from bertopic.representation import KeyBERTInspired
 from bertopic.representation import TextGeneration
 from huggingface_hub import HfApi
 from sklearn.feature_extraction.text import CountVectorizer
 from sentence_transformers import SentenceTransformer
 from prompts import REPRESENTATION_PROMPT
+@spaces.GPU
+def calculate_embeddings(docs):
+    return sentence_model.encode(docs, show_progress_bar=True, batch_size=32)
+from cuml.manifold import UMAP
+from cuml.cluster import HDBSCAN
 """
 TODOs:
 - Improve representation layer (Try with llamacpp or TextGeneration)
     return df[column].tolist()
 def calculate_n_neighbors_and_components(n_rows):
     n_neighbors = min(max(n_rows // 20, 15), 100)
     n_components = 10 if n_rows > 1000 else 5  # Higher components for larger datasets