asoria HF staff committed on
Commit
9b9b3ce
·
1 Parent(s): 457b45f

Trying to reduce GPU load?

Browse files
Files changed (1) hide show
  1. app.py +26 -8
app.py CHANGED
@@ -54,14 +54,18 @@ model = AutoModelForCausalLM.from_pretrained(
54
  trust_remote_code=True,
55
  quantization_config=bnb_config,
56
  device_map="auto",
 
57
  )
58
 
 
 
 
59
  generator = pipeline(
60
  model=model,
61
  tokenizer=tokenizer,
62
  task="text-generation",
63
  temperature=0.1,
64
- max_new_tokens=500,
65
  repetition_penalty=1.1,
66
  )
67
 
@@ -71,19 +75,31 @@ representation_model = {
71
  "Llama2": llama2,
72
  }
73
 
 
 
 
 
74
  umap_model = UMAP(
75
- n_neighbors=15, n_components=5, min_dist=0.0, metric="cosine", random_state=42
 
 
 
 
76
  )
77
 
78
  hdbscan_model = HDBSCAN(
79
- min_cluster_size=15,
80
  metric="euclidean",
81
  cluster_selection_method="eom",
82
  prediction_data=True,
83
  )
84
 
85
  reduce_umap_model = UMAP(
86
- n_neighbors=15, n_components=2, min_dist=0.0, metric="cosine", random_state=42
 
 
 
 
87
  )
88
 
89
 
@@ -107,8 +123,9 @@ def get_docs_from_parquet(parquet_urls, column, offset, limit):
107
 
108
 
109
  # @spaces.GPU
 
110
  def calculate_embeddings(docs):
111
- return sentence_model.encode(docs, show_progress_bar=True, batch_size=100)
112
 
113
 
114
  # @spaces.GPU
@@ -124,7 +141,7 @@ def fit_model(base_model, docs, embeddings):
124
  # Hyperparameters
125
  top_n_words=10,
126
  verbose=True,
127
- min_topic_size=15,
128
  )
129
  logging.debug("Fitting new model")
130
  new_model.fit(docs, embeddings)
@@ -185,13 +202,14 @@ def generate_topics(dataset, config, split, column, nested_column):
185
  # )
186
  topic_plot = base_model.visualize_barchart()
187
 
188
- logging.info(f"Topics: {llama2_labels}")
189
 
190
  yield topics_info, topic_plot
191
 
192
  offset += chunk_size
193
 
194
  logging.info("Finished processing all data")
 
195
  return topics_info, topic_plot
196
 
197
 
@@ -229,7 +247,7 @@ with gr.Blocks() as demo:
229
  label="Nested text column name", visible=False
230
  )
231
 
232
- generate_button = gr.Button("Generate Notebook", variant="primary")
233
 
234
  gr.Markdown("## Datamap")
235
  topics_plot = gr.Plot()
 
54
  trust_remote_code=True,
55
  quantization_config=bnb_config,
56
  device_map="auto",
57
+ offload_folder="offload", # Offloading part of the model to CPU to save GPU memory
58
  )
59
 
60
+ # Enable gradient checkpointing for memory efficiency during backprop
61
+ model.gradient_checkpointing_enable()
62
+
63
  generator = pipeline(
64
  model=model,
65
  tokenizer=tokenizer,
66
  task="text-generation",
67
  temperature=0.1,
68
+ max_new_tokens=200, # Reduced max_new_tokens to limit memory consumption
69
  repetition_penalty=1.1,
70
  )
71
 
 
75
  "Llama2": llama2,
76
  }
77
 
78
+ # TODO: It should be proportional to the number of rows
79
+ # For small datasets (1-200 rows) it worked fine with 2 neighbors
80
+ N_NEIGHBORS = 15
81
+
82
  umap_model = UMAP(
83
+ n_neighbors=N_NEIGHBORS,
84
+ n_components=5,
85
+ min_dist=0.0,
86
+ metric="cosine",
87
+ random_state=42,
88
  )
89
 
90
  hdbscan_model = HDBSCAN(
91
+ min_cluster_size=N_NEIGHBORS,
92
  metric="euclidean",
93
  cluster_selection_method="eom",
94
  prediction_data=True,
95
  )
96
 
97
  reduce_umap_model = UMAP(
98
+ n_neighbors=N_NEIGHBORS,
99
+ n_components=2,
100
+ min_dist=0.0,
101
+ metric="cosine",
102
+ random_state=42,
103
  )
104
 
105
 
 
123
 
124
 
125
  # @spaces.GPU
126
+ # TODO: Tune the batch size to reduce memory consumption during embedding calculation; which value works best?
127
  def calculate_embeddings(docs):
128
+ return sentence_model.encode(docs, show_progress_bar=True, batch_size=32)
129
 
130
 
131
  # @spaces.GPU
 
141
  # Hyperparameters
142
  top_n_words=10,
143
  verbose=True,
144
+ min_topic_size=15, # TODO: Should this value be coherent with N_NEIGHBORS?
145
  )
146
  logging.debug("Fitting new model")
147
  new_model.fit(docs, embeddings)
 
202
  # )
203
  topic_plot = base_model.visualize_barchart()
204
 
205
+ logging.info(f"Topics: {repr_model_topics}")
206
 
207
  yield topics_info, topic_plot
208
 
209
  offset += chunk_size
210
 
211
  logging.info("Finished processing all data")
212
+ cuda.empty_cache() # Clear cache at the end of each chunk
213
  return topics_info, topic_plot
214
 
215
 
 
247
  label="Nested text column name", visible=False
248
  )
249
 
250
+ generate_button = gr.Button("Generate Topics", variant="primary")
251
 
252
  gr.Markdown("## Datamap")
253
  topics_plot = gr.Plot()