davidberenstein1957 HF staff committed on
Commit
cb05c15
·
1 Parent(s): f23a835

feat: cache pipeline

Browse files
src/distilabel_dataset_generator/apps/sft.py CHANGED
@@ -23,13 +23,8 @@ from src.distilabel_dataset_generator.utils import (
23
 
24
 
25
  def _run_pipeline(result_queue, num_turns, num_rows, system_prompt, is_sample):
26
- pipeline = get_pipeline(
27
- num_turns,
28
- num_rows,
29
- system_prompt,
30
- is_sample
31
- )
32
- distiset: Distiset = pipeline.run(use_cache=False)
33
  result_queue.put(distiset)
34
 
35
 
@@ -55,7 +50,9 @@ def generate_system_prompt(dataset_description, progress=gr.Progress()):
55
 
56
  def generate_sample_dataset(system_prompt, progress=gr.Progress()):
57
  progress(0.1, desc="Initializing sample dataset generation")
58
- result = generate_dataset(system_prompt, num_turns=1, num_rows=1, progress=progress, is_sample=True)
 
 
59
  progress(1.0, desc="Sample dataset generated")
60
  return result
61
 
@@ -181,7 +178,6 @@ with gr.Blocks(
181
  gr.Column(scale=1)
182
  btn_generate_system_prompt = gr.Button(value="Generate sample")
183
  gr.Column(scale=1)
184
-
185
 
186
  system_prompt = gr.TextArea(
187
  label="System prompt for dataset generation. You can tune it and regenerate the sample",
@@ -196,7 +192,6 @@ with gr.Blocks(
196
  wrap=True,
197
  )
198
 
199
-
200
  with gr.Row():
201
  gr.Column(scale=1)
202
  btn_generate_sample_dataset = gr.Button(
@@ -256,7 +251,9 @@ with gr.Blocks(
256
  visible=False,
257
  )
258
  org_name = get_org_dropdown()
259
- repo_name = gr.Textbox(label="Repo name", placeholder="dataset_name", value="my-distiset")
 
 
260
  private = gr.Checkbox(
261
  label="Private dataset", value=True, interactive=True, scale=0.5
262
  )
 
23
 
24
 
25
  def _run_pipeline(result_queue, num_turns, num_rows, system_prompt, is_sample):
26
+ pipeline = get_pipeline(num_turns, num_rows, system_prompt, is_sample)
27
+ distiset: Distiset = pipeline.run(use_cache=True)
 
 
 
 
 
28
  result_queue.put(distiset)
29
 
30
 
 
50
 
51
  def generate_sample_dataset(system_prompt, progress=gr.Progress()):
52
  progress(0.1, desc="Initializing sample dataset generation")
53
+ result = generate_dataset(
54
+ system_prompt, num_turns=1, num_rows=1, progress=progress, is_sample=True
55
+ )
56
  progress(1.0, desc="Sample dataset generated")
57
  return result
58
 
 
178
  gr.Column(scale=1)
179
  btn_generate_system_prompt = gr.Button(value="Generate sample")
180
  gr.Column(scale=1)
 
181
 
182
  system_prompt = gr.TextArea(
183
  label="System prompt for dataset generation. You can tune it and regenerate the sample",
 
192
  wrap=True,
193
  )
194
 
 
195
  with gr.Row():
196
  gr.Column(scale=1)
197
  btn_generate_sample_dataset = gr.Button(
 
251
  visible=False,
252
  )
253
  org_name = get_org_dropdown()
254
+ repo_name = gr.Textbox(
255
+ label="Repo name", placeholder="dataset_name", value="my-distiset"
256
+ )
257
  private = gr.Checkbox(
258
  label="Private dataset", value=True, interactive=True, scale=0.5
259
  )