davidberenstein1957 HF staff committed on
Commit
7a3ba19
·
1 Parent(s): 8a94398

feat: update flow sample dataset

Browse files
src/distilabel_dataset_generator/apps/sft.py CHANGED
@@ -175,13 +175,13 @@ with gr.Blocks(
175
 
176
  gr.Markdown("## Iterate on a sample dataset")
177
  with gr.Column() as main_ui:
178
- dataset_description = gr.TextArea(
179
  label="Give a precise description of the assistant or tool. Don't describe the dataset",
180
  value=DEFAULT_DATASET_DESCRIPTIONS[0],
181
  )
182
  examples = gr.Examples(
183
  elem_id="system_prompt_examples",
184
- examples=[[example] for example in DEFAULT_DATASET_DESCRIPTIONS[1:]],
185
  inputs=[dataset_description],
186
  )
187
  with gr.Row():
@@ -189,13 +189,13 @@ with gr.Blocks(
189
  btn_generate_system_prompt = gr.Button(value="Generate sample")
190
  gr.Column(scale=1)
191
 
192
- system_prompt = gr.TextArea(
193
  label="System prompt for dataset generation. You can tune it and regenerate the sample",
194
  value=DEFAULT_SYSTEM_PROMPTS[0],
195
  )
196
 
197
  with gr.Row():
198
- table = gr.DataFrame(
199
  value=DEFAULT_DATASETS[0],
200
  label="Sample dataset. Prompts and completions truncated to 256 tokens.",
201
  interactive=False,
@@ -217,14 +217,14 @@ with gr.Blocks(
217
  ).then(
218
  fn=generate_sample_dataset,
219
  inputs=[system_prompt],
220
- outputs=[table],
221
  show_progress=True,
222
  )
223
 
224
  btn_generate_sample_dataset.click(
225
  fn=generate_sample_dataset,
226
  inputs=[system_prompt],
227
- outputs=[table],
228
  show_progress=True,
229
  )
230
 
@@ -302,6 +302,12 @@ with gr.Blocks(
302
  def hide_success_message():
303
  return gr.Markdown(visible=False)
304
 
 
 
 
 
 
 
305
  btn_generate_full_dataset.click(
306
  fn=hide_success_message,
307
  outputs=[success_message],
 
175
 
176
  gr.Markdown("## Iterate on a sample dataset")
177
  with gr.Column() as main_ui:
178
+ dataset_description = gr.Textbox(
179
  label="Give a precise description of the assistant or tool. Don't describe the dataset",
180
  value=DEFAULT_DATASET_DESCRIPTIONS[0],
181
  )
182
  examples = gr.Examples(
183
  elem_id="system_prompt_examples",
184
+ examples=[[example] for example in DEFAULT_DATASET_DESCRIPTIONS],
185
  inputs=[dataset_description],
186
  )
187
  with gr.Row():
 
189
  btn_generate_system_prompt = gr.Button(value="Generate sample")
190
  gr.Column(scale=1)
191
 
192
+ system_prompt = gr.Textbox(
193
  label="System prompt for dataset generation. You can tune it and regenerate the sample",
194
  value=DEFAULT_SYSTEM_PROMPTS[0],
195
  )
196
 
197
  with gr.Row():
198
+ sample_dataset = gr.DataFrame(
199
  value=DEFAULT_DATASETS[0],
200
  label="Sample dataset. Prompts and completions truncated to 256 tokens.",
201
  interactive=False,
 
217
  ).then(
218
  fn=generate_sample_dataset,
219
  inputs=[system_prompt],
220
+ outputs=[sample_dataset],
221
  show_progress=True,
222
  )
223
 
224
  btn_generate_sample_dataset.click(
225
  fn=generate_sample_dataset,
226
  inputs=[system_prompt],
227
+ outputs=[sample_dataset],
228
  show_progress=True,
229
  )
230
 
 
302
  def hide_success_message():
303
  return gr.Markdown(visible=False)
304
 
305
+ sample_dataset.change(
306
+ fn=lambda x: x,
307
+ inputs=[sample_dataset],
308
+ outputs=[final_dataset],
309
+ )
310
+
311
  btn_generate_full_dataset.click(
312
  fn=hide_success_message,
313
  outputs=[success_message],
src/distilabel_dataset_generator/pipelines/sft.py CHANGED
@@ -117,6 +117,7 @@ User dataset description:
117
  MODEL = "meta-llama/Meta-Llama-3.1-70B-Instruct"
118
  DEFAULT_DATASET_DESCRIPTIONS = (
119
  "assistant that solves complex math problems using python. The assistant always answers in Python to problems described in natural language",
 
120
  "highly proficient assistant for PyTorch and CUDA expert developers to resolve complex issues",
121
  "skilled high school math assistant who helps students solve problems",
122
  "attentive and well-educated customer service assistant for a clothes e-commerce platform",
 
117
  MODEL = "meta-llama/Meta-Llama-3.1-70B-Instruct"
118
  DEFAULT_DATASET_DESCRIPTIONS = (
119
  "assistant that solves complex math problems using python. The assistant always answers in Python to problems described in natural language",
120
+ "a super helpful and intelligent assistant that answers using chain of thought, analysing the question, defining the steps to solve it, reflecting and revising its assumptions before responding",
121
  "highly proficient assistant for PyTorch and CUDA expert developers to resolve complex issues",
122
  "skilled high school math assistant who helps students solve problems",
123
  "attentive and well-educated customer service assistant for a clothes e-commerce platform",