Commit
·
7a3ba19
1
Parent(s):
8a94398
feat: update flow sample dataset
Browse files
src/distilabel_dataset_generator/apps/sft.py
CHANGED
@@ -175,13 +175,13 @@ with gr.Blocks(
|
|
175 |
|
176 |
gr.Markdown("## Iterate on a sample dataset")
|
177 |
with gr.Column() as main_ui:
|
178 |
-
dataset_description = gr.
|
179 |
label="Give a precise description of the assistant or tool. Don't describe the dataset",
|
180 |
value=DEFAULT_DATASET_DESCRIPTIONS[0],
|
181 |
)
|
182 |
examples = gr.Examples(
|
183 |
elem_id="system_prompt_examples",
|
184 |
-
examples=[[example] for example in DEFAULT_DATASET_DESCRIPTIONS
|
185 |
inputs=[dataset_description],
|
186 |
)
|
187 |
with gr.Row():
|
@@ -189,13 +189,13 @@ with gr.Blocks(
|
|
189 |
btn_generate_system_prompt = gr.Button(value="Generate sample")
|
190 |
gr.Column(scale=1)
|
191 |
|
192 |
-
system_prompt = gr.
|
193 |
label="System prompt for dataset generation. You can tune it and regenerate the sample",
|
194 |
value=DEFAULT_SYSTEM_PROMPTS[0],
|
195 |
)
|
196 |
|
197 |
with gr.Row():
|
198 |
-
|
199 |
value=DEFAULT_DATASETS[0],
|
200 |
label="Sample dataset. Prompts and completions truncated to 256 tokens.",
|
201 |
interactive=False,
|
@@ -217,14 +217,14 @@ with gr.Blocks(
|
|
217 |
).then(
|
218 |
fn=generate_sample_dataset,
|
219 |
inputs=[system_prompt],
|
220 |
-
outputs=[
|
221 |
show_progress=True,
|
222 |
)
|
223 |
|
224 |
btn_generate_sample_dataset.click(
|
225 |
fn=generate_sample_dataset,
|
226 |
inputs=[system_prompt],
|
227 |
-
outputs=[
|
228 |
show_progress=True,
|
229 |
)
|
230 |
|
@@ -302,6 +302,12 @@ with gr.Blocks(
|
|
302 |
def hide_success_message():
|
303 |
return gr.Markdown(visible=False)
|
304 |
|
|
|
|
|
|
|
|
|
|
|
|
|
305 |
btn_generate_full_dataset.click(
|
306 |
fn=hide_success_message,
|
307 |
outputs=[success_message],
|
|
|
175 |
|
176 |
gr.Markdown("## Iterate on a sample dataset")
|
177 |
with gr.Column() as main_ui:
|
178 |
+
dataset_description = gr.Textbox(
|
179 |
label="Give a precise description of the assistant or tool. Don't describe the dataset",
|
180 |
value=DEFAULT_DATASET_DESCRIPTIONS[0],
|
181 |
)
|
182 |
examples = gr.Examples(
|
183 |
elem_id="system_prompt_examples",
|
184 |
+
examples=[[example] for example in DEFAULT_DATASET_DESCRIPTIONS],
|
185 |
inputs=[dataset_description],
|
186 |
)
|
187 |
with gr.Row():
|
|
|
189 |
btn_generate_system_prompt = gr.Button(value="Generate sample")
|
190 |
gr.Column(scale=1)
|
191 |
|
192 |
+
system_prompt = gr.Textbox(
|
193 |
label="System prompt for dataset generation. You can tune it and regenerate the sample",
|
194 |
value=DEFAULT_SYSTEM_PROMPTS[0],
|
195 |
)
|
196 |
|
197 |
with gr.Row():
|
198 |
+
sample_dataset = gr.DataFrame(
|
199 |
value=DEFAULT_DATASETS[0],
|
200 |
label="Sample dataset. Prompts and completions truncated to 256 tokens.",
|
201 |
interactive=False,
|
|
|
217 |
).then(
|
218 |
fn=generate_sample_dataset,
|
219 |
inputs=[system_prompt],
|
220 |
+
outputs=[sample_dataset],
|
221 |
show_progress=True,
|
222 |
)
|
223 |
|
224 |
btn_generate_sample_dataset.click(
|
225 |
fn=generate_sample_dataset,
|
226 |
inputs=[system_prompt],
|
227 |
+
outputs=[sample_dataset],
|
228 |
show_progress=True,
|
229 |
)
|
230 |
|
|
|
302 |
def hide_success_message():
|
303 |
return gr.Markdown(visible=False)
|
304 |
|
305 |
+
sample_dataset.change(
|
306 |
+
fn=lambda x: x,
|
307 |
+
inputs=[sample_dataset],
|
308 |
+
outputs=[final_dataset],
|
309 |
+
)
|
310 |
+
|
311 |
btn_generate_full_dataset.click(
|
312 |
fn=hide_success_message,
|
313 |
outputs=[success_message],
|
src/distilabel_dataset_generator/pipelines/sft.py
CHANGED
@@ -117,6 +117,7 @@ User dataset description:
|
|
117 |
MODEL = "meta-llama/Meta-Llama-3.1-70B-Instruct"
|
118 |
DEFAULT_DATASET_DESCRIPTIONS = (
|
119 |
"assistant that solves complex math problems using python. The assistant always answers in Python to problems described in natural language",
|
|
|
120 |
"highly proficient assistant for PyTorch and CUDA expert developers to resolve complex issues",
|
121 |
"skilled high school math assistant who helps students solve problems",
|
122 |
"attentive and well-educated customer service assistant for a clothes e-commerce platform",
|
|
|
117 |
MODEL = "meta-llama/Meta-Llama-3.1-70B-Instruct"
|
118 |
DEFAULT_DATASET_DESCRIPTIONS = (
|
119 |
"assistant that solves complex math problems using python. The assistant always answers in Python to problems described in natural language",
|
120 |
+
"a super helpful and intelligent assistant that answers using chain of thought, analysing the question, defining the steps to solve it, reflecting and revising its assumptions before responding",
|
121 |
"highly proficient assistant for PyTorch and CUDA expert developers to resolve complex issues",
|
122 |
"skilled high school math assistant who helps students solve problems",
|
123 |
"attentive and well-educated customer service assistant for a clothes e-commerce platform",
|