Spaces:

argilla
/

synthetic-data-generator

Running

App Files Community

davidberenstein1957 HF staff committed on Sep 16, 2024

Commit

7a3ba19

1 Parent(s): 8a94398

feat: update flow sample dataset

Browse files

Files changed (2) hide show

src/distilabel_dataset_generator/apps/sft.py +12 -6
src/distilabel_dataset_generator/pipelines/sft.py +1 -0

src/distilabel_dataset_generator/apps/sft.py CHANGED Viewed

@@ -175,13 +175,13 @@ with gr.Blocks(
     gr.Markdown("## Iterate on a sample dataset")
     with gr.Column() as main_ui:
-        dataset_description = gr.TextArea(
             label="Give a precise description of the assistant or tool. Don't describe the dataset",
             value=DEFAULT_DATASET_DESCRIPTIONS[0],
         )
         examples = gr.Examples(
             elem_id="system_prompt_examples",
-            examples=[[example] for example in DEFAULT_DATASET_DESCRIPTIONS[1:]],
             inputs=[dataset_description],
         )
         with gr.Row():
@@ -189,13 +189,13 @@ with gr.Blocks(
             btn_generate_system_prompt = gr.Button(value="Generate sample")
             gr.Column(scale=1)
-        system_prompt = gr.TextArea(
             label="System prompt for dataset generation. You can tune it and regenerate the sample",
             value=DEFAULT_SYSTEM_PROMPTS[0],
         )
         with gr.Row():
-            table = gr.DataFrame(
                 value=DEFAULT_DATASETS[0],
                 label="Sample dataset. Prompts and completions truncated to 256 tokens.",
                 interactive=False,
@@ -217,14 +217,14 @@ with gr.Blocks(
         ).then(
             fn=generate_sample_dataset,
             inputs=[system_prompt],
-            outputs=[table],
             show_progress=True,
         )
         btn_generate_sample_dataset.click(
             fn=generate_sample_dataset,
             inputs=[system_prompt],
-            outputs=[table],
             show_progress=True,
         )
@@ -302,6 +302,12 @@ with gr.Blocks(
     def hide_success_message():
         return gr.Markdown(visible=False)
     btn_generate_full_dataset.click(
         fn=hide_success_message,
         outputs=[success_message],

     gr.Markdown("## Iterate on a sample dataset")
     with gr.Column() as main_ui:
+        dataset_description = gr.Textbox(
             label="Give a precise description of the assistant or tool. Don't describe the dataset",
             value=DEFAULT_DATASET_DESCRIPTIONS[0],
         )
         examples = gr.Examples(
             elem_id="system_prompt_examples",
+            examples=[[example] for example in DEFAULT_DATASET_DESCRIPTIONS],
             inputs=[dataset_description],
         )
         with gr.Row():
             btn_generate_system_prompt = gr.Button(value="Generate sample")
             gr.Column(scale=1)
+        system_prompt = gr.Textbox(
             label="System prompt for dataset generation. You can tune it and regenerate the sample",
             value=DEFAULT_SYSTEM_PROMPTS[0],
         )
         with gr.Row():
+            sample_dataset = gr.DataFrame(
                 value=DEFAULT_DATASETS[0],
                 label="Sample dataset. Prompts and completions truncated to 256 tokens.",
                 interactive=False,
         ).then(
             fn=generate_sample_dataset,
             inputs=[system_prompt],
+            outputs=[sample_dataset],
             show_progress=True,
         )
         btn_generate_sample_dataset.click(
             fn=generate_sample_dataset,
             inputs=[system_prompt],
+            outputs=[sample_dataset],
             show_progress=True,
         )
     def hide_success_message():
         return gr.Markdown(visible=False)
+    sample_dataset.change(
+        fn=lambda x: x,
+        inputs=[sample_dataset],
+        outputs=[final_dataset],
+    )
     btn_generate_full_dataset.click(
         fn=hide_success_message,
         outputs=[success_message],

src/distilabel_dataset_generator/pipelines/sft.py CHANGED Viewed

@@ -117,6 +117,7 @@ User dataset description:
 MODEL = "meta-llama/Meta-Llama-3.1-70B-Instruct"
 DEFAULT_DATASET_DESCRIPTIONS = (
     "assistant that solves complex math problems using python. The assistant always answers in Python to problems described in natural language",
     "highly proficient assistant for PyTorch and CUDA expert developers to resolve complex issues",
     "skilled high school math assistant who helps students solve problems",
     "attentive and well-educated customer service assistant for a clothes e-commerce platform",

 MODEL = "meta-llama/Meta-Llama-3.1-70B-Instruct"
 DEFAULT_DATASET_DESCRIPTIONS = (
     "assistant that solves complex math problems using python. The assistant always answers in Python to problems described in natural language",
+    "a super helpful and intelligent assistant that answers using chain of thought, analysing the question, defining the steps to solve it, reflecting and revising its assumptions before responding",
     "highly proficient assistant for PyTorch and CUDA expert developers to resolve complex issues",
     "skilled high school math assistant who helps students solve problems",
     "attentive and well-educated customer service assistant for a clothes e-commerce platform",