update successful message and pipeline code
Browse files
src/distilabel_dataset_generator/apps/base.py
CHANGED
@@ -300,25 +300,6 @@ def get_iterate_on_sample_dataset_ui(
|
|
300 |
)
|
301 |
|
302 |
|
303 |
-
def get_pipeline_code_ui(pipeline_code: str) -> gr.Code:
|
304 |
-
gr.Markdown("## Customize and run with distilabel")
|
305 |
-
gr.HTML("<hr>")
|
306 |
-
|
307 |
-
with gr.Accordion(
|
308 |
-
"Run this pipeline using distilabel",
|
309 |
-
open=False,
|
310 |
-
):
|
311 |
-
gr.Markdown(
|
312 |
-
"You can run this pipeline locally with distilabel. For more information, please refer to the [distilabel documentation](https://distilabel.argilla.io/) or go to the FAQ tab at the top of the page for more information."
|
313 |
-
)
|
314 |
-
pipeline_code = gr.Code(
|
315 |
-
value=pipeline_code,
|
316 |
-
language="python",
|
317 |
-
label="Distilabel Pipeline Code",
|
318 |
-
)
|
319 |
-
return pipeline_code
|
320 |
-
|
321 |
-
|
322 |
def get_argilla_tab() -> Tuple[Any]:
|
323 |
with gr.Tab(label="Argilla"):
|
324 |
if get_argilla_client() is not None:
|
@@ -492,7 +473,7 @@ def get_success_message_row() -> gr.Markdown:
|
|
492 |
return success_message
|
493 |
|
494 |
|
495 |
-
def
|
496 |
client = get_argilla_client()
|
497 |
argilla_api_url = client.api_url
|
498 |
return gr.Markdown(
|
@@ -500,25 +481,27 @@ def show_success_message_hub(org_name, repo_name) -> gr.Markdown:
|
|
500 |
<div style="padding: 1em; background-color: #e6f3e6; border-radius: 5px; margin-top: 1em;">
|
501 |
<h3 style="color: #2e7d32; margin: 0;">Dataset Published Successfully!</h3>
|
502 |
<p style="margin-top: 0.5em;">
|
503 |
-
|
504 |
-
|
505 |
-
|
506 |
-
|
|
|
507 |
</p>
|
508 |
<p style="margin-top: 0.5em;">
|
509 |
-
Your dataset is now available
|
510 |
-
<a href="{
|
511 |
-
{
|
512 |
</a>
|
513 |
-
<br>Unfamiliar with Argilla? Here are some docs to help you get started:
|
514 |
-
<br>• <a href="https://docs.argilla.io/latest/how_to_guides/annotate/" target="_blank">How to curate data in Argilla</a>
|
515 |
-
<br>• <a href="https://docs.argilla.io/latest/how_to_guides/import_export/" target="_blank">How to export data once you have reviewed the dataset</a>
|
516 |
</p>
|
517 |
</div>
|
|
|
|
|
|
|
|
|
|
|
518 |
""",
|
519 |
visible=True,
|
520 |
)
|
521 |
|
522 |
-
|
523 |
def hide_success_message() -> gr.Markdown:
|
524 |
return gr.Markdown(value="")
|
|
|
300 |
)
|
301 |
|
302 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
303 |
def get_argilla_tab() -> Tuple[Any]:
|
304 |
with gr.Tab(label="Argilla"):
|
305 |
if get_argilla_client() is not None:
|
|
|
473 |
return success_message
|
474 |
|
475 |
|
476 |
+
def show_success_message(org_name, repo_name) -> gr.Markdown:
|
477 |
client = get_argilla_client()
|
478 |
argilla_api_url = client.api_url
|
479 |
return gr.Markdown(
|
|
|
481 |
<div style="padding: 1em; background-color: #e6f3e6; border-radius: 5px; margin-top: 1em;">
|
482 |
<h3 style="color: #2e7d32; margin: 0;">Dataset Published Successfully!</h3>
|
483 |
<p style="margin-top: 0.5em;">
|
484 |
+
<strong>
|
485 |
+
<a href="{argilla_api_url}" target="_blank" style="color: #1565c0; text-decoration: none;">
|
486 |
+
Open dataset in Argilla space
|
487 |
+
</a>
|
488 |
+
</strong>
|
489 |
</p>
|
490 |
<p style="margin-top: 0.5em;">
|
491 |
+
The generated dataset is in the right format for fine-tuning with TRL, AutoTrain, or other frameworks. Your dataset is now available at:
|
492 |
+
<a href="https://huggingface.co/datasets/{org_name}/{repo_name}" target="_blank" style="color: #1565c0; text-decoration: none;">
|
493 |
+
https://huggingface.co/datasets/{org_name}/{repo_name}
|
494 |
</a>
|
|
|
|
|
|
|
495 |
</p>
|
496 |
</div>
|
497 |
+
<p style="margin-top: 1em; font-size: 0.9em; color: #333;">
|
498 |
+
Unfamiliar with Argilla? Here are some docs to help you get started:
|
499 |
+
<br>• <a href="https://docs.argilla.io/latest/how_to_guides/annotate/" target="_blank">How to curate data in Argilla</a>
|
500 |
+
<br>• <a href="https://docs.argilla.io/latest/how_to_guides/import_export/" target="_blank">How to export data once you have reviewed the dataset</a>
|
501 |
+
</p>
|
502 |
""",
|
503 |
visible=True,
|
504 |
)
|
505 |
|
|
|
506 |
def hide_success_message() -> gr.Markdown:
|
507 |
return gr.Markdown(value="")
|
src/distilabel_dataset_generator/apps/sft.py
CHANGED
@@ -10,9 +10,8 @@ from distilabel.distiset import Distiset
|
|
10 |
from huggingface_hub import HfApi
|
11 |
|
12 |
from src.distilabel_dataset_generator.apps.base import (
|
13 |
-
get_pipeline_code_ui,
|
14 |
hide_success_message,
|
15 |
-
|
16 |
validate_argilla_user_workspace_dataset,
|
17 |
validate_push_to_hub,
|
18 |
)
|
@@ -343,6 +342,10 @@ def push_dataset_to_argilla(
|
|
343 |
return ""
|
344 |
|
345 |
|
|
|
|
|
|
|
|
|
346 |
######################
|
347 |
# Gradio UI
|
348 |
######################
|
@@ -394,7 +397,7 @@ with gr.Blocks(css=_LOGGED_OUT_CSS) as app:
|
|
394 |
|
395 |
gr.HTML(value="<hr>")
|
396 |
gr.Markdown(value="## 3. Generate your dataset")
|
397 |
-
with gr.Row():
|
398 |
with gr.Column(scale=2):
|
399 |
org_name = get_org_dropdown()
|
400 |
repo_name = gr.Textbox(
|
@@ -417,11 +420,22 @@ with gr.Blocks(css=_LOGGED_OUT_CSS) as app:
|
|
417 |
)
|
418 |
btn_push_to_hub = gr.Button("Push to Hub", variant="primary", scale=2)
|
419 |
with gr.Column(scale=3):
|
420 |
-
success_message = gr.Markdown()
|
421 |
-
|
422 |
-
|
423 |
-
|
424 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
425 |
|
426 |
gr.on(
|
427 |
triggers=[load_btn.click, btn_apply_to_sample_dataset.click],
|
@@ -463,9 +477,14 @@ with gr.Blocks(css=_LOGGED_OUT_CSS) as app:
|
|
463 |
outputs=[success_message],
|
464 |
show_progress=True,
|
465 |
).success(
|
466 |
-
fn=
|
467 |
inputs=[org_name, repo_name],
|
468 |
outputs=[success_message],
|
|
|
|
|
|
|
|
|
469 |
)
|
|
|
470 |
app.load(fn=swap_visibility, outputs=main_ui)
|
471 |
app.load(fn=get_org_dropdown, outputs=[org_name])
|
|
|
10 |
from huggingface_hub import HfApi
|
11 |
|
12 |
from src.distilabel_dataset_generator.apps.base import (
|
|
|
13 |
hide_success_message,
|
14 |
+
show_success_message,
|
15 |
validate_argilla_user_workspace_dataset,
|
16 |
validate_push_to_hub,
|
17 |
)
|
|
|
342 |
return ""
|
343 |
|
344 |
|
345 |
+
def update_pipeline_code_visibility():
|
346 |
+
return {pipeline_code_ui: gr.Accordion(visible=True)}
|
347 |
+
|
348 |
+
|
349 |
######################
|
350 |
# Gradio UI
|
351 |
######################
|
|
|
397 |
|
398 |
gr.HTML(value="<hr>")
|
399 |
gr.Markdown(value="## 3. Generate your dataset")
|
400 |
+
with gr.Row(equal_height=False):
|
401 |
with gr.Column(scale=2):
|
402 |
org_name = get_org_dropdown()
|
403 |
repo_name = gr.Textbox(
|
|
|
420 |
)
|
421 |
btn_push_to_hub = gr.Button("Push to Hub", variant="primary", scale=2)
|
422 |
with gr.Column(scale=3):
|
423 |
+
success_message = gr.Markdown(visible=True)
|
424 |
+
with gr.Accordion(
|
425 |
+
"Do you want to go further? Customize and run with Distilabel",
|
426 |
+
open=False,
|
427 |
+
visible=False,
|
428 |
+
) as pipeline_code_ui:
|
429 |
+
code = generate_pipeline_code(
|
430 |
+
system_prompt=system_prompt.value,
|
431 |
+
num_turns=num_turns.value,
|
432 |
+
num_rows=num_rows.value,
|
433 |
+
)
|
434 |
+
pipeline_code = gr.Code(
|
435 |
+
value=code,
|
436 |
+
language="python",
|
437 |
+
label="Distilabel Pipeline Code",
|
438 |
+
)
|
439 |
|
440 |
gr.on(
|
441 |
triggers=[load_btn.click, btn_apply_to_sample_dataset.click],
|
|
|
477 |
outputs=[success_message],
|
478 |
show_progress=True,
|
479 |
).success(
|
480 |
+
fn=show_success_message,
|
481 |
inputs=[org_name, repo_name],
|
482 |
outputs=[success_message],
|
483 |
+
).success(
|
484 |
+
fn=update_pipeline_code_visibility,
|
485 |
+
inputs=[],
|
486 |
+
outputs=[pipeline_code_ui],
|
487 |
)
|
488 |
+
|
489 |
app.load(fn=swap_visibility, outputs=main_ui)
|
490 |
app.load(fn=get_org_dropdown, outputs=[org_name])
|
src/distilabel_dataset_generator/apps/textcat.py
CHANGED
@@ -10,9 +10,8 @@ from distilabel.distiset import Distiset
|
|
10 |
from huggingface_hub import HfApi
|
11 |
|
12 |
from src.distilabel_dataset_generator.apps.base import (
|
13 |
-
get_pipeline_code_ui,
|
14 |
hide_success_message,
|
15 |
-
|
16 |
validate_argilla_user_workspace_dataset,
|
17 |
validate_push_to_hub,
|
18 |
)
|
@@ -351,6 +350,9 @@ def validate_input_labels(labels):
|
|
351 |
def update_max_num_labels(labels):
|
352 |
return gr.update(maximum=len(labels) if labels else 1)
|
353 |
|
|
|
|
|
|
|
354 |
|
355 |
######################
|
356 |
# Gradio UI
|
@@ -438,7 +440,7 @@ with gr.Blocks(css=_LOGGED_OUT_CSS) as app:
|
|
438 |
|
439 |
gr.HTML("<hr>")
|
440 |
gr.Markdown("## 3. Generate your dataset")
|
441 |
-
with gr.Row():
|
442 |
with gr.Column(scale=2):
|
443 |
org_name = get_org_dropdown()
|
444 |
repo_name = gr.Textbox(
|
@@ -462,17 +464,24 @@ with gr.Blocks(css=_LOGGED_OUT_CSS) as app:
|
|
462 |
btn_push_to_hub = gr.Button("Push to Hub", variant="primary", scale=2)
|
463 |
with gr.Column(scale=3):
|
464 |
success_message = gr.Markdown(visible=True)
|
465 |
-
|
466 |
-
|
467 |
-
|
468 |
-
|
469 |
-
|
470 |
-
|
471 |
-
|
472 |
-
|
473 |
-
|
474 |
-
|
475 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
476 |
|
477 |
gr.on(
|
478 |
triggers=[load_btn.click, btn_apply_to_sample_dataset.click],
|
@@ -525,9 +534,14 @@ with gr.Blocks(css=_LOGGED_OUT_CSS) as app:
|
|
525 |
outputs=[success_message],
|
526 |
show_progress=True,
|
527 |
).success(
|
528 |
-
fn=
|
529 |
inputs=[org_name, repo_name],
|
530 |
outputs=[success_message],
|
|
|
|
|
|
|
|
|
531 |
)
|
|
|
532 |
app.load(fn=swap_visibility, outputs=main_ui)
|
533 |
app.load(fn=get_org_dropdown, outputs=[org_name])
|
|
|
10 |
from huggingface_hub import HfApi
|
11 |
|
12 |
from src.distilabel_dataset_generator.apps.base import (
|
|
|
13 |
hide_success_message,
|
14 |
+
show_success_message,
|
15 |
validate_argilla_user_workspace_dataset,
|
16 |
validate_push_to_hub,
|
17 |
)
|
|
|
350 |
def update_max_num_labels(labels):
|
351 |
return gr.update(maximum=len(labels) if labels else 1)
|
352 |
|
353 |
+
def update_pipeline_code_visibility():
|
354 |
+
return {pipeline_code_ui: gr.Accordion(visible=True)}
|
355 |
+
|
356 |
|
357 |
######################
|
358 |
# Gradio UI
|
|
|
440 |
|
441 |
gr.HTML("<hr>")
|
442 |
gr.Markdown("## 3. Generate your dataset")
|
443 |
+
with gr.Row(equal_height=False):
|
444 |
with gr.Column(scale=2):
|
445 |
org_name = get_org_dropdown()
|
446 |
repo_name = gr.Textbox(
|
|
|
464 |
btn_push_to_hub = gr.Button("Push to Hub", variant="primary", scale=2)
|
465 |
with gr.Column(scale=3):
|
466 |
success_message = gr.Markdown(visible=True)
|
467 |
+
with gr.Accordion(
|
468 |
+
"Do you want to go further? Customize and run with Distilabel",
|
469 |
+
open=False,
|
470 |
+
visible=False,
|
471 |
+
) as pipeline_code_ui:
|
472 |
+
code = generate_pipeline_code(
|
473 |
+
system_prompt.value,
|
474 |
+
difficulty=difficulty.value,
|
475 |
+
clarity=clarity.value,
|
476 |
+
labels=labels.value,
|
477 |
+
num_labels=num_labels.value,
|
478 |
+
num_rows=num_rows.value,
|
479 |
+
)
|
480 |
+
pipeline_code = gr.Code(
|
481 |
+
value=code,
|
482 |
+
language="python",
|
483 |
+
label="Distilabel Pipeline Code",
|
484 |
+
)
|
485 |
|
486 |
gr.on(
|
487 |
triggers=[load_btn.click, btn_apply_to_sample_dataset.click],
|
|
|
534 |
outputs=[success_message],
|
535 |
show_progress=True,
|
536 |
).success(
|
537 |
+
fn=show_success_message,
|
538 |
inputs=[org_name, repo_name],
|
539 |
outputs=[success_message],
|
540 |
+
).success(
|
541 |
+
fn=update_pipeline_code_visibility,
|
542 |
+
inputs=[],
|
543 |
+
outputs=[pipeline_code_ui],
|
544 |
)
|
545 |
+
|
546 |
app.load(fn=swap_visibility, outputs=main_ui)
|
547 |
app.load(fn=get_org_dropdown, outputs=[org_name])
|