davidberenstein1957 HF staff committed on
Commit
9ac3da0
·
1 Parent(s): fd936a6

feat: add examples

Browse files
app.py CHANGED
@@ -16,8 +16,12 @@ h3{margin-top: 0}
16
  .tabitem{border: 0px}
17
  .group_padding{padding: .55em}
18
  #space_model .wrap > label:last-child{opacity: 0.3; pointer-events:none}
 
 
 
19
  """
20
 
 
21
  demo = gr.TabbedInterface(
22
  [sft_app, faq_app],
23
  ["Supervised Fine-Tuning", "FAQ"],
 
16
  .tabitem{border: 0px}
17
  .group_padding{padding: .55em}
18
  #space_model .wrap > label:last-child{opacity: 0.3; pointer-events:none}
19
+ #system_prompt_examples {
20
+ color: black;
21
+ }
22
  """
23
 
24
+
25
  demo = gr.TabbedInterface(
26
  [sft_app, faq_app],
27
  ["Supervised Fine-Tuning", "FAQ"],
src/distilabel_dataset_generator/apps/sft.py CHANGED
@@ -1,6 +1,5 @@
1
  import multiprocessing
2
  import time
3
- from typing import Union
4
 
5
  import gradio as gr
6
  import pandas as pd
@@ -8,7 +7,7 @@ from distilabel.distiset import Distiset
8
 
9
  from src.distilabel_dataset_generator.pipelines.sft import (
10
  DEFAULT_DATASET,
11
- DEFAULT_DATASET_DESCRIPTION,
12
  DEFAULT_SYSTEM_PROMPT,
13
  PROMPT_CREATION_PROMPT,
14
  generate_pipeline_code,
@@ -19,6 +18,7 @@ from src.distilabel_dataset_generator.utils import (
19
  get_login_button,
20
  get_org_dropdown,
21
  get_token,
 
22
  )
23
 
24
 
@@ -141,13 +141,6 @@ def generate_dataset(
141
  return pd.DataFrame(outputs)
142
 
143
 
144
- def swap_visibilty(profile: Union[gr.OAuthProfile, None]):
145
- if profile is None:
146
- return gr.update(elem_classes=["main_ui_logged_out"]), gr.Mark
147
- else:
148
- return gr.update(elem_classes=["main_ui_logged_in"])
149
-
150
-
151
  css = """
152
  .main_ui_logged_out{opacity: 0.3; pointer-events: none}
153
  """
@@ -162,14 +155,19 @@ with gr.Blocks(
162
  get_login_button()
163
  with gr.Column(scale=2):
164
  gr.Markdown(
165
- "This token will only be used to push the dataset to the Hugging Face Hub. It won't be incurring any costs because we are using Free Serverless Inference Endpoints."
166
  )
167
 
168
  gr.Markdown("## Iterate on a sample dataset")
169
  with gr.Column() as main_ui:
170
  dataset_description = gr.TextArea(
171
  label="Provide a description of the dataset",
172
- value=DEFAULT_DATASET_DESCRIPTION,
 
 
 
 
 
173
  )
174
  with gr.Row():
175
  gr.Column(scale=1)
 
1
  import multiprocessing
2
  import time
 
3
 
4
  import gradio as gr
5
  import pandas as pd
 
7
 
8
  from src.distilabel_dataset_generator.pipelines.sft import (
9
  DEFAULT_DATASET,
10
+ DEFAULT_DATASET_DESCRIPTIONS,
11
  DEFAULT_SYSTEM_PROMPT,
12
  PROMPT_CREATION_PROMPT,
13
  generate_pipeline_code,
 
18
  get_login_button,
19
  get_org_dropdown,
20
  get_token,
21
+ swap_visibilty,
22
  )
23
 
24
 
 
141
  return pd.DataFrame(outputs)
142
 
143
 
 
 
 
 
 
 
 
144
  css = """
145
  .main_ui_logged_out{opacity: 0.3; pointer-events: none}
146
  """
 
155
  get_login_button()
156
  with gr.Column(scale=2):
157
  gr.Markdown(
158
+ "This token will only be used to push the dataset to the Hugging Face Hub. There are no generation costs because we are using Free Serverless Inference Endpoints."
159
  )
160
 
161
  gr.Markdown("## Iterate on a sample dataset")
162
  with gr.Column() as main_ui:
163
  dataset_description = gr.TextArea(
164
  label="Provide a description of the dataset",
165
+ value=DEFAULT_DATASET_DESCRIPTIONS[0],
166
+ )
167
+ examples = gr.Examples(
168
+ elem_id="system_prompt_examples",
169
+ examples=[[example] for example in DEFAULT_DATASET_DESCRIPTIONS[1:]],
170
+ inputs=[dataset_description],
171
  )
172
  with gr.Row():
173
  gr.Column(scale=1)
src/distilabel_dataset_generator/pipelines/sft.py CHANGED
@@ -115,8 +115,11 @@ User dataset description:
115
  """
116
 
117
  MODEL = "meta-llama/Meta-Llama-3.1-70B-Instruct"
118
- DEFAULT_DATASET_DESCRIPTION = (
119
- "A chemistry dataset for an assistant that explains chemical reactions and formulas"
 
 
 
120
  )
121
  DEFAULT_SYSTEM_PROMPT = "You are an AI assistant specializing in chemistry and chemical reactions. Your purpose is to help users understand and work with chemical formulas, equations, and reactions. Provide clear explanations of reaction mechanisms, assist in balancing chemical equations, and offer guidance on the interpretation of chemical structures. Explain the roles of reactants, products, catalysts, and solvents, and define key chemistry terms when necessary."
122
  DEFAULT_DATASET = pd.DataFrame(
 
115
  """
116
 
117
  MODEL = "meta-llama/Meta-Llama-3.1-70B-Instruct"
118
# Example dataset descriptions shown in the SFT app: the first entry is used as
# the pre-filled value of the description box, and the remaining entries are
# offered as clickable examples. Keep the chemistry example first.
DEFAULT_DATASET_DESCRIPTIONS = (
    "A chemistry dataset for an assistant that explains chemical reactions and formulas.",
    # Grammar fix: "an assistant that works" (was "that work").
    "A dataset for an assistant that works in the customer support domain.",
    "A dataset for an assistant that writes code.",
    "A dataset for an assistant that works in the legal domain.",
)
124
  DEFAULT_SYSTEM_PROMPT = "You are an AI assistant specializing in chemistry and chemical reactions. Your purpose is to help users understand and work with chemical formulas, equations, and reactions. Provide clear explanations of reaction mechanisms, assist in balancing chemical equations, and offer guidance on the interpretation of chemical structures. Explain the roles of reactants, products, catalysts, and solvents, and define key chemistry terms when necessary."
125
  DEFAULT_DATASET = pd.DataFrame(
src/distilabel_dataset_generator/utils.py CHANGED
@@ -52,8 +52,8 @@ def list_orgs(oauth_token: OAuthToken = None):
52
  return list(set(organisations))
53
 
54
 
55
- def get_org_dropdown(token: OAuthToken = None):
56
- orgs = list_orgs(token)
57
  return gr.Dropdown(
58
  label="Organization",
59
  choices=orgs,
@@ -62,8 +62,15 @@ def get_org_dropdown(token: OAuthToken = None):
62
  )
63
 
64
 
65
- def get_token(token: OAuthToken = None):
66
- if token:
67
- return token.token
68
  else:
69
  return ""
 
 
 
 
 
 
 
 
52
  return list(set(organisations))
53
 
54
 
55
+ def get_org_dropdown(oauth_token: OAuthToken = None):
56
+ orgs = list_orgs(oauth_token)
57
  return gr.Dropdown(
58
  label="Organization",
59
  choices=orgs,
 
62
  )
63
 
64
 
65
def get_token(oauth_token: OAuthToken = None):
    """Return the token string carried by ``oauth_token``.

    Args:
        oauth_token: The OAuth token object, or ``None`` (the default)
            when no token is available.

    Returns:
        ``oauth_token.token`` when ``oauth_token`` is truthy, otherwise
        an empty string.
    """
    return oauth_token.token if oauth_token else ""
70
+
71
+
72
def swap_visibilty(oauth_token: OAuthToken = None):
    """Build a Gradio update that switches the main UI's CSS class by login state.

    Args:
        oauth_token: The OAuth token object, or ``None`` (the default)
            when no token is available.

    Returns:
        ``gr.update(elem_classes=["main_ui_logged_out"])`` when
        ``oauth_token`` is ``None``, otherwise
        ``gr.update(elem_classes=["main_ui_logged_in"])``.
    """
    # Only an explicit ``None`` counts as logged out; any other value is
    # treated as logged in.
    css_class = "main_ui_logged_out" if oauth_token is None else "main_ui_logged_in"
    return gr.update(elem_classes=[css_class])