Commit
•
67fa2ba
1
Parent(s):
ccd1c40
feat: disable api requests
Browse files
src/distilabel_dataset_generator/apps/sft.py
CHANGED
@@ -17,9 +17,9 @@ from src.distilabel_dataset_generator.pipelines.sft import (
|
|
17 |
get_prompt_generation_step,
|
18 |
)
|
19 |
from src.distilabel_dataset_generator.utils import (
|
|
|
20 |
get_login_button,
|
21 |
get_org_dropdown,
|
22 |
-
get_token,
|
23 |
swap_visibilty,
|
24 |
)
|
25 |
|
@@ -76,7 +76,7 @@ def generate_dataset(
|
|
76 |
private: bool = True,
|
77 |
org_name: str = None,
|
78 |
repo_name: str = None,
|
79 |
-
oauth_token:
|
80 |
progress=gr.Progress(),
|
81 |
is_sample: bool = False,
|
82 |
):
|
@@ -157,7 +157,9 @@ def generate_dataset(
|
|
157 |
return pd.DataFrame(outputs)
|
158 |
|
159 |
|
160 |
-
def upload_pipeline_code(
|
|
|
|
|
161 |
with io.BytesIO(pipeline_code.encode("utf-8")) as f:
|
162 |
upload_file(
|
163 |
path_or_fileobj=f,
|
@@ -269,13 +271,6 @@ with gr.Blocks(
|
|
269 |
)
|
270 |
|
271 |
with gr.Row(variant="panel"):
|
272 |
-
oauth_token = gr.Textbox(
|
273 |
-
value=get_token(),
|
274 |
-
label="Hugging Face Token",
|
275 |
-
placeholder="hf_...",
|
276 |
-
type="password",
|
277 |
-
visible=False,
|
278 |
-
)
|
279 |
org_name = get_org_dropdown()
|
280 |
repo_name = gr.Textbox(
|
281 |
label="Repo name", placeholder="dataset_name", value="my-distiset"
|
@@ -352,13 +347,12 @@ with gr.Blocks(
|
|
352 |
private,
|
353 |
org_name,
|
354 |
repo_name,
|
355 |
-
oauth_token,
|
356 |
],
|
357 |
outputs=[final_dataset],
|
358 |
show_progress=True,
|
359 |
).then(
|
360 |
fn=upload_pipeline_code,
|
361 |
-
inputs=[pipeline_code, org_name, repo_name, oauth_token],
|
362 |
outputs=[],
|
363 |
).success(
|
364 |
fn=show_success_message,
|
@@ -381,6 +375,5 @@ with gr.Blocks(
|
|
381 |
inputs=[system_prompt, num_turns, num_rows],
|
382 |
outputs=[pipeline_code],
|
383 |
)
|
384 |
-
app.load(get_token, outputs=[oauth_token])
|
385 |
app.load(get_org_dropdown, outputs=[org_name])
|
386 |
app.load(fn=swap_visibilty, outputs=main_ui)
|
|
|
17 |
get_prompt_generation_step,
|
18 |
)
|
19 |
from src.distilabel_dataset_generator.utils import (
|
20 |
+
OAuthToken,
|
21 |
get_login_button,
|
22 |
get_org_dropdown,
|
|
|
23 |
swap_visibilty,
|
24 |
)
|
25 |
|
|
|
76 |
private: bool = True,
|
77 |
org_name: str = None,
|
78 |
repo_name: str = None,
|
79 |
+
oauth_token: OAuthToken = None,
|
80 |
progress=gr.Progress(),
|
81 |
is_sample: bool = False,
|
82 |
):
|
|
|
157 |
return pd.DataFrame(outputs)
|
158 |
|
159 |
|
160 |
+
def upload_pipeline_code(
|
161 |
+
pipeline_code, org_name, repo_name, oauth_token: OAuthToken = None
|
162 |
+
):
|
163 |
with io.BytesIO(pipeline_code.encode("utf-8")) as f:
|
164 |
upload_file(
|
165 |
path_or_fileobj=f,
|
|
|
271 |
)
|
272 |
|
273 |
with gr.Row(variant="panel"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
274 |
org_name = get_org_dropdown()
|
275 |
repo_name = gr.Textbox(
|
276 |
label="Repo name", placeholder="dataset_name", value="my-distiset"
|
|
|
347 |
private,
|
348 |
org_name,
|
349 |
repo_name,
|
|
|
350 |
],
|
351 |
outputs=[final_dataset],
|
352 |
show_progress=True,
|
353 |
).then(
|
354 |
fn=upload_pipeline_code,
|
355 |
+
inputs=[pipeline_code, org_name, repo_name],
|
356 |
outputs=[],
|
357 |
).success(
|
358 |
fn=show_success_message,
|
|
|
375 |
inputs=[system_prompt, num_turns, num_rows],
|
376 |
outputs=[pipeline_code],
|
377 |
)
|
|
|
378 |
app.load(get_org_dropdown, outputs=[org_name])
|
379 |
app.load(fn=swap_visibilty, outputs=main_ui)
|
src/distilabel_dataset_generator/pipelines/sft.py
CHANGED
@@ -89,7 +89,7 @@ BRAINSTORMING_PROMPT = (
|
|
89 |
|
90 |
PROMPT_CREATION_PROMPT = f"""You are an AI assistant specialized in generating very precise prompts for dataset creation.
|
91 |
|
92 |
-
Your task is to write a prompt following the instruction of the user. Respond with the prompt and nothing else.
|
93 |
|
94 |
In the generated prompt always finish with this sentence: User questions are direct and concise.
|
95 |
|
@@ -121,7 +121,7 @@ User dataset description:
|
|
121 |
MODEL = "meta-llama/Meta-Llama-3.1-70B-Instruct"
|
122 |
DEFAULT_DATASET_DESCRIPTIONS = (
|
123 |
"rude customer assistant for a phone company",
|
124 |
-
"assistant that solves math puzzles using python"
|
125 |
)
|
126 |
DEFAULT_SYSTEM_PROMPTS = [
|
127 |
"""You are a customer support agent for a phone company. Your purpose is to assist customers with their phone-related issues, but you are not very patient and tend to be a bit rude. User queries will be straightforward and clear, but you will respond in a somewhat blunt and curt manner. Remember to keep your responses concise and to the point. User queries are often about phone plans, billing, and technical issues. Your responses should be direct and focus on resolving the issue at hand, but with a slightly abrasive tone. User queries will be concise and to the point, User queries are often about phone plans, billing, and technical issues.""",
|
|
|
89 |
|
90 |
PROMPT_CREATION_PROMPT = f"""You are an AI assistant specialized in generating very precise prompts for dataset creation.
|
91 |
|
92 |
+
Your task is to write a prompt following the instruction of the user. Respond with the prompt and nothing else.
|
93 |
|
94 |
In the generated prompt always finish with this sentence: User questions are direct and concise.
|
95 |
|
|
|
121 |
MODEL = "meta-llama/Meta-Llama-3.1-70B-Instruct"
|
122 |
DEFAULT_DATASET_DESCRIPTIONS = (
|
123 |
"rude customer assistant for a phone company",
|
124 |
+
"assistant that solves math puzzles using python",
|
125 |
)
|
126 |
DEFAULT_SYSTEM_PROMPTS = [
|
127 |
"""You are a customer support agent for a phone company. Your purpose is to assist customers with their phone-related issues, but you are not very patient and tend to be a bit rude. User queries will be straightforward and clear, but you will respond in a somewhat blunt and curt manner. Remember to keep your responses concise and to the point. User queries are often about phone plans, billing, and technical issues. Your responses should be direct and focus on resolving the issue at hand, but with a slightly abrasive tone. User queries will be concise and to the point, User queries are often about phone plans, billing, and technical issues.""",
|