Spaces:
Running
Running
inoki-giskard
committed on
Commit
•
01942d8
1
Parent(s):
d6b3b9f
Add dataset probing and validation
Browse files
app.py
CHANGED
@@ -1,10 +1,50 @@
|
|
1 |
import gradio as gr
|
|
|
2 |
|
3 |
|
4 |
theme = gr.themes.Soft(
|
5 |
primary_hue="green",
|
6 |
)
|
7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
with gr.Blocks(theme=theme) as iface:
|
9 |
with gr.Row():
|
10 |
with gr.Column():
|
@@ -28,13 +68,38 @@ with gr.Blocks(theme=theme) as iface:
|
|
28 |
placeholder="tweet_eval",
|
29 |
)
|
30 |
|
31 |
-
gr.Dropdown(
|
32 |
label="Hugging Face dataset subset",
|
|
|
|
|
|
|
|
|
|
|
33 |
)
|
34 |
|
35 |
-
gr.Dropdown(
|
36 |
label="Hugging Face dataset split",
|
|
|
|
|
|
|
|
|
|
|
37 |
)
|
38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
iface.queue(max_size=20)
|
40 |
iface.launch()
|
|
|
1 |
import gradio as gr
|
2 |
+
import datasets
|
3 |
|
4 |
|
5 |
theme = gr.themes.Soft(
|
6 |
primary_hue="green",
|
7 |
)
|
8 |
|
9 |
+
|
10 |
+
def check_dataset(dataset_id, dataset_config="default", dataset_split="test"):
    """Probe a Hugging Face dataset and validate the requested config and split.

    The shape of the returned 3-tuple tells the caller what went wrong:

    * ``(None, dataset_config, dataset_split)`` -- the dataset could not be
      probed (for example it does not exist or is not accessible).
    * ``(dataset_id, [config, ...], dataset_split)`` -- ``dataset_config`` is
      not a config of the dataset; the list holds the available configs.
    * ``(dataset_id, None, [split, ...])`` -- ``dataset_split`` is not a split
      of the chosen config; the list holds the available splits.
    * ``(dataset_id, dataset_config, dataset_split)`` -- everything is valid.

    Args:
        dataset_id: Dataset identifier passed to the ``datasets`` library.
        dataset_config: Name of the dataset subset (config) to validate.
        dataset_split: Name of the split to validate.

    Returns:
        A ``(dataset_id, config, split)`` tuple as described above.
    """
    try:
        configs = datasets.get_dataset_config_names(dataset_id)
    except Exception:
        # Dataset may not exist (or may be private); the library raises
        # several unrelated exception types here, so catch broadly at this
        # validation boundary and report "not accessible" to the caller.
        return None, dataset_config, dataset_split

    if dataset_config not in configs:
        # Need to choose dataset subset (config)
        return dataset_id, configs, dataset_split

    try:
        # Only the split names are needed, so query them directly instead of
        # loading the whole dataset just to inspect its keys.
        splits = list(datasets.get_dataset_split_names(dataset_id, dataset_config))
    except Exception:
        # The config is listed but its splits cannot be resolved; treat the
        # dataset as inaccessible rather than crashing the UI handler.
        return None, dataset_config, dataset_split

    if dataset_split not in splits:
        # Need to choose dataset split
        return dataset_id, None, splits

    return dataset_id, dataset_config, dataset_split
|
31 |
+
|
32 |
+
|
33 |
+
def try_submit(dataset_id, dataset_config, dataset_split):
    """Validate the dataset selection and compute new values for the dropdowns.

    Calls ``check_dataset`` and turns its result into the two outputs wired to
    the config and split dropdowns. When the requested config or split is
    invalid, a warning is shown and the matching dropdown is refreshed with
    the valid choices; otherwise the submitted value is passed back unchanged.

    Args:
        dataset_id: Dataset identifier entered by the user.
        dataset_config: Currently selected dataset subset (config).
        dataset_split: Currently selected dataset split.

    Returns:
        A ``(config, split)`` pair; each item is either the submitted value or
        a ``gr.Dropdown.update(...)`` carrying the valid choices.
    """
    # Validate dataset
    d_id, config, split = check_dataset(
        dataset_id=dataset_id,
        dataset_config=dataset_config,
        dataset_split=dataset_split,
    )

    if d_id is None:
        gr.Warning(f'Dataset "{dataset_id}" is not accessible. Please set your HF_TOKEN if it is a private dataset.')
        # Nothing else can be validated; leave both dropdowns as submitted.
        return dataset_config, dataset_split

    if isinstance(config, list):
        gr.Warning(f'Dataset "{dataset_id}" does not have "{dataset_config}" config. Please choose a valid config.')
        # Guard against an empty list so the handler does not raise IndexError.
        config = gr.Dropdown.update(choices=config, value=config[0] if config else None)
    elif config is None:
        # check_dataset returns None here when the config itself was fine;
        # keep the user's selection instead of clearing the dropdown.
        config = dataset_config

    if isinstance(split, list):
        gr.Warning(f'Dataset "{dataset_id}" does not have "{dataset_split}" split. Please choose a valid split.')
        split = gr.Dropdown.update(choices=split, value=split[0] if split else None)
    elif split is None:
        split = dataset_split

    return config, split
|
47 |
+
|
48 |
with gr.Blocks(theme=theme) as iface:
|
49 |
with gr.Row():
|
50 |
with gr.Column():
|
|
|
68 |
placeholder="tweet_eval",
|
69 |
)
|
70 |
|
71 |
+
dataset_config_input = gr.Dropdown(
|
72 |
label="Hugging Face dataset subset",
|
73 |
+
choices=[
|
74 |
+
"default",
|
75 |
+
],
|
76 |
+
allow_custom_value=True,
|
77 |
+
value="default",
|
78 |
)
|
79 |
|
80 |
+
dataset_split_input = gr.Dropdown(
|
81 |
label="Hugging Face dataset split",
|
82 |
+
choices=[
|
83 |
+
"test",
|
84 |
+
],
|
85 |
+
allow_custom_value=True,
|
86 |
+
value="test",
|
87 |
)
|
88 |
|
89 |
+
with gr.Row():
|
90 |
+
run_btn = gr.Button("Validate and submit", variant="primary")
|
91 |
+
run_btn.click(
|
92 |
+
try_submit,
|
93 |
+
inputs=[
|
94 |
+
dataset_id_input,
|
95 |
+
dataset_config_input,
|
96 |
+
dataset_split_input
|
97 |
+
],
|
98 |
+
outputs=[
|
99 |
+
dataset_config_input,
|
100 |
+
dataset_split_input
|
101 |
+
],
|
102 |
+
)
|
103 |
+
|
104 |
iface.queue(max_size=20)
|
105 |
iface.launch()
|