Spaces:
Running
Running
inoki-giskard
committed on
Commit
•
53fe897
1
Parent(s):
3833563
Unify dataset checking, show header dataset
Browse files- app_text_classification.py +40 -22
- text_classification_ui_helpers.py +59 -21
app_text_classification.py
CHANGED
@@ -2,14 +2,16 @@ import uuid
|
|
2 |
|
3 |
import gradio as gr
|
4 |
|
5 |
-
from io_utils import
|
6 |
-
from text_classification_ui_helpers import (
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
|
|
|
|
13 |
from wordings import CONFIRM_MAPPING_DETAILS_MD, INTRODUCTION_MD
|
14 |
|
15 |
MAX_LABELS = 40
|
@@ -38,16 +40,19 @@ def get_demo():
|
|
38 |
)
|
39 |
|
40 |
with gr.Row():
|
41 |
-
dataset_config_input = gr.Dropdown(label="Dataset Config", visible=False)
|
42 |
-
dataset_split_input = gr.Dropdown(label="Dataset Split", visible=False)
|
43 |
|
44 |
with gr.Row():
|
45 |
example_btn = gr.Button(
|
46 |
-
"Auto-align Columns & Get Sample Prediction",
|
47 |
-
visible=True,
|
48 |
variant="primary",
|
49 |
-
interactive=False
|
|
|
50 |
|
|
|
|
|
51 |
with gr.Row():
|
52 |
example_input = gr.HTML(visible=False)
|
53 |
with gr.Row():
|
@@ -103,18 +108,29 @@ def get_demo():
|
|
103 |
)
|
104 |
|
105 |
with gr.Row():
|
106 |
-
logs = gr.Textbox(
|
|
|
|
|
|
|
|
|
|
|
107 |
|
108 |
dataset_id_input.change(
|
109 |
-
|
110 |
inputs=[dataset_id_input],
|
111 |
-
outputs=[dataset_config_input],
|
112 |
)
|
113 |
|
114 |
dataset_config_input.change(
|
115 |
-
|
116 |
inputs=[dataset_id_input, dataset_config_input],
|
117 |
-
outputs=[dataset_split_input],
|
|
|
|
|
|
|
|
|
|
|
|
|
118 |
)
|
119 |
|
120 |
scanners.change(write_scanners, inputs=[scanners, uid_label])
|
@@ -155,15 +171,17 @@ def get_demo():
|
|
155 |
model_id_input.change,
|
156 |
dataset_id_input.change,
|
157 |
dataset_config_input.change,
|
158 |
-
dataset_split_input.change
|
|
|
159 |
fn=precheck_model_ds_enable_example_btn,
|
160 |
inputs=[
|
161 |
model_id_input,
|
162 |
dataset_id_input,
|
163 |
dataset_config_input,
|
164 |
dataset_split_input,
|
165 |
-
],
|
166 |
-
outputs=[example_btn]
|
|
|
167 |
|
168 |
gr.on(
|
169 |
triggers=[
|
@@ -222,6 +240,6 @@ def get_demo():
|
|
222 |
gr.on(
|
223 |
triggers=[label.input for label in column_mappings],
|
224 |
fn=enable_run_btn,
|
225 |
-
inputs=None,
|
226 |
outputs=[run_btn],
|
227 |
)
|
|
|
2 |
|
3 |
import gradio as gr
|
4 |
|
5 |
+
from io_utils import get_logs_file, read_scanners, write_scanners
|
6 |
+
from text_classification_ui_helpers import (
|
7 |
+
align_columns_and_show_prediction,
|
8 |
+
check_dataset,
|
9 |
+
deselect_run_inference,
|
10 |
+
precheck_model_ds_enable_example_btn,
|
11 |
+
select_run_mode,
|
12 |
+
try_submit,
|
13 |
+
write_column_mapping_to_config,
|
14 |
+
)
|
15 |
from wordings import CONFIRM_MAPPING_DETAILS_MD, INTRODUCTION_MD
|
16 |
|
17 |
MAX_LABELS = 40
|
|
|
40 |
)
|
41 |
|
42 |
with gr.Row():
|
43 |
+
dataset_config_input = gr.Dropdown(label="Dataset Config", visible=False, allow_custom_value=True)
|
44 |
+
dataset_split_input = gr.Dropdown(label="Dataset Split", visible=False, allow_custom_value=True)
|
45 |
|
46 |
with gr.Row():
|
47 |
example_btn = gr.Button(
|
48 |
+
"Auto-align Columns & Get Sample Prediction",
|
49 |
+
visible=True,
|
50 |
variant="primary",
|
51 |
+
interactive=False,
|
52 |
+
)
|
53 |
|
54 |
+
with gr.Row():
|
55 |
+
first_line_ds = gr.DataFrame(label="Dataset preview", visible=False)
|
56 |
with gr.Row():
|
57 |
example_input = gr.HTML(visible=False)
|
58 |
with gr.Row():
|
|
|
108 |
)
|
109 |
|
110 |
with gr.Row():
|
111 |
+
logs = gr.Textbox(
|
112 |
+
value=get_logs_file,
|
113 |
+
label="Giskard Bot Evaluation Log:",
|
114 |
+
visible=False,
|
115 |
+
every=0.5,
|
116 |
+
)
|
117 |
|
118 |
dataset_id_input.change(
|
119 |
+
check_dataset,
|
120 |
inputs=[dataset_id_input],
|
121 |
+
outputs=[dataset_config_input, dataset_split_input, first_line_ds],
|
122 |
)
|
123 |
|
124 |
dataset_config_input.change(
|
125 |
+
check_dataset,
|
126 |
inputs=[dataset_id_input, dataset_config_input],
|
127 |
+
outputs=[dataset_config_input, dataset_split_input, first_line_ds],
|
128 |
+
)
|
129 |
+
|
130 |
+
dataset_split_input.change(
|
131 |
+
check_dataset,
|
132 |
+
inputs=[dataset_id_input, dataset_config_input, dataset_split_input],
|
133 |
+
outputs=[dataset_config_input, dataset_split_input, first_line_ds],
|
134 |
)
|
135 |
|
136 |
scanners.change(write_scanners, inputs=[scanners, uid_label])
|
|
|
171 |
model_id_input.change,
|
172 |
dataset_id_input.change,
|
173 |
dataset_config_input.change,
|
174 |
+
dataset_split_input.change,
|
175 |
+
],
|
176 |
fn=precheck_model_ds_enable_example_btn,
|
177 |
inputs=[
|
178 |
model_id_input,
|
179 |
dataset_id_input,
|
180 |
dataset_config_input,
|
181 |
dataset_split_input,
|
182 |
+
],
|
183 |
+
outputs=[example_btn],
|
184 |
+
)
|
185 |
|
186 |
gr.on(
|
187 |
triggers=[
|
|
|
240 |
gr.on(
|
241 |
triggers=[label.input for label in column_mappings],
|
242 |
fn=enable_run_btn,
|
243 |
+
inputs=None, # FIXME
|
244 |
outputs=[run_btn],
|
245 |
)
|
text_classification_ui_helpers.py
CHANGED
@@ -7,15 +7,27 @@ import uuid
|
|
7 |
|
8 |
import datasets
|
9 |
import gradio as gr
|
|
|
10 |
from transformers.pipelines import TextClassificationPipeline
|
11 |
|
12 |
-
from io_utils import (
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
MAX_LABELS = 40
|
21 |
MAX_FEATURES = 20
|
@@ -32,24 +44,50 @@ HF_GSK_HUB_UNLOCK_TOKEN = "GSK_HUB_UNLOCK_TOKEN"
|
|
32 |
LEADERBOARD = "giskard-bot/evaluator-leaderboard"
|
33 |
|
34 |
|
35 |
-
|
36 |
-
try:
|
37 |
-
# write_column_mapping(None, uid) # reset column mapping
|
38 |
-
configs = datasets.get_dataset_config_names(dataset_id)
|
39 |
-
return gr.Dropdown(configs, value=configs[0], visible=True)
|
40 |
-
except Exception:
|
41 |
-
# Dataset may not exist
|
42 |
-
pass
|
43 |
|
44 |
|
45 |
-
def
|
|
|
|
|
|
|
46 |
try:
|
47 |
-
|
48 |
-
|
49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
# Dataset may not exist
|
51 |
-
|
52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
|
54 |
|
55 |
def select_run_mode(run_inf):
|
|
|
7 |
|
8 |
import datasets
|
9 |
import gradio as gr
|
10 |
+
import pandas as pd
|
11 |
from transformers.pipelines import TextClassificationPipeline
|
12 |
|
13 |
+
from io_utils import (
|
14 |
+
get_yaml_path,
|
15 |
+
read_column_mapping,
|
16 |
+
save_job_to_pipe,
|
17 |
+
write_column_mapping,
|
18 |
+
write_log_to_user_file,
|
19 |
+
)
|
20 |
+
from text_classification import (
|
21 |
+
check_model,
|
22 |
+
get_example_prediction,
|
23 |
+
get_labels_and_features_from_dataset,
|
24 |
+
)
|
25 |
+
from wordings import (
|
26 |
+
CHECK_CONFIG_OR_SPLIT_RAW,
|
27 |
+
CONFIRM_MAPPING_DETAILS_FAIL_RAW,
|
28 |
+
MAPPING_STYLED_ERROR_WARNING,
|
29 |
+
get_styled_input,
|
30 |
+
)
|
31 |
|
32 |
MAX_LABELS = 40
|
33 |
MAX_FEATURES = 20
|
|
|
44 |
LEADERBOARD = "giskard-bot/evaluator-leaderboard"
|
45 |
|
46 |
|
47 |
+
# Name the logger after the module rather than its file path so it takes part
# in the dotted logger hierarchy and can be configured by module name.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
|
49 |
|
50 |
+
def check_dataset(dataset_id, dataset_config=None, dataset_split=None):
    """Resolve a dataset's configs and splits and build a preview of its rows.

    Args:
        dataset_id: Dataset identifier handed to ``datasets``.
        dataset_config: Config to load. When empty, the first available
            config is used and the split is reset to that config's first
            split.
        dataset_split: Split to preview. When empty, or not present in the
            loaded dataset, the first available split is used.

    Returns:
        A 3-tuple ``(config dropdown, split dropdown, preview dataframe)`` of
        Gradio components. On success the preview holds the first five rows
        of the selected split and is visible. If anything fails while
        querying or loading the dataset, the failure is logged and the
        preview is returned empty and hidden, with the dropdowns populated
        from whatever could be resolved (``["default"]`` otherwise).
    """
    configs = ["default"]
    splits = ["default"]
    logger.info(f"Loading {dataset_id}, {dataset_config}, {dataset_split}")
    try:
        available_configs = datasets.get_dataset_config_names(dataset_id)
        if available_configs:
            configs = available_configs

        config_was_given = bool(dataset_config)
        if not config_was_given:
            dataset_config = configs[0]

        # Load once and reuse the result for both the split list and the
        # preview instead of fetching the dataset twice.
        dataset_dict = datasets.load_dataset(dataset_id, dataset_config)
        available_splits = list(dataset_dict.keys())
        if not available_splits:
            raise ValueError("dataset has no splits")
        splits = available_splits

        # A freshly chosen config invalidates any previous split; a split
        # name that the loaded config does not contain is replaced too.
        if not config_was_given or dataset_split not in splits:
            dataset_split = splits[0]

        dataframe: pd.DataFrame = dataset_dict[dataset_split].to_pandas().head(5)
    except Exception as e:
        # UI boundary: the dataset may not exist (or may fail to load), so
        # degrade to a hidden preview rather than crashing the event handler.
        logger.warning(
            f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}"
        )
        return (
            gr.Dropdown(configs, value=dataset_config or configs[0], visible=True),
            gr.Dropdown(splits, value=splits[0], visible=True),
            gr.DataFrame(pd.DataFrame(), visible=False),
        )

    return (
        gr.Dropdown(configs, value=dataset_config, visible=True),
        gr.Dropdown(splits, value=dataset_split, visible=True),
        gr.DataFrame(dataframe, visible=True),
    )
|
91 |
|
92 |
|
93 |
def select_run_mode(run_inf):
|