inoki-giskard committed on
Commit
53fe897
·
1 Parent(s): 3833563

Unify dataset checking, show header dataset

Browse files
app_text_classification.py CHANGED
@@ -2,14 +2,16 @@ import uuid
2
 
3
  import gradio as gr
4
 
5
- from io_utils import (get_logs_file, read_scanners, write_scanners)
6
- from text_classification_ui_helpers import (check_dataset_and_get_config,
7
- check_dataset_and_get_split,
8
- align_columns_and_show_prediction,
9
- deselect_run_inference,
10
- select_run_mode, try_submit,
11
- write_column_mapping_to_config,
12
- precheck_model_ds_enable_example_btn)
 
 
13
  from wordings import CONFIRM_MAPPING_DETAILS_MD, INTRODUCTION_MD
14
 
15
  MAX_LABELS = 40
@@ -38,16 +40,19 @@ def get_demo():
38
  )
39
 
40
  with gr.Row():
41
- dataset_config_input = gr.Dropdown(label="Dataset Config", visible=False)
42
- dataset_split_input = gr.Dropdown(label="Dataset Split", visible=False)
43
 
44
  with gr.Row():
45
  example_btn = gr.Button(
46
- "Auto-align Columns & Get Sample Prediction",
47
- visible=True,
48
  variant="primary",
49
- interactive=False)
 
50
 
 
 
51
  with gr.Row():
52
  example_input = gr.HTML(visible=False)
53
  with gr.Row():
@@ -103,18 +108,29 @@ def get_demo():
103
  )
104
 
105
  with gr.Row():
106
- logs = gr.Textbox(value=get_logs_file, label="Giskard Bot Evaluation Log:", visible=False, every=0.5)
 
 
 
 
 
107
 
108
  dataset_id_input.change(
109
- check_dataset_and_get_config,
110
  inputs=[dataset_id_input],
111
- outputs=[dataset_config_input],
112
  )
113
 
114
  dataset_config_input.change(
115
- check_dataset_and_get_split,
116
  inputs=[dataset_id_input, dataset_config_input],
117
- outputs=[dataset_split_input],
 
 
 
 
 
 
118
  )
119
 
120
  scanners.change(write_scanners, inputs=[scanners, uid_label])
@@ -155,15 +171,17 @@ def get_demo():
155
  model_id_input.change,
156
  dataset_id_input.change,
157
  dataset_config_input.change,
158
- dataset_split_input.change],
 
159
  fn=precheck_model_ds_enable_example_btn,
160
  inputs=[
161
  model_id_input,
162
  dataset_id_input,
163
  dataset_config_input,
164
  dataset_split_input,
165
- ],
166
- outputs=[example_btn])
 
167
 
168
  gr.on(
169
  triggers=[
@@ -222,6 +240,6 @@ def get_demo():
222
  gr.on(
223
  triggers=[label.input for label in column_mappings],
224
  fn=enable_run_btn,
225
- inputs=None, # FIXME
226
  outputs=[run_btn],
227
  )
 
2
 
3
  import gradio as gr
4
 
5
+ from io_utils import get_logs_file, read_scanners, write_scanners
6
+ from text_classification_ui_helpers import (
7
+ align_columns_and_show_prediction,
8
+ check_dataset,
9
+ deselect_run_inference,
10
+ precheck_model_ds_enable_example_btn,
11
+ select_run_mode,
12
+ try_submit,
13
+ write_column_mapping_to_config,
14
+ )
15
  from wordings import CONFIRM_MAPPING_DETAILS_MD, INTRODUCTION_MD
16
 
17
  MAX_LABELS = 40
 
40
  )
41
 
42
  with gr.Row():
43
+ dataset_config_input = gr.Dropdown(label="Dataset Config", visible=False, allow_custom_value=True)
44
+ dataset_split_input = gr.Dropdown(label="Dataset Split", visible=False, allow_custom_value=True)
45
 
46
  with gr.Row():
47
  example_btn = gr.Button(
48
+ "Auto-align Columns & Get Sample Prediction",
49
+ visible=True,
50
  variant="primary",
51
+ interactive=False,
52
+ )
53
 
54
+ with gr.Row():
55
+ first_line_ds = gr.DataFrame(label="Dataset preview", visible=False)
56
  with gr.Row():
57
  example_input = gr.HTML(visible=False)
58
  with gr.Row():
 
108
  )
109
 
110
  with gr.Row():
111
+ logs = gr.Textbox(
112
+ value=get_logs_file,
113
+ label="Giskard Bot Evaluation Log:",
114
+ visible=False,
115
+ every=0.5,
116
+ )
117
 
118
  dataset_id_input.change(
119
+ check_dataset,
120
  inputs=[dataset_id_input],
121
+ outputs=[dataset_config_input, dataset_split_input, first_line_ds],
122
  )
123
 
124
  dataset_config_input.change(
125
+ check_dataset,
126
  inputs=[dataset_id_input, dataset_config_input],
127
+ outputs=[dataset_config_input, dataset_split_input, first_line_ds],
128
+ )
129
+
130
+ dataset_split_input.change(
131
+ check_dataset,
132
+ inputs=[dataset_id_input, dataset_config_input, dataset_split_input],
133
+ outputs=[dataset_config_input, dataset_split_input, first_line_ds],
134
  )
135
 
136
  scanners.change(write_scanners, inputs=[scanners, uid_label])
 
171
  model_id_input.change,
172
  dataset_id_input.change,
173
  dataset_config_input.change,
174
+ dataset_split_input.change,
175
+ ],
176
  fn=precheck_model_ds_enable_example_btn,
177
  inputs=[
178
  model_id_input,
179
  dataset_id_input,
180
  dataset_config_input,
181
  dataset_split_input,
182
+ ],
183
+ outputs=[example_btn],
184
+ )
185
 
186
  gr.on(
187
  triggers=[
 
240
  gr.on(
241
  triggers=[label.input for label in column_mappings],
242
  fn=enable_run_btn,
243
+ inputs=None, # FIXME
244
  outputs=[run_btn],
245
  )
text_classification_ui_helpers.py CHANGED
@@ -7,15 +7,27 @@ import uuid
7
 
8
  import datasets
9
  import gradio as gr
 
10
  from transformers.pipelines import TextClassificationPipeline
11
 
12
- from io_utils import (get_yaml_path, read_column_mapping, save_job_to_pipe,
13
- write_column_mapping, write_log_to_user_file)
14
- from text_classification import (check_model, get_example_prediction,
15
- get_labels_and_features_from_dataset)
16
- from wordings import (CHECK_CONFIG_OR_SPLIT_RAW,
17
- CONFIRM_MAPPING_DETAILS_FAIL_RAW,
18
- MAPPING_STYLED_ERROR_WARNING, get_styled_input)
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  MAX_LABELS = 40
21
  MAX_FEATURES = 20
@@ -32,24 +44,50 @@ HF_GSK_HUB_UNLOCK_TOKEN = "GSK_HUB_UNLOCK_TOKEN"
32
  LEADERBOARD = "giskard-bot/evaluator-leaderboard"
33
 
34
 
35
- def check_dataset_and_get_config(dataset_id):
36
- try:
37
- # write_column_mapping(None, uid) # reset column mapping
38
- configs = datasets.get_dataset_config_names(dataset_id)
39
- return gr.Dropdown(configs, value=configs[0], visible=True)
40
- except Exception:
41
- # Dataset may not exist
42
- pass
43
 
44
 
45
- def check_dataset_and_get_split(dataset_id, dataset_config):
 
 
 
46
  try:
47
- splits = list(datasets.load_dataset(dataset_id, dataset_config).keys())
48
- return gr.Dropdown(splits, value=splits[0], visible=True)
49
- except Exception:
 
 
 
 
 
 
 
 
 
50
  # Dataset may not exist
51
- # gr.Warning(f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}")
52
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
 
55
  def select_run_mode(run_inf):
 
7
 
8
  import datasets
9
  import gradio as gr
10
+ import pandas as pd
11
  from transformers.pipelines import TextClassificationPipeline
12
 
13
+ from io_utils import (
14
+ get_yaml_path,
15
+ read_column_mapping,
16
+ save_job_to_pipe,
17
+ write_column_mapping,
18
+ write_log_to_user_file,
19
+ )
20
+ from text_classification import (
21
+ check_model,
22
+ get_example_prediction,
23
+ get_labels_and_features_from_dataset,
24
+ )
25
+ from wordings import (
26
+ CHECK_CONFIG_OR_SPLIT_RAW,
27
+ CONFIRM_MAPPING_DETAILS_FAIL_RAW,
28
+ MAPPING_STYLED_ERROR_WARNING,
29
+ get_styled_input,
30
+ )
31
 
32
  MAX_LABELS = 40
33
  MAX_FEATURES = 20
 
44
  LEADERBOARD = "giskard-bot/evaluator-leaderboard"
45
 
46
 
47
+ logger = logging.getLogger(__file__)
 
 
 
 
 
 
 
48
 
49
 
50
def check_dataset(dataset_id, dataset_config=None, dataset_split=None):
    """Resolve a dataset's configs and splits and build a short preview.

    Used as the callback for changes of the dataset id, config and split
    inputs, so it always returns updates for the same three components.

    Args:
        dataset_id: Dataset identifier passed to the ``datasets`` library.
        dataset_config: Config name to load. When ``None`` the first
            available config is used and the first split is selected.
        dataset_split: Split to preview. When ``None`` (or when the name is
            not a split of the loaded config) the first split is used.

    Returns:
        A tuple ``(config_dropdown, split_dropdown, preview_dataframe)``.
        On success the dropdowns list the real configs/splits and the
        dataframe shows up to five rows. If anything fails while loading,
        the dropdowns fall back to the values known so far and the preview
        is returned empty and hidden instead of raising.
    """
    configs = ["default"]
    splits = ["default"]
    logger.info(f"Loading {dataset_id}, {dataset_config}, {dataset_split}")

    config_was_given = dataset_config is not None
    split_was_given = dataset_split is not None

    try:
        # Keep the "default" placeholder if the hub reports no configs.
        configs = datasets.get_dataset_config_names(dataset_id) or configs
        config_to_load = dataset_config or configs[0]

        # Load once and reuse the result for both the split list and the
        # preview (the dataset used to be loaded twice per call).
        dataset_dict = datasets.load_dataset(dataset_id, config_to_load)
        available_splits = list(dataset_dict.keys())

        # Without an explicit config the split is always reset to the first
        # one; a missing or stale split name also falls back to the first
        # split rather than raising KeyError.
        if not config_was_given or dataset_split not in available_splits:
            split_to_show = available_splits[0]
        else:
            split_to_show = dataset_split

        # Only materialise the rows that are displayed instead of converting
        # the whole split to pandas.
        split_data = dataset_dict[split_to_show]
        preview_rows = min(5, len(split_data))
        dataframe: pd.DataFrame = split_data.select(range(preview_rows)).to_pandas()
    except Exception as e:
        # Dataset may not exist (or the config/split may be invalid). This is
        # a UI callback, so report the problem and return a safe state.
        logger.warning(
            f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}"
        )
        fallback_config = dataset_config if config_was_given else configs[0]
        # Keep the caller's split when both config and split were supplied so
        # the dropdown value does not change and re-trigger this callback.
        fallback_split = (
            dataset_split if config_was_given and split_was_given else splits[0]
        )
        return (
            gr.Dropdown(configs, value=fallback_config, visible=True),
            gr.Dropdown(splits, value=fallback_split, visible=True),
            gr.DataFrame(pd.DataFrame(), visible=False),
        )

    return (
        gr.Dropdown(configs, value=config_to_load, visible=True),
        gr.Dropdown(available_splits, value=split_to_show, visible=True),
        gr.DataFrame(dataframe, visible=True),
    )
91
 
92
 
93
  def select_run_mode(run_inf):