inoki-giskard and ZeroCommand committed
Commit 1c00552
1 Parent(s): 0444141

Fix feature mapping selection bug (#21)


- fix feature mapping selection bug (3025552358963d437caf5108d98dfa6653caffa3)
- add warning component (104fec1752a8f6d84d7d0a27e1fa4d43f0669364)
- do not show when dataset has problem (0160f4b08cf662a83c76f2fce2cd5d377fa17429)
- Show check config warning (b4afb862a243e1dfc725d284d2d58d4d9e10a777)
- add leaderboard arg and inf token (5927800bda60c3d05cb37dc1637687888aa68008)
- fix inf token ui (ff775723f239f6ccc60834a2b43a5644614ae0ce)
- fix config file io bug (8a5336fb86880b4f03caafb2a6e78c34b7067f58)


Co-authored-by: zcy <ZeroCommand@users.noreply.huggingface.co>

app.py CHANGED
@@ -1,14 +1,10 @@
 import atexit
-import threading
-
 import gradio as gr

 from app_leaderboard import get_demo as get_demo_leaderboard
 from app_text_classification import get_demo as get_demo_text_classification
 from run_jobs import start_process_run_job, stop_thread

-if threading.current_thread() is not threading.main_thread():
-    t = threading.current_thread()
 try:
     with gr.Blocks(theme=gr.themes.Soft(primary_hue="green")) as demo:
         with gr.Tab("Text Classification"):
@@ -22,6 +18,6 @@ try:
     demo.launch(share=False)
     atexit.register(stop_thread)

-except Exception:
-    print("stop background thread")
+except Exception as e:
+    print("stop background thread: ", e)
     stop_thread()
app_text_classification.py CHANGED
@@ -4,8 +4,8 @@ from io_utils import (
     read_scanners,
     write_scanners,
     read_inference_type,
-    write_inference_type,
     get_logs_file,
+    write_inference_type,
 )
 from wordings import INTRODUCTION_MD, CONFIRM_MAPPING_DETAILS_MD
 from text_classification_ui_helpers import (
@@ -14,6 +14,8 @@ from text_classification_ui_helpers import (
     check_dataset_and_get_split,
     check_model_and_show_prediction,
     write_column_mapping_to_config,
+    select_run_mode,
+    deselect_run_inference,
 )

 MAX_LABELS = 20
@@ -70,6 +72,7 @@ def get_demo(demo):
     run_local = gr.Checkbox(value=True, label="Run in this Space")
     use_inference = read_inference_type(uid) == "hf_inference_api"
     run_inference = gr.Checkbox(value=use_inference, label="Run with Inference API")
+    inference_token = gr.Textbox(value="", label="HF Token for Inference API", visible=False, interactive=True)

     with gr.Accordion(label="Scanner Advance Config (optional)", open=False):
         selected = read_scanners(uid)
@@ -94,7 +97,8 @@ def get_demo(demo):
     demo.load(get_logs_file, uid_label, logs, every=0.5)

     dataset_id_input.change(
-        check_dataset_and_get_config, inputs=[dataset_id_input, uid_label], outputs=[dataset_config_input]
+        check_dataset_and_get_config,
+        inputs=[dataset_id_input, uid_label], outputs=[dataset_config_input]
     )

     dataset_config_input.change(
@@ -105,8 +109,20 @@ def get_demo(demo):

     scanners.change(write_scanners, inputs=[scanners, uid_label])

-    run_inference.change(write_inference_type, inputs=[run_inference, uid_label])
-
+    run_inference.change(
+        select_run_mode,
+        inputs=[run_inference, inference_token, uid_label],
+        outputs=[inference_token, run_local])
+
+    run_local.change(
+        deselect_run_inference,
+        inputs=[run_local],
+        outputs=[inference_token, run_inference])
+
+    inference_token.change(
+        write_inference_type,
+        inputs=[run_inference, inference_token, uid_label])
+
     gr.on(
         triggers=[label.change for label in column_mappings],
         fn=write_column_mapping_to_config,
@@ -119,6 +135,19 @@ def get_demo(demo):
         ],
     )

+    # label.change sometimes does not pass the changed value
+    gr.on(
+        triggers=[label.input for label in column_mappings],
+        fn=write_column_mapping_to_config,
+        inputs=[
+            dataset_id_input,
+            dataset_config_input,
+            dataset_split_input,
+            uid_label,
+            *column_mappings,
+        ],
+    )
+
     gr.on(
         triggers=[
             model_id_input.change,
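
Note on the run-mode wiring above: `run_inference` and `run_local` are treated as mutually exclusive, and the token box is only shown in API mode. A minimal self-contained sketch of that pattern, assuming stock Gradio and illustrative component names (not the Space's actual ones):

import gradio as gr

def select_remote(remote):
    # Show the token box and uncheck "local" when remote mode is picked,
    # mirroring select_run_mode/deselect_run_inference above.
    return gr.update(visible=remote), gr.update(value=not remote)

with gr.Blocks() as demo:
    run_remote = gr.Checkbox(label="Run with Inference API")
    run_local = gr.Checkbox(value=True, label="Run in this Space")
    token = gr.Textbox(label="HF Token", visible=False)
    run_remote.change(select_remote, inputs=[run_remote], outputs=[token, run_local])

demo.launch()

In the actual diff, persisting the token to the config happens on `inference_token.change` (and in `select_run_mode` only when the token is non-empty), which avoids writing an empty token the moment the box is first revealed.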
io_utils.py CHANGED
@@ -1,6 +1,6 @@
 import os
 import subprocess
-
+import gradio as gr
 import yaml

 import pipe
@@ -28,17 +28,21 @@ def read_scanners(uid):
     with open(get_yaml_path(uid), "r") as f:
         config = yaml.load(f, Loader=yaml.FullLoader)
         scanners = config.get("detectors", [])
+    f.close()
     return scanners


 # convert a list of scanners to yaml file
 def write_scanners(scanners, uid):
-    with open(get_yaml_path(uid), "r+") as f:
+    with open(get_yaml_path(uid), "r") as f:
         config = yaml.load(f, Loader=yaml.FullLoader)
         if config:
             config["detectors"] = scanners
-        # save scanners to detectors in yaml
-        yaml.dump(config, f, Dumper=Dumper)
+    f.close()
+    # save scanners to detectors in yaml
+    with open(get_yaml_path(uid), "w") as f:
+        yaml.dump(config, f, Dumper=Dumper)
+    f.close()


 # read model_type from yaml file
@@ -47,19 +51,25 @@ def read_inference_type(uid):
     with open(get_yaml_path(uid), "r") as f:
         config = yaml.load(f, Loader=yaml.FullLoader)
         inference_type = config.get("inference_type", "")
+    f.close()
     return inference_type


 # write model_type to yaml file
-def write_inference_type(use_inference, uid):
-    with open(get_yaml_path(uid), "r+") as f:
+def write_inference_type(use_inference, inference_token, uid):
+    with open(get_yaml_path(uid), "r") as f:
         config = yaml.load(f, Loader=yaml.FullLoader)
         if use_inference:
             config["inference_type"] = "hf_inference_api"
+            config["inference_token"] = inference_token
         else:
             config["inference_type"] = "hf_pipeline"
-        # save inference_type to inference_type in yaml
+    f.close()
+    # save inference_type to inference_type in yaml
+    with open(get_yaml_path(uid), "w") as f:
         yaml.dump(config, f, Dumper=Dumper)
+    f.close()
+


 # read column mapping from yaml file
@@ -69,6 +79,7 @@ def read_column_mapping(uid):
         config = yaml.load(f, Loader=yaml.FullLoader)
         if config:
             column_mapping = config.get("column_mapping", dict())
+    f.close()
     return column_mapping


@@ -76,15 +87,18 @@ def read_column_mapping(uid):
 def write_column_mapping(mapping, uid):
     with open(get_yaml_path(uid), "r") as f:
         config = yaml.load(f, Loader=yaml.FullLoader)
+    f.close()
+
     if config is None:
         return
     if mapping is None and "column_mapping" in config.keys():
         del config["column_mapping"]
     else:
         config["column_mapping"] = mapping
+
     with open(get_yaml_path(uid), "w") as f:
-        # save column_mapping to column_mapping in yaml
         yaml.dump(config, f, Dumper=Dumper)
+    f.close()


 # convert column mapping dataframe to json
@@ -107,6 +121,7 @@ def get_logs_file(uid):
 def write_log_to_user_file(id, log):
     with open(f"./tmp/{id}_log", "a") as f:
         f.write(log)
+    f.close()


 def save_job_to_pipe(id, job, lock):
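
The config file io bug fixed above came from dumping YAML through the same `r+` handle used for reading: the dump lands at the current file offset, after the old document, leaving the file corrupted. The fix is read-modify-rewrite: load with a read handle, then reopen with "w" so the dump truncates and replaces. A standalone sketch of the pattern (the `config.yaml` path and `update_config_key` helper are hypothetical):

import yaml

CONFIG_PATH = "config.yaml"  # hypothetical path, for illustration

def update_config_key(key, value):
    # Read pass: load the whole document. The with-block closes the
    # handle on exit, so the trailing f.close() calls in the diff
    # above are harmless no-ops.
    with open(CONFIG_PATH, "r") as f:
        config = yaml.load(f, Loader=yaml.FullLoader) or {}

    config[key] = value

    # Write pass: "w" truncates first, so the dump replaces the file
    # instead of being appended after the old content as with "r+".
    with open(CONFIG_PATH, "w") as f:
        yaml.dump(config, f)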
run_jobs.py CHANGED
@@ -1,6 +1,6 @@
 import threading
 import time
-
+import logging
 import pipe
 from io_utils import pop_job_from_pipe

@@ -9,7 +9,7 @@ is_running = False

 def start_process_run_job():
     try:
-        print("Running jobs in thread")
+        logging.debug("Running jobs in thread")
         global thread, is_running
         thread = threading.Thread(target=run_job)
         thread.daemon = True
@@ -22,7 +22,7 @@ def start_process_run_job():


 def stop_thread():
-    print("Stop thread")
+    logging.debug("Stop thread")
     global is_running
     is_running = False

@@ -34,6 +34,6 @@ def run_job():
             pop_job_from_pipe()
             time.sleep(10)
         except KeyboardInterrupt:
-            print("KeyboardInterrupt stop background thread")
+            logging.debug("KeyboardInterrupt stop background thread")
             is_running = False
             break
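
One caveat on the `print` → `logging.debug` swap: Python's root logger defaults to the WARNING level, so these debug messages are silently dropped unless logging is configured somewhere at startup, e.g.:

import logging

# Without this (or equivalent) configuration, the logging.debug(...)
# calls in run_jobs.py produce no output at all.
logging.basicConfig(level=logging.DEBUG)
logging.debug("Running jobs in thread")  # now emitted to stderr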
text_classification_ui_helpers.py CHANGED
@@ -9,10 +9,11 @@ import gradio as gr
 from transformers.pipelines import TextClassificationPipeline

 from io_utils import (get_yaml_path, read_column_mapping, save_job_to_pipe,
-                      write_column_mapping, write_log_to_user_file)
+                      write_column_mapping, write_log_to_user_file,
+                      write_inference_type)
 from text_classification import (check_model, get_example_prediction,
                                  get_labels_and_features_from_dataset)
-from wordings import CONFIRM_MAPPING_DETAILS_FAIL_RAW
+from wordings import CONFIRM_MAPPING_DETAILS_FAIL_RAW, MAPPING_STYLED_ERROR_WARNING, CHECK_CONFIG_OR_SPLIT_RAW

 MAX_LABELS = 20
 MAX_FEATURES = 20
@@ -24,7 +25,7 @@ HF_WRITE_TOKEN = "HF_WRITE_TOKEN"

 def check_dataset_and_get_config(dataset_id, uid):
     try:
-        write_column_mapping(None, uid) # reset column mapping
+        # write_column_mapping(None, uid) # reset column mapping
         configs = datasets.get_dataset_config_names(dataset_id)
         return gr.Dropdown(configs, value=configs[0], visible=True)
     except Exception:
@@ -41,7 +42,30 @@ def check_dataset_and_get_split(dataset_id, dataset_config):
         # gr.Warning(f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}")
         pass

+def select_run_mode(run_inf, inf_token, uid):
+    if run_inf:
+        if len(inf_token) > 0:
+            write_inference_type(run_inf, inf_token, uid)
+        return (
+            gr.update(visible=True),
+            gr.update(value=False))
+    else:
+        return (
+            gr.update(visible=False),
+            gr.update(value=True))

+def deselect_run_inference(run_local):
+    if run_local:
+        return (
+            gr.update(visible=False),
+            gr.update(value=False)
+        )
+    else:
+        return (
+            gr.update(visible=True),
+            gr.update(value=True)
+        )
+
 def write_column_mapping_to_config(
     dataset_id, dataset_config, dataset_split, uid, *labels
 ):
@@ -52,20 +76,17 @@ def write_column_mapping_to_config(
     )
     if labels is None:
         return
-    labels = [*labels]
-    all_mappings = read_column_mapping(uid)

-    if all_mappings is None:
-        all_mappings = dict()
+    all_mappings = dict()

     if "labels" not in all_mappings.keys():
         all_mappings["labels"] = dict()
     for i, label in enumerate(labels[:MAX_LABELS]):
         if label:
-            all_mappings["labels"][label] = ds_labels[i]
+            all_mappings["labels"][label] = ds_labels[i % len(ds_labels)]
     if "features" not in all_mappings.keys():
         all_mappings["features"] = dict()
-    for i, feat in enumerate(labels[MAX_LABELS : (MAX_LABELS + MAX_FEATURES)]):
+    for _, feat in enumerate(labels[MAX_LABELS : (MAX_LABELS + MAX_FEATURES)]):
         if feat:
             # TODO: Substitute 'text' with more features for zero-shot
             all_mappings["features"]["text"] = feat
@@ -134,7 +155,7 @@ def check_model_and_show_prediction(

     # when dataset does not have labels or features
     if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
-        # gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
+        gr.Warning(CHECK_CONFIG_OR_SPLIT_RAW)
         return (
             gr.update(visible=False),
             gr.update(visible=False),
@@ -154,9 +175,8 @@ def check_model_and_show_prediction(
         collections.Counter(model_id2label.values()) != collections.Counter(ds_labels)
         or ds_features[0] != "text"
     ):
-        gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
         return (
-            gr.update(visible=False),
+            gr.update(value=MAPPING_STYLED_ERROR_WARNING, visible=True),
             gr.update(visible=False),
             gr.update(visible=True, open=True),
             *column_mappings,
@@ -192,6 +212,10 @@ def try_submit(m_id, d_id, config, split, local, uid):
         return (gr.update(interactive=True), gr.update(visible=False))
     feature_mapping = all_mappings["features"]

+    leaderboard_dataset = None
+    if os.environ.get("SPACE_ID") == "giskardai/giskard-evaluator":
+        leaderboard_dataset = "ZeroCommand/test-giskard-report"
+
     # TODO: Set column mapping for some dataset such as `amazon_polarity`
     if local:
         command = [
@@ -220,6 +244,8 @@ def try_submit(m_id, d_id, config, split, local, uid):
             json.dumps(label_mapping),
             "--scan_config",
             get_yaml_path(uid),
+            "--leaderboard_dataset",
+            leaderboard_dataset,
         ]

     eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
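
The feature mapping selection fix itself is the `ds_labels[i % len(ds_labels)]` change in `write_column_mapping_to_config`: the callback receives one value per UI dropdown slot (up to MAX_LABELS of them), so the index `i` can run past the dataset's actual label list, and the old `ds_labels[i]` raised an IndexError. A minimal sketch of the guarded lookup, with made-up labels:

# Hypothetical values, for illustration only.
ds_labels = ["negative", "positive"]      # the dataset defines 2 labels
selected = ["neg", "pos", "neu", None]    # the UI exposes more slots

mapping = {}
for i, label in enumerate(selected):
    if label:
        # Modulo keeps the index inside ds_labels even when the UI
        # passes more slots than the dataset defines.
        mapping[label] = ds_labels[i % len(ds_labels)]

print(mapping)  # {'neg': 'negative', 'pos': 'positive', 'neu': 'negative'}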
wordings.py CHANGED
@@ -21,9 +21,19 @@ CONFIRM_MAPPING_DETAILS_FAIL_RAW = """
 Sorry, we cannot align the input/output of your dataset with the model. Pleaser double check your model and dataset.
 """

+CHECK_CONFIG_OR_SPLIT_RAW = """
+Please check your dataset config or split.
+"""
+
 PREDICTION_SAMPLE_MD = """
 <h1 style="text-align: center;">
     Model Prediction Sample
 </h1>
 Here is a sample prediction from your model based on your dataset.
 """
+
+MAPPING_STYLED_ERROR_WARNING = """
+<h3 style="text-align: center;color: coral; background-color: #fff0f3; border-radius: 8px; padding: 10px; ">
+    Sorry, we cannot auto-align the labels/features of your dataset and model. Please double check.
+</h3>
+"""