ZeroCommand committed on
Commit
87119af
·
verified ·
1 Parent(s): c49d60b

update hf token validate (#7)

Browse files

- add check validity for hf token (ebc2961c6050ab25741c4380f556d90b156c8ec9)

app_text_classification.py CHANGED
@@ -11,7 +11,19 @@ from text_classification_ui_helpers import (
11
  try_submit,
12
  write_column_mapping_to_config,
13
  )
14
- from wordings import CONFIRM_MAPPING_DETAILS_MD, INTRODUCTION_MD, USE_INFERENCE_API_TIP, CHECK_LOG_SECTION_RAW
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  MAX_LABELS = 40
17
  MAX_FEATURES = 20
@@ -28,7 +40,7 @@ def get_demo():
28
  )
29
  with gr.Row():
30
  model_id_input = gr.Textbox(
31
- label="Hugging Face Model id",
32
  placeholder=EXAMPLE_MODEL_ID + " (press enter to confirm)",
33
  )
34
 
@@ -89,6 +101,13 @@ def get_demo():
89
  visible=True,
90
  interactive=True,
91
  )
 
 
 
 
 
 
 
92
 
93
  with gr.Accordion(label="Scanner Advance Config (optional)", open=False):
94
  scanners = gr.CheckboxGroup(label="Scan Settings", visible=True)
@@ -96,7 +115,7 @@ def get_demo():
96
  @gr.on(triggers=[uid_label.change], inputs=[uid_label], outputs=[scanners])
97
  def get_scanners(uid):
98
  selected = read_scanners(uid)
99
- # currently we remove data_leakage from the default scanners
100
  # Reason: data_leakage barely raises any issues and takes too many requests
101
  # when using inference API, causing rate limit error
102
  scan_config = selected + ["data_leakage"]
@@ -225,6 +244,12 @@ def get_demo():
225
  return gr.update(interactive=False)
226
  if not column_mapping_accordion.visible:
227
  return gr.update(interactive=False)
 
 
 
 
 
 
228
  return gr.update(interactive=True)
229
 
230
  gr.on(
 
11
  try_submit,
12
  write_column_mapping_to_config,
13
  )
14
+
15
+ from text_classification import (
16
+ get_example_prediction,
17
+ check_hf_token_validity,
18
+ HuggingFaceInferenceAPIResponse
19
+ )
20
+ from wordings import (
21
+ CONFIRM_MAPPING_DETAILS_MD,
22
+ INTRODUCTION_MD,
23
+ USE_INFERENCE_API_TIP,
24
+ CHECK_LOG_SECTION_RAW,
25
+ HF_TOKEN_INVALID_STYLED
26
+ )
27
 
28
  MAX_LABELS = 40
29
  MAX_FEATURES = 20
 
40
  )
41
  with gr.Row():
42
  model_id_input = gr.Textbox(
43
+ label="Hugging Face model id",
44
  placeholder=EXAMPLE_MODEL_ID + " (press enter to confirm)",
45
  )
46
 
 
101
  visible=True,
102
  interactive=True,
103
  )
104
+ inference_token_info = gr.HTML(value=HF_TOKEN_INVALID_STYLED, visible=False)
105
+
106
+ inference_token.change(
107
+ lambda token: gr.update(visible=lambda: check_hf_token_validity(token)),
108
+ inputs=[inference_token],
109
+ outputs=[inference_token_info],
110
+ )
111
 
112
  with gr.Accordion(label="Scanner Advance Config (optional)", open=False):
113
  scanners = gr.CheckboxGroup(label="Scan Settings", visible=True)
 
115
  @gr.on(triggers=[uid_label.change], inputs=[uid_label], outputs=[scanners])
116
  def get_scanners(uid):
117
  selected = read_scanners(uid)
118
+ # we remove data_leakage from the default scanners
119
  # Reason: data_leakage barely raises any issues and takes too many requests
120
  # when using inference API, causing rate limit error
121
  scan_config = selected + ["data_leakage"]
 
244
  return gr.update(interactive=False)
245
  if not column_mapping_accordion.visible:
246
  return gr.update(interactive=False)
247
+ _, prediction_response = get_example_prediction(
248
+ model_id, dataset_id, dataset_config, dataset_split, inference_token
249
+ )
250
+ if not isinstance(prediction_response, HuggingFaceInferenceAPIResponse):
251
+ gr.warning("Your HF token is invalid. Please check your token.")
252
+ return gr.update(interactive=False)
253
  return gr.update(interactive=True)
254
 
255
  gr.on(
fetch_utils.py CHANGED
@@ -14,18 +14,19 @@ def check_dataset_and_get_config(dataset_id):
14
 
15
  def check_dataset_and_get_split(dataset_id, dataset_config):
16
  try:
17
- splits = datasets.get_dataset_split_names(dataset_id, dataset_config, trust_remote_code=True)
18
  except Exception as e:
19
  # Dataset may not exist
20
  logging.warning(
21
  f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}"
22
  )
23
  return None
24
- if isinstance(splits, list):
 
25
  return splits
26
- else:
27
  # Dataset has no splits
28
  logging.warning(
29
- f"Dataset {dataset_id} with config {dataset_config} has no splits"
30
  )
31
  return None
 
14
 
15
  def check_dataset_and_get_split(dataset_id, dataset_config):
16
  try:
17
+ ds = datasets.load_dataset(dataset_id, dataset_config, trust_remote_code=True)
18
  except Exception as e:
19
  # Dataset may not exist
20
  logging.warning(
21
  f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}"
22
  )
23
  return None
24
+ try:
25
+ splits = list(ds.keys())
26
  return splits
27
+ except Exception as e:
28
  # Dataset has no splits
29
  logging.warning(
30
+ f"Dataset {dataset_id} with config {dataset_config} has no splits: {e}"
31
  )
32
  return None
temp_log DELETED
@@ -1 +0,0 @@
1
- ./tmp/53513338-9dfa-4f6e-bea4-63857a9d93a6.log
 
 
text_classification.py CHANGED
@@ -272,7 +272,7 @@ def select_the_first_string_column(ds):
272
  return None
273
 
274
 
275
- def get_example_prediction(model_id, dataset_id, dataset_config, dataset_split):
276
  # get a sample prediction from the model on the dataset
277
  prediction_input = None
278
  prediction_result = None
@@ -284,8 +284,7 @@ def get_example_prediction(model_id, dataset_id, dataset_config, dataset_split):
284
  prediction_input = ds[0][select_the_first_string_column(ds)]
285
  else:
286
  prediction_input = ds[0]["text"]
287
-
288
- hf_token = os.environ.get(HF_WRITE_TOKEN, default="")
289
  payload = {"inputs": prediction_input, "options": {"use_cache": True}}
290
  results = hf_inference_api(model_id, hf_token, payload)
291
 
@@ -381,4 +380,16 @@ def text_classification_fix_column_mapping(column_mapping, ppl, d_id, config, sp
381
  def strip_model_id_from_url(model_id):
382
  if model_id.startswith("https://huggingface.co/"):
383
  return "/".join(model_id.split("/")[-2])
384
- return model_id
 
 
 
 
 
 
 
 
 
 
 
 
 
272
  return None
273
 
274
 
275
+ def get_example_prediction(model_id, dataset_id, dataset_config, dataset_split, hf_token):
276
  # get a sample prediction from the model on the dataset
277
  prediction_input = None
278
  prediction_result = None
 
284
  prediction_input = ds[0][select_the_first_string_column(ds)]
285
  else:
286
  prediction_input = ds[0]["text"]
287
+
 
288
  payload = {"inputs": prediction_input, "options": {"use_cache": True}}
289
  results = hf_inference_api(model_id, hf_token, payload)
290
 
 
380
  def strip_model_id_from_url(model_id):
381
  if model_id.startswith("https://huggingface.co/"):
382
  return "/".join(model_id.split("/")[-2])
383
+ return model_id
384
+
385
def check_hf_token_validity(hf_token):
    """Check an HF token by issuing a small Inference API request with it.

    Non-string, empty and whitespace-only tokens are rejected locally without
    any network call. Otherwise a fixed test payload is sent through
    ``hf_inference_api`` and the token is considered valid when the response
    does not contain ``"error"``.

    Args:
        hf_token: The token to check.

    Returns:
        ``True`` if the probe response contains no ``"error"``, else ``False``.
    """
    # Check the type first so the later string operations are safe, and
    # treat blank tokens like empty ones instead of spending a request on them.
    if not isinstance(hf_token, str) or not hf_token.strip():
        return False
    # use inference api to check the token
    payload = {"inputs": "This is a test", "options": {"use_cache": True}}
    response = hf_inference_api(
        "cardiffnlp/twitter-roberta-base-sentiment-latest", hf_token, payload
    )
    return "error" not in response
text_classification_ui_helpers.py CHANGED
@@ -27,6 +27,7 @@ from wordings import (
27
  CHECK_LOG_SECTION_RAW,
28
  get_styled_input,
29
  )
 
30
 
31
  MAX_LABELS = 40
32
  MAX_FEATURES = 20
@@ -59,7 +60,9 @@ def check_dataset(dataset_id):
59
  gr.update(),
60
  ""
61
  )
62
- splits = datasets.get_dataset_split_names(dataset_id, configs[0], trust_remote_code=True)
 
 
63
  return (
64
  gr.update(choices=configs, value=configs[0], visible=True),
65
  gr.update(choices=splits, value=splits[0], visible=True),
@@ -212,9 +215,11 @@ def align_columns_and_show_prediction(
212
  dropdown_placement = [
213
  gr.Dropdown(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)
214
  ]
 
 
215
 
216
  prediction_input, prediction_response = get_example_prediction(
217
- model_id, dataset_id, dataset_config, dataset_split
218
  )
219
 
220
  if prediction_input is None or prediction_response is None:
@@ -255,7 +260,6 @@ def align_columns_and_show_prediction(
255
  )
256
 
257
  if len(ds_labels) != len(model_labels):
258
- # gr.Warning(UNMATCHED_MODEL_DATASET)
259
  return (
260
  gr.update(value=UNMATCHED_MODEL_DATASET_STYLED_ERROR, visible=True),
261
  gr.update(visible=False),
@@ -324,7 +328,6 @@ def construct_label_and_feature_mapping(all_mappings, ds_labels, ds_features):
324
  feature_mapping = all_mappings["features"]
325
  return label_mapping, feature_mapping
326
 
327
-
328
  def try_submit(m_id, d_id, config, split, inference, inference_token, uid):
329
  all_mappings = read_column_mapping(uid)
330
  check_column_mapping_keys_validity(all_mappings)
 
27
  CHECK_LOG_SECTION_RAW,
28
  get_styled_input,
29
  )
30
+ import os
31
 
32
  MAX_LABELS = 40
33
  MAX_FEATURES = 20
 
60
  gr.update(),
61
  ""
62
  )
63
+ splits = datasets.get_dataset_split_names(
64
+ dataset_id, configs[0], trust_remote_code=True
65
+ )
66
  return (
67
  gr.update(choices=configs, value=configs[0], visible=True),
68
  gr.update(choices=splits, value=splits[0], visible=True),
 
215
  dropdown_placement = [
216
  gr.Dropdown(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)
217
  ]
218
+
219
+ hf_token = os.environ.get("HF_WRITE_TOKEN", default="")
220
 
221
  prediction_input, prediction_response = get_example_prediction(
222
+ model_id, dataset_id, dataset_config, dataset_split, hf_token
223
  )
224
 
225
  if prediction_input is None or prediction_response is None:
 
260
  )
261
 
262
  if len(ds_labels) != len(model_labels):
 
263
  return (
264
  gr.update(value=UNMATCHED_MODEL_DATASET_STYLED_ERROR, visible=True),
265
  gr.update(visible=False),
 
328
  feature_mapping = all_mappings["features"]
329
  return label_mapping, feature_mapping
330
 
 
331
  def try_submit(m_id, d_id, config, split, inference, inference_token, uid):
332
  all_mappings = read_column_mapping(uid)
333
  check_column_mapping_keys_validity(all_mappings)
wordings.py CHANGED
@@ -2,7 +2,7 @@ INTRODUCTION_MD = """
2
  <h1 style="text-align: center;">
3
  🐢Giskard Evaluator
4
  </h1>
5
- Welcome to Giskard Evaluator Space! Get your report immediately by simply input your model id and dataset id below. Follow our leads and improve your model in no time.
6
  """
7
  CONFIRM_MAPPING_DETAILS_MD = """
8
  <h1 style="text-align: center;">
@@ -14,11 +14,7 @@ CONFIRM_MAPPING_DETAILS_FAIL_MD = """
14
  <h1 style="text-align: center;">
15
  Confirm Pre-processing Details
16
  </h1>
17
- The format of your dataset with the model does not match. <b>Pleaser double check your model and dataset.</b>
18
- """
19
-
20
- UNMATCHED_MODEL_DATASET = """
21
- Model prediction labels do not align with the labels present in the dataset. Please double check your model and dataset.
22
  """
23
 
24
  CONFIRM_MAPPING_DETAILS_FAIL_RAW = """
@@ -30,7 +26,7 @@ CHECK_CONFIG_OR_SPLIT_RAW = """
30
  """
31
 
32
  CHECK_LOG_SECTION_RAW = """
33
- Your have successfully submitted a Giskard evaluation. Further details are available in the Logs tab, providing information on your queue status and the current job log.
34
  """
35
 
36
  PREDICTION_SAMPLE_MD = """
@@ -42,7 +38,7 @@ PREDICTION_SAMPLE_MD = """
42
 
43
  MAPPING_STYLED_ERROR_WARNING = """
44
  <h3 style="text-align: center;color: orange; background-color: #fff0f3; border-radius: 8px; padding: 10px; ">
45
- We cannot auto-align the labels/features of your dataset and model. Please double check the info below and select correct mapping before submission.
46
  </h3>
47
  """
48
 
@@ -57,18 +53,31 @@ NOT_TEXT_CLASSIFICATION_MODEL_RAW = """
57
  """
58
 
59
  USE_INFERENCE_API_TIP = """
60
- We are using
61
  <a href="https://huggingface.co/docs/api-inference/detailed_parameters#text-classification-task">
62
  Hugging Face Inference API
63
  </a>
64
  for the evaluation,
65
  which requires your <a href="https://huggingface.co/settings/tokens">HF token</a>.
66
  <br/>
67
- Your HF token is only used in this Space for your evaluation.
 
 
 
 
 
 
 
68
  </b>
69
  """
70
 
 
 
 
 
 
 
71
  def get_styled_input(input):
72
  return f"""<h3 style="text-align: center;color: #4ca154; background-color: #e2fbe8; border-radius: 8px; padding: 10px; ">
73
- Sample input: {input}
74
  </h3>"""
 
2
  <h1 style="text-align: center;">
3
  🐢Giskard Evaluator
4
  </h1>
5
+ Welcome to Giskard Evaluator Space! Get your report immediately by simply input your model id and dataset id below. Follow our leads and improve your model.
6
  """
7
  CONFIRM_MAPPING_DETAILS_MD = """
8
  <h1 style="text-align: center;">
 
14
  <h1 style="text-align: center;">
15
  Confirm Pre-processing Details
16
  </h1>
17
+ Sorry, we cannot align the input/output of your dataset with the model. <b>Pleaser double check your model and dataset.</b>
 
 
 
 
18
  """
19
 
20
  CONFIRM_MAPPING_DETAILS_FAIL_RAW = """
 
26
  """
27
 
28
  CHECK_LOG_SECTION_RAW = """
29
+ Your have successfully submitted a Giskard evaluation. Further details are available in the Logs tab. You can find your report will be posted to your model's community discussion.
30
  """
31
 
32
  PREDICTION_SAMPLE_MD = """
 
38
 
39
  MAPPING_STYLED_ERROR_WARNING = """
40
  <h3 style="text-align: center;color: orange; background-color: #fff0f3; border-radius: 8px; padding: 10px; ">
41
+ Sorry, we cannot auto-align the labels/features of your dataset and model. Please double check.
42
  </h3>
43
  """
44
 
 
53
  """
54
 
55
  USE_INFERENCE_API_TIP = """
56
+ We recommend to use
57
  <a href="https://huggingface.co/docs/api-inference/detailed_parameters#text-classification-task">
58
  Hugging Face Inference API
59
  </a>
60
  for the evaluation,
61
  which requires your <a href="https://huggingface.co/settings/tokens">HF token</a>.
62
  <br/>
63
+ Otherwise, an
64
+ <a href="https://huggingface.co/docs/transformers/main_classes/pipelines#transformers.TextClassificationPipeline">
65
+ HF pipeline
66
+ </a>
67
+ will be created and run in this Space. It takes more time to get the result.
68
+ <br/>
69
+ <b>
70
+ Do not worry, your HF token is only used in this Space for your evaluation.
71
  </b>
72
  """
73
 
74
+ HF_TOKEN_INVALID_STYLED= """
75
+ <h3 style="text-align: center;color: #fa5f5f; background-color: #fbe2e2; border-radius: 8px; padding: 10px; ">
76
+ Your Hugging Face token is invalid. Please double check your token.
77
+ </h3>
78
+ """
79
+
80
  def get_styled_input(input):
81
  return f"""<h3 style="text-align: center;color: #4ca154; background-color: #e2fbe8; border-radius: 8px; padding: 10px; ">
82
+ Your model and dataset have been validated! <br /> Sample input: {input}
83
  </h3>"""