Spaces:

ZeroCommand
/

test-giskard-evaluator

Sleeping

App Files Files Community

ZeroCommand committed on Feb 8, 2024

Commit

87119af

verified ·

1 Parent(s): c49d60b

update hf token validate (#7)

Browse files

- add check validity for hf token (ebc2961c6050ab25741c4380f556d90b156c8ec9)

Files changed (6) hide show

app_text_classification.py +28 -3
fetch_utils.py +5 -4
temp_log +0 -1
text_classification.py +15 -4
text_classification_ui_helpers.py +7 -4
wordings.py +20 -11

app_text_classification.py CHANGED Viewed

@@ -11,7 +11,19 @@ from text_classification_ui_helpers import (
     try_submit,
     write_column_mapping_to_config,
 )
-from wordings import CONFIRM_MAPPING_DETAILS_MD, INTRODUCTION_MD, USE_INFERENCE_API_TIP, CHECK_LOG_SECTION_RAW
 MAX_LABELS = 40
 MAX_FEATURES = 20
@@ -28,7 +40,7 @@ def get_demo():
         )
     with gr.Row():
         model_id_input = gr.Textbox(
-            label="Hugging Face Model id",
             placeholder=EXAMPLE_MODEL_ID + " (press enter to confirm)",
         )
@@ -89,6 +101,13 @@ def get_demo():
             visible=True,
             interactive=True,
         )
     with gr.Accordion(label="Scanner Advance Config (optional)", open=False):
         scanners = gr.CheckboxGroup(label="Scan Settings", visible=True)
@@ -96,7 +115,7 @@ def get_demo():
         @gr.on(triggers=[uid_label.change], inputs=[uid_label], outputs=[scanners])
         def get_scanners(uid):
             selected = read_scanners(uid)
-            # currently we remove data_leakage from the default scanners
             # Reason: data_leakage barely raises any issues and takes too many requests
             # when using inference API, causing rate limit error
             scan_config = selected + ["data_leakage"]
@@ -225,6 +244,12 @@ def get_demo():
             return gr.update(interactive=False)
         if not column_mapping_accordion.visible:
             return gr.update(interactive=False)
         return gr.update(interactive=True)
     gr.on(

     try_submit,
     write_column_mapping_to_config,
 )
+from text_classification import (
+  get_example_prediction,
+  check_hf_token_validity,
+  HuggingFaceInferenceAPIResponse
+)
+from wordings import (
+  CONFIRM_MAPPING_DETAILS_MD,
+  INTRODUCTION_MD,
+  USE_INFERENCE_API_TIP,
+  CHECK_LOG_SECTION_RAW,
+  HF_TOKEN_INVALID_STYLED
+)
 MAX_LABELS = 40
 MAX_FEATURES = 20
         )
     with gr.Row():
         model_id_input = gr.Textbox(
+            label="Hugging Face model id",
             placeholder=EXAMPLE_MODEL_ID + " (press enter to confirm)",
         )
             visible=True,
             interactive=True,
         )
+        inference_token_info = gr.HTML(value=HF_TOKEN_INVALID_STYLED, visible=False)
+        inference_token.change(
+            lambda token: gr.update(visible=lambda: check_hf_token_validity(token)),
+            inputs=[inference_token],
+            outputs=[inference_token_info],
+        )
     with gr.Accordion(label="Scanner Advance Config (optional)", open=False):
         scanners = gr.CheckboxGroup(label="Scan Settings", visible=True)
         @gr.on(triggers=[uid_label.change], inputs=[uid_label], outputs=[scanners])
         def get_scanners(uid):
             selected = read_scanners(uid)
+            # we remove data_leakage from the default scanners
             # Reason: data_leakage barely raises any issues and takes too many requests
             # when using inference API, causing rate limit error
             scan_config = selected + ["data_leakage"]
             return gr.update(interactive=False)
         if not column_mapping_accordion.visible:
             return gr.update(interactive=False)
+        _, prediction_response = get_example_prediction(
+            model_id, dataset_id, dataset_config, dataset_split, inference_token
+        )
+        if not isinstance(prediction_response, HuggingFaceInferenceAPIResponse):
+            gr.warning("Your HF token is invalid. Please check your token.")
+            return gr.update(interactive=False)
         return gr.update(interactive=True)
     gr.on(

fetch_utils.py CHANGED Viewed

@@ -14,18 +14,19 @@ def check_dataset_and_get_config(dataset_id):
 def check_dataset_and_get_split(dataset_id, dataset_config):
     try:
-        splits = datasets.get_dataset_split_names(dataset_id, dataset_config, trust_remote_code=True)
     except Exception as e:
         # Dataset may not exist
         logging.warning(
             f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}"
         )
         return None
-    if isinstance(splits, list):
         return splits
-    else:
         # Dataset has no splits
         logging.warning(
-            f"Dataset {dataset_id} with config {dataset_config} has no splits"
         )
         return None

 def check_dataset_and_get_split(dataset_id, dataset_config):
     """Return the split names of a dataset config as a list, or None on failure.

     None is returned (after logging a warning) when `datasets.load_dataset`
     raises, or when reading `keys()` from the loaded object raises.
     """
     try:
+        ds = datasets.load_dataset(dataset_id, dataset_config, trust_remote_code=True)
     except Exception as e:
         # Dataset may not exist
         logging.warning(
             f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}"
         )
         return None
+    try:
         # NOTE(review): presumably `ds` is a mapping of split name -> dataset
         # when no split is requested; confirm against the datasets docs.
+        splits = list(ds.keys())
         return splits
+    except Exception as e:
         # Dataset has no splits
         logging.warning(
+            f"Dataset {dataset_id} with config {dataset_config} has no splits: {e}"
         )
         return None

temp_log DELETED Viewed

	@@ -1 +0,0 @@
1	- ./tmp/53513338-9dfa-4f6e-bea4-63857a9d93a6.log

text_classification.py CHANGED Viewed

@@ -272,7 +272,7 @@ def select_the_first_string_column(ds):
     return None
-def get_example_prediction(model_id, dataset_id, dataset_config, dataset_split):
     # get a sample prediction from the model on the dataset
     prediction_input = None
     prediction_result = None
@@ -284,8 +284,7 @@ def get_example_prediction(model_id, dataset_id, dataset_config, dataset_split):
             prediction_input = ds[0][select_the_first_string_column(ds)]
         else:
             prediction_input = ds[0]["text"]
-        hf_token = os.environ.get(HF_WRITE_TOKEN, default="")
         payload = {"inputs": prediction_input, "options": {"use_cache": True}}
         results = hf_inference_api(model_id, hf_token, payload)
@@ -381,4 +380,16 @@ def text_classification_fix_column_mapping(column_mapping, ppl, d_id, config, sp
 def strip_model_id_from_url(model_id):
     if model_id.startswith("https://huggingface.co/"):
         return "/".join(model_id.split("/")[-2])
-    return model_id

     return None
+def get_example_prediction(model_id, dataset_id, dataset_config, dataset_split, hf_token):
     # get a sample prediction from the model on the dataset
     prediction_input = None
     prediction_result = None
             prediction_input = ds[0][select_the_first_string_column(ds)]
         else:
             prediction_input = ds[0]["text"]
         payload = {"inputs": prediction_input, "options": {"use_cache": True}}
         results = hf_inference_api(model_id, hf_token, payload)
 def strip_model_id_from_url(model_id):
     if model_id.startswith("https://huggingface.co/"):
         return "/".join(model_id.split("/")[-2])
+    return model_id
+def check_hf_token_validity(hf_token):
+    """Report whether `hf_token` is accepted for an inference call.
+
+    Returns False for an empty string or a non-string value. Otherwise a
+    fixed test payload is passed to `hf_inference_api`, and the result is
+    False when the response contains "error", True otherwise.
+    """
+    if hf_token == "":
+        return False
+    if not isinstance(hf_token, str):
+        return False
+    # use inference api to check the token
+    payload = {"inputs": "This is a test", "options": {"use_cache": True}}
+    response = hf_inference_api("cardiffnlp/twitter-roberta-base-sentiment-latest", hf_token, payload)
+    # NOTE(review): assumes a failed call returns a container holding an
+    # "error" entry - confirm against hf_inference_api.
+    if "error" in response:
+        return False
+    return True

text_classification_ui_helpers.py CHANGED Viewed

@@ -27,6 +27,7 @@ from wordings import (
     CHECK_LOG_SECTION_RAW,
     get_styled_input,
 )
 MAX_LABELS = 40
 MAX_FEATURES = 20
@@ -59,7 +60,9 @@ def check_dataset(dataset_id):
                 gr.update(),
                 ""
             )
-        splits = datasets.get_dataset_split_names(dataset_id, configs[0], trust_remote_code=True)
         return (
             gr.update(choices=configs, value=configs[0], visible=True),
             gr.update(choices=splits, value=splits[0], visible=True),
@@ -212,9 +215,11 @@ def align_columns_and_show_prediction(
     dropdown_placement = [
         gr.Dropdown(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)
     ]
     prediction_input, prediction_response = get_example_prediction(
-        model_id, dataset_id, dataset_config, dataset_split
     )
     if prediction_input is None or prediction_response is None:
@@ -255,7 +260,6 @@ def align_columns_and_show_prediction(
         )
     if len(ds_labels) != len(model_labels):
-        # gr.Warning(UNMATCHED_MODEL_DATASET)
         return (
             gr.update(value=UNMATCHED_MODEL_DATASET_STYLED_ERROR, visible=True),
             gr.update(visible=False),
@@ -324,7 +328,6 @@ def construct_label_and_feature_mapping(all_mappings, ds_labels, ds_features):
     feature_mapping = all_mappings["features"]
     return label_mapping, feature_mapping
 def try_submit(m_id, d_id, config, split, inference, inference_token, uid):
     all_mappings = read_column_mapping(uid)
     check_column_mapping_keys_validity(all_mappings)

     CHECK_LOG_SECTION_RAW,
     get_styled_input,
 )
+import os
 MAX_LABELS = 40
 MAX_FEATURES = 20
                 gr.update(),
                 ""
             )
+        splits = datasets.get_dataset_split_names(
+                        dataset_id, configs[0], trust_remote_code=True
+                    )
         return (
             gr.update(choices=configs, value=configs[0], visible=True),
             gr.update(choices=splits, value=splits[0], visible=True),
     dropdown_placement = [
         gr.Dropdown(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)
     ]
+    hf_token = os.environ.get("HF_WRITE_TOKEN", default="")
     prediction_input, prediction_response = get_example_prediction(
+        model_id, dataset_id, dataset_config, dataset_split, hf_token
     )
     if prediction_input is None or prediction_response is None:
         )
     if len(ds_labels) != len(model_labels):
         return (
             gr.update(value=UNMATCHED_MODEL_DATASET_STYLED_ERROR, visible=True),
             gr.update(visible=False),
     feature_mapping = all_mappings["features"]
     return label_mapping, feature_mapping
 def try_submit(m_id, d_id, config, split, inference, inference_token, uid):
     all_mappings = read_column_mapping(uid)
     check_column_mapping_keys_validity(all_mappings)

wordings.py CHANGED Viewed

@@ -2,7 +2,7 @@ INTRODUCTION_MD = """
                 <h1 style="text-align: center;">
                 🐢Giskard Evaluator
                 </h1>
-                Welcome to Giskard Evaluator Space! Get your report immediately by simply input your model id and dataset id below. Follow our leads and improve your model in no time.
                 """
 CONFIRM_MAPPING_DETAILS_MD = """
                             <h1 style="text-align: center;">
@@ -14,11 +14,7 @@ CONFIRM_MAPPING_DETAILS_FAIL_MD = """
                             <h1 style="text-align: center;">
                             Confirm Pre-processing Details
                             </h1>
-                            The format of your dataset with the model does not match. <b>Pleaser double check your model and dataset.</b>
-                            """
-UNMATCHED_MODEL_DATASET = """
-                            Model prediction labels do not align with the labels present in the dataset. Please double check your model and dataset.
                             """
 CONFIRM_MAPPING_DETAILS_FAIL_RAW = """
@@ -30,7 +26,7 @@ CHECK_CONFIG_OR_SPLIT_RAW = """
                             """
 CHECK_LOG_SECTION_RAW = """
-                          Your have successfully submitted a Giskard evaluation. Further details are available in the Logs tab, providing information on your queue status and the current job log.
                         """
 PREDICTION_SAMPLE_MD = """
@@ -42,7 +38,7 @@ PREDICTION_SAMPLE_MD = """
 MAPPING_STYLED_ERROR_WARNING = """
                         <h3 style="text-align: center;color: orange; background-color: #fff0f3; border-radius: 8px; padding: 10px; ">
-                        We cannot auto-align the labels/features of your dataset and model. Please double check the info below and select correct mapping before submission.
                         </h3>
                         """
@@ -57,18 +53,31 @@ NOT_TEXT_CLASSIFICATION_MODEL_RAW = """
                       """
 USE_INFERENCE_API_TIP = """
-                We are using
                 <a href="https://huggingface.co/docs/api-inference/detailed_parameters#text-classification-task">
                     Hugging Face Inference API
                 </a>
                 for the evaluation,
                 which requires your <a href="https://huggingface.co/settings/tokens">HF token</a>.
                 <br/>
-                Your HF token is only used in this Space for your evaluation.
                 </b>
             """
 def get_styled_input(input):
     return f"""<h3 style="text-align: center;color: #4ca154; background-color: #e2fbe8; border-radius: 8px; padding: 10px; ">
-            Sample input: {input}
             </h3>"""

                 <h1 style="text-align: center;">
                 🐢Giskard Evaluator
                 </h1>
+                Welcome to Giskard Evaluator Space! Get your report immediately by simply input your model id and dataset id below. Follow our leads and improve your model.
                 """
 CONFIRM_MAPPING_DETAILS_MD = """
                             <h1 style="text-align: center;">
                             <h1 style="text-align: center;">
                             Confirm Pre-processing Details
                             </h1>
+                            Sorry, we cannot align the input/output of your dataset with the model. <b>Pleaser double check your model and dataset.</b>
                             """
 CONFIRM_MAPPING_DETAILS_FAIL_RAW = """
                             """
 CHECK_LOG_SECTION_RAW = """
+                          Your have successfully submitted a Giskard evaluation. Further details are available in the Logs tab. You can find your report will be posted to your model's community discussion.
                         """
 PREDICTION_SAMPLE_MD = """
 MAPPING_STYLED_ERROR_WARNING = """
                         <h3 style="text-align: center;color: orange; background-color: #fff0f3; border-radius: 8px; padding: 10px; ">
+                        Sorry, we cannot auto-align the labels/features of your dataset and model. Please double check.
                         </h3>
                         """
                       """
 USE_INFERENCE_API_TIP = """
+                We recommend to use
                 <a href="https://huggingface.co/docs/api-inference/detailed_parameters#text-classification-task">
                     Hugging Face Inference API
                 </a>
                 for the evaluation,
                 which requires your <a href="https://huggingface.co/settings/tokens">HF token</a>.
                 <br/>
+                Otherwise, an
+                <a href="https://huggingface.co/docs/transformers/main_classes/pipelines#transformers.TextClassificationPipeline">
+                    HF pipeline
+                </a>
+                will be created and run in this Space. It takes more time to get the result.
+                <br/>
+                <b>
+                Do not worry, your HF token is only used in this Space for your evaluation.
                 </b>
             """
+HF_TOKEN_INVALID_STYLED= """
+                <h3 style="text-align: center;color: #fa5f5f; background-color: #fbe2e2; border-radius: 8px; padding: 10px; ">
+                Your Hugging Face token is invalid. Please double check your token.
+                </h3>
+                """
 def get_styled_input(input):
     return f"""<h3 style="text-align: center;color: #4ca154; background-color: #e2fbe8; border-radius: 8px; padding: 10px; ">
+            Your model and dataset have been validated! <br /> Sample input: {input}
             </h3>"""