ZeroCommand committed
Commit 201d156
Parent: d9ca844

handle inference API error; fix non-text dataset columns

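In short: `hf_inference_api` drops its blind 30-attempt retry loop in favor of a single request whose error payload is returned to the caller; a new `preload_hf_inference_api` helper warms the model on the Inference API as soon as the user selects it; `get_example_prediction` now surfaces the API's `estimated_time` and error responses instead of swallowing them; and datasets without a `text` column now fall back to their first string column rather than taking the first column of any type.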
app_text_classification.py CHANGED

```diff
@@ -128,7 +128,11 @@ def get_demo():
             fn=get_related_datasets_from_leaderboard,
             inputs=[model_id_input],
             outputs=[dataset_id_input],
-        ).then(fn=check_dataset, inputs=[dataset_id_input], outputs=[dataset_config_input, dataset_split_input, loading_status])
+        ).then(
+            fn=check_dataset,
+            inputs=[dataset_id_input],
+            outputs=[dataset_config_input, dataset_split_input, loading_status]
+        )
 
         gr.on(
             triggers=[dataset_id_input.input],
```
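This hunk only reflows a one-line `.then()` call for readability; behavior is unchanged. For readers unfamiliar with the pattern, here is a minimal sketch of Gradio's event chaining (Gradio 4.x style; the component and handler names below are invented for illustration, not taken from the app):

```python
# Minimal sketch of gr.on(...).then(...) chaining. All names are illustrative.
import gradio as gr

def fetch_suggestions(model_id):
    return f"datasets related to {model_id}"

def validate_dataset(dataset_id):
    return f"validated {dataset_id}"

with gr.Blocks() as demo:
    model_box = gr.Textbox(label="model id")
    dataset_box = gr.Textbox(label="dataset id")
    status_box = gr.Textbox(label="status")
    # gr.on registers the first handler; .then queues a follow-up
    # that runs only after the first handler has finished.
    gr.on(
        triggers=[model_box.input],
        fn=fetch_suggestions,
        inputs=[model_box],
        outputs=[dataset_box],
    ).then(
        fn=validate_dataset,
        inputs=[dataset_box],
        outputs=[status_box],
    )

if __name__ == "__main__":
    demo.launch()
```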
text_classification.py CHANGED

```diff
@@ -9,6 +9,7 @@ import requests
 import os
 import time
 
+logger = logging.getLogger(__name__)
 HF_WRITE_TOKEN = "HF_WRITE_TOKEN"
 
 logger = logging.getLogger(__file__)
@@ -76,19 +77,18 @@ def hf_inference_api(model_id, hf_token, payload):
     )
     url = f"{hf_inference_api_endpoint}/models/{model_id}"
     headers = {"Authorization": f"Bearer {hf_token}"}
-    output = {"error": "First attemp"}
-    attempt = 30
-    while "error" in output and attempt > 0:
-        response = requests.post(url, headers=headers, json=payload)
-        if response.status_code != 200:
-            logging.error(f"Request to inference API returns {response.status_code}")
-        try:
-            return response.json()
-        except Exception:
-            logging.error(f"{response.content}")
-            output = {"error": response.content}
-        attempt -= 1
-        time.sleep(2)
+    response = requests.post(url, headers=headers, json=payload)
+    if not hasattr(response, "status_code") or response.status_code != 200:
+        logger.warning(f"Request to inference API returns {response}")
+    try:
+        return response.json()
+    except Exception:
+        return {"error": response.content}
+
+def preload_hf_inference_api(model_id):
+    payload = {"inputs": "This is a test", "options": {"use_cache": True}}
+    hf_token = os.environ.get(HF_WRITE_TOKEN, default="")
+    hf_inference_api(model_id, hf_token, payload)
 
 def check_model_pipeline(model_id):
     try:
```
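The rewrite drops the 30 x 2-second retry loop: `hf_inference_api` now sends one POST and returns whatever JSON comes back, leaving retry policy to the caller. (Note that the first hunk adds a `logging.getLogger(__name__)` logger while keeping the later `logging.getLogger(__file__)` assignment, so the second one is what `logger` ends up bound to.) A hedged sketch of the responses a single call like this can produce, based on the public Inference API behavior; the model id is just an example:

```python
# Hedged sketch: what one POST to the HF Inference API can return.
# Endpoint and response shapes follow the public Inference API docs;
# the model id is an arbitrary example.
import os
import requests

model_id = "distilbert-base-uncased-finetuned-sst-2-english"
url = f"https://api-inference.huggingface.co/models/{model_id}"
headers = {"Authorization": f"Bearer {os.environ.get('HF_WRITE_TOKEN', '')}"}
payload = {"inputs": "This is a test", "options": {"use_cache": True}}

response = requests.post(url, headers=headers, json=payload)
body = response.json()
# Cold model -> HTTP 503 with a loading hint:
#   {"error": "Model ... is currently loading", "estimated_time": 20.0}
# Warm text-classification model -> nested list of label scores:
#   [[{"label": "POSITIVE", "score": 0.99}, {"label": "NEGATIVE", "score": 0.01}]]
# This is why get_example_prediction (below) checks for "estimated_time"
# and "error" keys before unwrapping lists.
print(body)
```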
```diff
@@ -262,6 +262,12 @@ def check_dataset_features_validity(d_id, config, split):
 
     return df, dataset_features
 
+def select_the_first_string_column(ds):
+    for feature in ds.features.keys():
+        if isinstance(ds[0][feature], str):
+            return feature
+    return None
+
 
 def get_example_prediction(model_id, dataset_id, dataset_config, dataset_split):
     # get a sample prediction from the model on the dataset
```
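`select_the_first_string_column` replaces the old `list(ds.features.keys())[0]` lookup, which happily returned an int or float column when the dataset had no `text` column. A quick check on a toy dataset (the data below is invented, and the helper is assumed to be in scope):

```python
# Toy check of select_the_first_string_column; the example data is invented.
import datasets

ds = datasets.Dataset.from_dict(
    {"idx": [0, 1], "review": ["great movie", "terrible movie"]}
)
# "idx" holds ints, so the first *string* column is "review".
assert select_the_first_string_column(ds) == "review"
# Caveat: it returns None when no string column exists, and the caller
# then indexes ds[0][None]; worth a guard upstream.
```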
```diff
@@ -272,13 +278,20 @@ def get_example_prediction(model_id, dataset_id, dataset_config, dataset_split):
         ds = datasets.load_dataset(dataset_id, dataset_config)[dataset_split]
         if "text" not in ds.features.keys():
             # Dataset does not have text column
-            prediction_input = ds[0][list(ds.features.keys())[0]]
+            prediction_input = ds[0][select_the_first_string_column(ds)]
         else:
             prediction_input = ds[0]["text"]
-
+
         hf_token = os.environ.get(HF_WRITE_TOKEN, default="")
         payload = {"inputs": prediction_input, "options": {"use_cache": True}}
         results = hf_inference_api(model_id, hf_token, payload)
+
+        if isinstance(results, dict) and "estimated_time" in results.keys():
+            return prediction_input, str(results["estimated_time"])
+
+        if isinstance(results, dict) and "error" in results.keys():
+            raise ValueError(results["error"])
+
         while isinstance(results, list):
             if isinstance(results[0], dict):
                 break
@@ -288,8 +301,7 @@ def get_example_prediction(model_id, dataset_id, dataset_config, dataset_split):
         }
     except Exception as e:
         # Pipeline prediction failed, need to provide labels
-        logger.warn(f"Pipeline prediction failed due to {e}")
-        return prediction_input, None
+        return prediction_input, e
 
     return prediction_input, prediction_result
 
```
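Taken together, these hunks give `get_example_prediction` a three-way contract: a string back means the model is still loading (the value is the API's `estimated_time`), an `Exception` means the call failed (the `ValueError` raised on an `"error"` payload is caught by this same `except` block), and a dict means real label scores. A sketch of how a caller is expected to branch; the model and dataset ids are illustrative placeholders:

```python
# Sketch of consuming get_example_prediction's three possible outcomes.
# Model/dataset ids below are illustrative placeholders.
prediction_input, prediction_response = get_example_prediction(
    "distilbert-base-uncased-finetuned-sst-2-english", "sst2", "default", "train"
)
if isinstance(prediction_response, str):
    # still warming up; the string is the API's estimated_time in seconds
    print(f"Model loading, retry in ~{prediction_response}s")
elif isinstance(prediction_response, Exception):
    # request or parsing failed; surface the error to the user
    print(f"Inference failed: {prediction_response}")
else:
    # normal case, e.g. {"POSITIVE": 0.99, "NEGATIVE": 0.01}
    print(prediction_response)
```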
text_classification_ui_helpers.py CHANGED

```diff
@@ -12,6 +12,7 @@ from io_utils import read_column_mapping, write_column_mapping
 from run_jobs import save_job_to_pipe
 from text_classification import (
     check_model_task,
+    preload_hf_inference_api,
     get_example_prediction,
     get_labels_and_features_from_dataset,
 )
@@ -159,9 +160,10 @@ def precheck_model_ds_enable_example_btn(
     model_id, dataset_id, dataset_config, dataset_split
 ):
     model_task = check_model_task(model_id)
+    preload_hf_inference_api(model_id)
     if model_task is None or model_task != "text-classification":
         gr.Warning("Please check your model.")
-        return gr.update(interactive=False), ""
+        return (gr.update(), gr.update(), "")
 
     if dataset_config is None or dataset_split is None or len(dataset_config) == 0:
         return (gr.update(), gr.update(), "")
```
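Two things change in `precheck_model_ds_enable_example_btn`: the model is preloaded as soon as it is known, and the early return on a wrong model task now yields three values instead of two. The latter matters because every exit path of a Gradio handler must return one value per wired output component; a mismatched count fails at runtime. A minimal sketch of the rule, assuming three hypothetical outputs:

```python
# Every return path must match the number of wired outputs (three here).
# Handler and update values are hypothetical.
import gradio as gr

def precheck(ok):
    if not ok:
        # no-op updates still have to be returned, one per output
        return (gr.update(), gr.update(), "")
    return (gr.update(interactive=True), gr.update(visible=True), "ready")
```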
```diff
@@ -182,8 +184,6 @@
         return (gr.update(interactive=False), gr.update(value=pd.DataFrame(), visible=False), "")
 
 
-
-
 def align_columns_and_show_prediction(
     model_id,
     dataset_id,
@@ -209,12 +209,32 @@ def align_columns_and_show_prediction(
         gr.Dropdown(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)
     ]
 
-    # FIXME: prefiction_output could be None
-    prediction_input, prediction_output = get_example_prediction(
+    prediction_input, prediction_response = get_example_prediction(
         model_id, dataset_id, dataset_config, dataset_split
     )
 
-    model_labels = list(prediction_output.keys())
+    if isinstance(prediction_response, str):
+        return (
+            gr.update(visible=False),
+            gr.update(visible=False),
+            gr.update(visible=False, open=False),
+            gr.update(interactive=False),
+            f"Hugging Face Inference API is loading your model, estimation time {prediction_response}",
+            *dropdown_placement,
+        )
+
+    if isinstance(prediction_response, Exception):
+        gr.Warning("Please check your model or Hugging Face token.")
+        return (
+            gr.update(visible=False),
+            gr.update(visible=False),
+            gr.update(visible=False, open=False),
+            gr.update(interactive=False),
+            f"Sorry, inference api loading error {prediction_response}, please check your model and token.",
+            *dropdown_placement,
+        )
+
+    model_labels = list(prediction_response.keys())
 
     ds = datasets.load_dataset(dataset_id, dataset_config)[dataset_split]
     ds_labels, ds_features = get_labels_and_features_from_dataset(ds)
```
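The early returns above end with `*dropdown_placement`, unpacking one hidden-dropdown update per label/feature slot so the tuple length always matches the handler's full output list. A sketch of the pattern; the constants and names are illustrative:

```python
# Returning a variable-length tail of component updates via unpacking.
# MAX_LABELS / MAX_FEATURES values and names are illustrative.
import gradio as gr

MAX_LABELS, MAX_FEATURES = 8, 4
dropdown_placement = [
    gr.Dropdown(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)
]

def bail_out(message):
    # fixed prefix of updates, then one update per dropdown slot
    return (
        gr.update(visible=False),
        gr.update(interactive=False),
        message,
        *dropdown_placement,
    )
```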
```diff
@@ -255,7 +275,7 @@ def align_columns_and_show_prediction(
 
     return (
         gr.update(value=get_styled_input(prediction_input), visible=True),
-        gr.update(value=prediction_output, visible=True),
+        gr.update(value=prediction_response, visible=True),
         gr.update(visible=True, open=False),
         gr.update(interactive=(run_inference and inference_token != "")),
         "",
```