giskard-evaluator

Running

App Files Files Community

200

ZeroCommand committed on Dec 18, 2023

Commit

63bdb5b

•

1 Parent(s): 5b24f7d

GSK-2396 allow edit feature mapping and scan config

Browse files

Files changed (4) hide show

app.py +46 -31
scan_config.yaml +8 -0
text_classification.py +33 -23
utils.py +24 -0

app.py CHANGED Viewed

@@ -11,13 +11,12 @@ import json
 from transformers.pipelines import TextClassificationPipeline
 from text_classification import check_column_mapping_keys_validity, text_classification_fix_column_mapping
 HF_REPO_ID = 'HF_REPO_ID'
 HF_SPACE_ID = 'SPACE_ID'
 HF_WRITE_TOKEN = 'HF_WRITE_TOKEN'
 theme = gr.themes.Soft(
     primary_hue="green",
 )
@@ -70,6 +69,7 @@ def try_validate(m_id, ppl, dataset_id, dataset_config, dataset_split, column_ma
             gr.update(visible=False),       # Model prediction input
             gr.update(visible=False),       # Model prediction preview
             gr.update(visible=False),       # Label mapping preview
         )
     if isinstance(ppl, Exception):
         gr.Warning(f'Failed to load model": {ppl}')
@@ -80,6 +80,7 @@ def try_validate(m_id, ppl, dataset_id, dataset_config, dataset_split, column_ma
             gr.update(visible=False),       # Model prediction input
             gr.update(visible=False),       # Model prediction preview
             gr.update(visible=False),       # Label mapping preview
         )
     # Validate dataset
@@ -105,7 +106,7 @@ def try_validate(m_id, ppl, dataset_id, dataset_config, dataset_split, column_ma
             gr.update(visible=False),       # Model prediction input
             gr.update(visible=False),       # Model prediction preview
             gr.update(visible=False),       # Label mapping preview
-            # gr.update(visible=True),        # Column mapping
         )
     # TODO: Validate column mapping by running once
@@ -118,21 +119,21 @@ def try_validate(m_id, ppl, dataset_id, dataset_config, dataset_split, column_ma
         except Exception:
             column_mapping = {}
-        column_mapping, prediction_input, prediction_result, id2label_df = \
             text_classification_fix_column_mapping(column_mapping, ppl, d_id, config, split)
         column_mapping = json.dumps(column_mapping, indent=2)
-    if prediction_result is None:
         gr.Warning('The model failed to predict with the first row in the dataset. Please provide column mappings in "Advance" settings.')
         return (
             gr.update(interactive=False),   # Submit button
-            gr.update(visible=True),       # Loading row
-            gr.update(visible=False),        # Preview row
-            gr.update(visible=False),       # Model prediction input
             gr.update(visible=False),   # Model prediction preview
-            gr.update(visible=False),   # Label mapping preview
-            # gr.update(value=column_mapping, visible=True, interactive=True),    # Column mapping
         )
     elif id2label_df is None:
         gr.Warning('The prediction result does not conform the labels in the dataset. Please provide label mappings in "Advance" settings.')
@@ -142,8 +143,8 @@ def try_validate(m_id, ppl, dataset_id, dataset_config, dataset_split, column_ma
             gr.update(visible=True),        # Preview row
             gr.update(value=f'**Sample Input**: {prediction_input}', visible=True),       # Model prediction input
             gr.update(value=prediction_result, visible=True),   # Model prediction preview
-            gr.update(visible=False),   # Label mapping preview
-            # gr.update(value=column_mapping, visible=True, interactive=True),    # Column mapping
         )
     gr.Info("Model and dataset validations passed. Your can submit the evaluation task.")
@@ -155,6 +156,7 @@ def try_validate(m_id, ppl, dataset_id, dataset_config, dataset_split, column_ma
         gr.update(value=f'**Sample Input**: {prediction_input}', visible=True),       # Model prediction input
         gr.update(value=prediction_result, visible=True),   # Model prediction preview
         gr.update(value=id2label_df, visible=True, interactive=True), # Label mapping preview
     )
@@ -180,6 +182,7 @@ def try_submit(m_id, d_id, config, split, column_mappings, local):
             "--output_portal", "huggingface",
             # TODO: "--feature_mapping", json.dumps(column_mapping),
             "--label_mapping", json.dumps(label_mapping),
         ]
         eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
@@ -221,12 +224,14 @@ with gr.Blocks(theme=theme) as iface:
                 gr.Warning(f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}")
                 pass
-        def gate_validate_btn(model_id, dataset_id, dataset_config, dataset_split, id2label_mapping_dataframe=None):
             column_mapping = '{}'
-            m_id, ppl = check_model(model_id=model_id)
             if id2label_mapping_dataframe is not None:
-                column_mapping = id2label_mapping_dataframe.to_json(orient="split")
             if check_column_mapping_keys_validity(column_mapping, ppl) is False:
                 gr.Warning('Label mapping table has invalid contents. Please check again.')
                 return (gr.update(interactive=False),
@@ -234,18 +239,18 @@ with gr.Blocks(theme=theme) as iface:
                         gr.update(),
                         gr.update(),
                         gr.update(),
                         gr.update())
             else:
                 if model_id and dataset_id and dataset_config and dataset_split:
-                    return try_validate(m_id, ppl, dataset_id, dataset_config, dataset_split, column_mapping)
                 else:
-                    del ppl
                     return (gr.update(interactive=False),
                             gr.update(visible=True),
                             gr.update(visible=False),
                             gr.update(visible=False),
                             gr.update(visible=False),
                             gr.update(visible=False))
         with gr.Row():
             gr.Markdown('''
@@ -256,6 +261,12 @@ with gr.Blocks(theme=theme) as iface:
                 ''')
         with gr.Row():
             run_local = gr.Checkbox(value=True, label="Run in this Space")
         with gr.Row():
             model_id_input = gr.Textbox(
@@ -279,11 +290,11 @@ with gr.Blocks(theme=theme) as iface:
         with gr.Row(visible=True) as loading_row:
             gr.Markdown('''
-                        <h1 style="text-align: center;">
-                        Please validate your model and dataset first...
-                        </h1>
                         ''')
         with gr.Row(visible=False) as preview_row:
             gr.Markdown('''
                 <h1 style="text-align: center;">
@@ -294,7 +305,7 @@ with gr.Blocks(theme=theme) as iface:
         with gr.Row():
             id2label_mapping_dataframe = gr.DataFrame(label="Preview of label mapping", interactive=True, visible=False)
         with gr.Row():
             example_input = gr.Markdown('Sample Input: ', visible=False)
@@ -310,20 +321,24 @@ with gr.Blocks(theme=theme) as iface:
         model_id_input.change(gate_validate_btn,
                                 inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
-                                outputs=[run_btn, loading_row, preview_row, example_input, example_labels, id2label_mapping_dataframe])
         dataset_id_input.change(gate_validate_btn,
                                 inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
-                                outputs=[run_btn, loading_row, preview_row, example_input,  example_labels, id2label_mapping_dataframe])
         dataset_config_input.change(gate_validate_btn,
                                 inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
-                                outputs=[run_btn, loading_row, preview_row, example_input, example_labels, id2label_mapping_dataframe])
         dataset_split_input.change(gate_validate_btn,
                                 inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
-                                outputs=[run_btn, loading_row, preview_row, example_input, example_labels, id2label_mapping_dataframe])
         id2label_mapping_dataframe.input(gate_validate_btn,
-                                inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input, id2label_mapping_dataframe],
-                                outputs=[run_btn, loading_row, preview_row, example_input, example_labels, id2label_mapping_dataframe])
         run_btn.click(
             try_submit,
             inputs=[

 from transformers.pipelines import TextClassificationPipeline
 from text_classification import check_column_mapping_keys_validity, text_classification_fix_column_mapping
+from utils import read_scanners, write_scanners, convert_column_mapping_to_json
 HF_REPO_ID = 'HF_REPO_ID'
 HF_SPACE_ID = 'SPACE_ID'
 HF_WRITE_TOKEN = 'HF_WRITE_TOKEN'
 theme = gr.themes.Soft(
     primary_hue="green",
 )
             gr.update(visible=False),       # Model prediction input
             gr.update(visible=False),       # Model prediction preview
             gr.update(visible=False),       # Label mapping preview
+            gr.update(visible=False),       # feature mapping preview
         )
     if isinstance(ppl, Exception):
         gr.Warning(f'Failed to load model": {ppl}')
             gr.update(visible=False),       # Model prediction input
             gr.update(visible=False),       # Model prediction preview
             gr.update(visible=False),       # Label mapping preview
+            gr.update(visible=False),       # feature mapping preview
         )
     # Validate dataset
             gr.update(visible=False),       # Model prediction input
             gr.update(visible=False),       # Model prediction preview
             gr.update(visible=False),       # Label mapping preview
+            gr.update(visible=False),       # feature mapping preview
         )
     # TODO: Validate column mapping by running once
         except Exception:
             column_mapping = {}
+        column_mapping, prediction_input, prediction_result, id2label_df, feature_df = \
             text_classification_fix_column_mapping(column_mapping, ppl, d_id, config, split)
         column_mapping = json.dumps(column_mapping, indent=2)
+    if prediction_result is None and id2label_df is not None:
         gr.Warning('The model failed to predict with the first row in the dataset. Please provide column mappings in "Advance" settings.')
         return (
             gr.update(interactive=False),   # Submit button
+            gr.update(visible=False),       # Loading row
+            gr.update(visible=True),        # Preview row
+            gr.update(value=f'**Sample Input**: {prediction_input}', visible=True),       # Model prediction input
             gr.update(visible=False),   # Model prediction preview
+            gr.update(value=id2label_df, visible=True, interactive=True),   # Label mapping preview
+            gr.update(value=feature_df, visible=True, interactive=True),   # feature mapping preview
         )
     elif id2label_df is None:
         gr.Warning('The prediction result does not conform the labels in the dataset. Please provide label mappings in "Advance" settings.')
             gr.update(visible=True),        # Preview row
             gr.update(value=f'**Sample Input**: {prediction_input}', visible=True),       # Model prediction input
             gr.update(value=prediction_result, visible=True),   # Model prediction preview
+            gr.update(visible=True, interactive=True),   # Label mapping preview
+            gr.update(visible=True, interactive=True),   # feature mapping preview
         )
     gr.Info("Model and dataset validations passed. Your can submit the evaluation task.")
         gr.update(value=f'**Sample Input**: {prediction_input}', visible=True),       # Model prediction input
         gr.update(value=prediction_result, visible=True),   # Model prediction preview
         gr.update(value=id2label_df, visible=True, interactive=True), # Label mapping preview
+        gr.update(value=feature_df, visible=True, interactive=True),   # feature mapping preview
     )
             "--output_portal", "huggingface",
             # TODO: "--feature_mapping", json.dumps(column_mapping),
             "--label_mapping", json.dumps(label_mapping),
+            "--scan_config", "./scan_config.yaml",
         ]
         eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
                 gr.Warning(f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}")
                 pass
+        def gate_validate_btn(model_id, dataset_id, dataset_config, dataset_split, id2label_mapping_dataframe=None, feature_mapping_dataframe=None):
             column_mapping = '{}'
+            _, ppl = check_model(model_id=model_id)
             if id2label_mapping_dataframe is not None:
+                labels = convert_column_mapping_to_json(id2label_mapping_dataframe.value, label="data")
+                features = convert_column_mapping_to_json(feature_mapping_dataframe.value, label="text")
+                column_mapping = json.dumps({**labels, **features}, indent=2)
+                print('229 >>>>> ', column_mapping)
             if check_column_mapping_keys_validity(column_mapping, ppl) is False:
                 gr.Warning('Label mapping table has invalid contents. Please check again.')
                 return (gr.update(interactive=False),
                         gr.update(),
                         gr.update(),
                         gr.update(),
+                        gr.update(),
                         gr.update())
             else:
                 if model_id and dataset_id and dataset_config and dataset_split:
+                    return try_validate(model_id, ppl, dataset_id, dataset_config, dataset_split, column_mapping)
                 else:
                     return (gr.update(interactive=False),
                             gr.update(visible=True),
                             gr.update(visible=False),
                             gr.update(visible=False),
                             gr.update(visible=False),
+                            gr.update(visible=False),
                             gr.update(visible=False))
         with gr.Row():
             gr.Markdown('''
                 ''')
         with gr.Row():
             run_local = gr.Checkbox(value=True, label="Run in this Space")
+            run_inference = gr.Checkbox(value=False, label="Run with Inference API")
+        with gr.Row() as advanced_row:
+            selected = read_scanners('./scan_config.yaml')
+            scan_config = selected + ['data_leakage']
+            scanners = gr.CheckboxGroup(choices=scan_config, value=selected, label='Scan Settings', visible=True)
         with gr.Row():
             model_id_input = gr.Textbox(
         with gr.Row(visible=True) as loading_row:
             gr.Markdown('''
+                        <p style="text-align: center;">
+                        🚀🐢Please validate your model and dataset first...
+                        </p>
                         ''')
         with gr.Row(visible=False) as preview_row:
             gr.Markdown('''
                 <h1 style="text-align: center;">
         with gr.Row():
             id2label_mapping_dataframe = gr.DataFrame(label="Preview of label mapping", interactive=True, visible=False)
+            feature_mapping_dataframe = gr.DataFrame(label="Preview of feature mapping", interactive=True, visible=False)
         with gr.Row():
             example_input = gr.Markdown('Sample Input: ', visible=False)
         model_id_input.change(gate_validate_btn,
                                 inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
+                                outputs=[run_btn, loading_row, preview_row, example_input, example_labels, id2label_mapping_dataframe, feature_mapping_dataframe])
         dataset_id_input.change(gate_validate_btn,
                                 inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
+                                outputs=[run_btn, loading_row, preview_row, example_input,  example_labels, id2label_mapping_dataframe, feature_mapping_dataframe])
         dataset_config_input.change(gate_validate_btn,
                                 inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
+                                outputs=[run_btn, loading_row, preview_row, example_input, example_labels, id2label_mapping_dataframe, feature_mapping_dataframe])
         dataset_split_input.change(gate_validate_btn,
                                 inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
+                                outputs=[run_btn, loading_row, preview_row, example_input, example_labels, id2label_mapping_dataframe, feature_mapping_dataframe])
         id2label_mapping_dataframe.input(gate_validate_btn,
+                                inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input, id2label_mapping_dataframe, feature_mapping_dataframe],
+                                outputs=[run_btn, loading_row, preview_row, example_input, example_labels, id2label_mapping_dataframe, feature_mapping_dataframe])
+        feature_mapping_dataframe.input(gate_validate_btn,
+                                inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input, id2label_mapping_dataframe, feature_mapping_dataframe],
+                                outputs=[run_btn, loading_row, preview_row, example_input, example_labels, id2label_mapping_dataframe, feature_mapping_dataframe])
+        scanners.change(write_scanners, inputs=scanners)
         run_btn.click(
             try_submit,
             inputs=[

scan_config.yaml ADDED Viewed

	@@ -0,0 +1,8 @@

+detectors:
+  - ethical_bias
+  - text_perturbation
+  - robustness
+  - performance
+  - underconfidence
+  - overconfidence
+  - spurious_correlation

text_classification.py CHANGED Viewed

@@ -19,9 +19,8 @@ def text_classification_map_model_and_dataset_labels(id2label, dataset_features)
             continue
         if len(feature.names) != len(id2label_mapping.keys()):
             continue
         dataset_labels = feature.names
         # Try to match labels
         for label in feature.names:
             if label in id2label_mapping.keys():
@@ -31,6 +30,8 @@ def text_classification_map_model_and_dataset_labels(id2label, dataset_features)
                 model_label, label = text_classificaiton_match_label_case_unsensative(id2label_mapping, label)
             if model_label is not None:
                 id2label_mapping[model_label] = label
     return id2label_mapping, dataset_labels
@@ -52,15 +53,15 @@ def check_column_mapping_keys_validity(column_mapping, ppl):
 def text_classification_fix_column_mapping(column_mapping, ppl, d_id, config, split):
     # We assume dataset is ok here
     ds = datasets.load_dataset(d_id, config)[split]
     try:
         dataset_features = ds.features
     except AttributeError:
         # Dataset does not have features, need to provide everything
-        return None, None, None
     # Check whether we need to infer the text input column
     infer_text_input_column = True
     if "text" in column_mapping.keys():
         dataset_text_column = column_mapping["text"]
         if dataset_text_column in dataset_features.keys():
@@ -71,12 +72,16 @@ def text_classification_fix_column_mapping(column_mapping, ppl, d_id, config, sp
     if infer_text_input_column:
         # Try to retrieve one
         candidates = [f for f in dataset_features if dataset_features[f].dtype == "string"]
         if len(candidates) > 0:
             logging.debug(f"Candidates are {candidates}")
             column_mapping["text"] = candidates[0]
         else:
             # Not found a text feature
-            return column_mapping, None, None
     # Load dataset as DataFrame
     df = ds.to_pandas()
@@ -85,24 +90,14 @@ def text_classification_fix_column_mapping(column_mapping, ppl, d_id, config, sp
     id2label_mapping = {}
     id2label = ppl.model.config.id2label
     label2id = {v: k for k, v in id2label.items()}
-    prediction_input = None
-    prediction_result = None
-    try:
-        # Use the first item to test prediction
-        prediction_input = df.head(1).at[0, column_mapping["text"]]
-        results = ppl({"text": prediction_input}, top_k=None)
-        prediction_result = {
-            f'{result["label"]}({label2id[result["label"]]})': result["score"] for result in results
-        }
-    except Exception:
-        # Pipeline prediction failed, need to provide labels
-        return column_mapping, None, None
     # Infer labels
     id2label_mapping, dataset_labels = text_classification_map_model_and_dataset_labels(id2label, dataset_features)
     id2label_mapping_dataset_model = {
         v: k for k, v in id2label_mapping.items()
     }
     if "data" in column_mapping.keys():
         if isinstance(column_mapping["data"], list):
             # Use the column mapping passed by user
@@ -112,15 +107,30 @@ def text_classification_fix_column_mapping(column_mapping, ppl, d_id, config, sp
         column_mapping["label"] = {
             i: None for i in id2label.keys()
         }
-        return column_mapping, prediction_result, None
-    prediction_result = {
-        f'[{label2id[result["label"]]}]{result["label"]}(original) - {id2label_mapping[result["label"]]}(mapped)': result["score"] for result in results
-    }
     id2label_df = pd.DataFrame({
         "Dataset Labels": dataset_labels,
         "Model Prediction Labels": [id2label_mapping_dataset_model[label] for label in dataset_labels],
     })
     if "data" not in column_mapping.keys():
         # Column mapping should contain original model labels
@@ -128,4 +138,4 @@ def text_classification_fix_column_mapping(column_mapping, ppl, d_id, config, sp
             str(i): id2label_mapping_dataset_model[label] for i, label in zip(id2label.keys(), dataset_labels)
         }
-    return column_mapping, prediction_input, prediction_result, id2label_df

             continue
         if len(feature.names) != len(id2label_mapping.keys()):
             continue
         dataset_labels = feature.names
         # Try to match labels
         for label in feature.names:
             if label in id2label_mapping.keys():
                 model_label, label = text_classificaiton_match_label_case_unsensative(id2label_mapping, label)
             if model_label is not None:
                 id2label_mapping[model_label] = label
+            else:
+                print(f"Label {label} is not found in model labels")
     return id2label_mapping, dataset_labels
 def text_classification_fix_column_mapping(column_mapping, ppl, d_id, config, split):
     # We assume dataset is ok here
     ds = datasets.load_dataset(d_id, config)[split]
     try:
         dataset_features = ds.features
     except AttributeError:
         # Dataset does not have features, need to provide everything
+        return None, None, None, None, None
     # Check whether we need to infer the text input column
     infer_text_input_column = True
+    feature_map_df = None
     if "text" in column_mapping.keys():
         dataset_text_column = column_mapping["text"]
         if dataset_text_column in dataset_features.keys():
     if infer_text_input_column:
         # Try to retrieve one
         candidates = [f for f in dataset_features if dataset_features[f].dtype == "string"]
+        feature_map_df = pd.DataFrame({
+            "Dataset Features": [candidates[0]],
+            "Model Input Features": ["text"]
+        })
         if len(candidates) > 0:
             logging.debug(f"Candidates are {candidates}")
             column_mapping["text"] = candidates[0]
         else:
             # Not found a text feature
+            return column_mapping, None, None, feature_map_df
     # Load dataset as DataFrame
     df = ds.to_pandas()
     id2label_mapping = {}
     id2label = ppl.model.config.id2label
     label2id = {v: k for k, v in id2label.items()}
     # Infer labels
     id2label_mapping, dataset_labels = text_classification_map_model_and_dataset_labels(id2label, dataset_features)
     id2label_mapping_dataset_model = {
         v: k for k, v in id2label_mapping.items()
     }
+    # TODO: convert dataframe column mapping to json properly
     if "data" in column_mapping.keys():
         if isinstance(column_mapping["data"], list):
             # Use the column mapping passed by user
         column_mapping["label"] = {
             i: None for i in id2label.keys()
         }
+        return column_mapping, None, None, None, feature_map_df
     id2label_df = pd.DataFrame({
         "Dataset Labels": dataset_labels,
         "Model Prediction Labels": [id2label_mapping_dataset_model[label] for label in dataset_labels],
     })
+    prediction_input = None
+    prediction_result = None
+    try:
+        # Use the first item to test prediction
+        prediction_input = df.head(1).at[0, column_mapping["text"]]
+        results = ppl({"text": prediction_input}, top_k=None)
+        prediction_result = {
+            f'{result["label"]}({label2id[result["label"]]})': result["score"] for result in results
+        }
+    except Exception as e:
+        # Pipeline prediction failed, need to provide labels
+        print(e, '>>>> error')
+        return column_mapping, prediction_input, None, id2label_df, feature_map_df
+    prediction_result = {
+        f'[{label2id[result["label"]]}]{result["label"]}(original) - {id2label_mapping[result["label"]]}(mapped)': result["score"] for result in results
+    }
     if "data" not in column_mapping.keys():
         # Column mapping should contain original model labels
             str(i): id2label_mapping_dataset_model[label] for i, label in zip(id2label.keys(), dataset_labels)
         }
+    return column_mapping, prediction_input, prediction_result, id2label_df, feature_map_df

utils.py ADDED Viewed

	@@ -0,0 +1,24 @@

+import yaml
+import sys
+# Read the scanner names stored under the "detectors" key of the YAML file at `path`.
+# Returns that list, or None when the file has no "detectors" key.
+def read_scanners(path):
+    scanners = []
+    with open(path, "r") as f:
+        config = yaml.load(f, Loader=yaml.FullLoader)
+        scanners = config.get("detectors", None)
+    return scanners
+# Write the given list of scanners to ./scan_config.yaml under the "detectors" key (the file is overwritten).
+def write_scanners(scanners):
+    with open("./scan_config.yaml", "w") as f:
+        # save scanners to detectors in yaml
+        yaml.dump({"detectors": scanners}, f)
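+# Convert a column-mapping dataframe into a dict of the form {label: [row values as a list, ...]}.
+# Note: the result is a plain dict (JSON-serializable), not a JSON string.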
+def convert_column_mapping_to_json(df, label=""):
+    column_mapping = {}
+    column_mapping[label] = []
+    for _, row in df.iterrows():
+        column_mapping[label].append(row.tolist())
+    return column_mapping