giskard-evaluator / app_legacy.py
inoki-giskard's picture
Use git to install giskard cicd repo with giskard_scanner executable
1784f11
raw
history blame
18.6 kB
import json
import logging
import os
import subprocess
import time
import datasets
import gradio as gr
import huggingface_hub
from transformers.pipelines import TextClassificationPipeline
from io_utils import (
convert_column_mapping_to_json,
read_inference_type,
read_scanners,
write_inference_type,
write_scanners,
)
from text_classification import (
check_column_mapping_keys_validity,
text_classification_fix_column_mapping,
)
from wordings import CONFIRM_MAPPING_DETAILS_FAIL_MD, CONFIRM_MAPPING_DETAILS_MD
# Names of the environment variables consulted by try_submit when it launches
# a local scan. These are the variable *names*, not their values.
HF_REPO_ID = "HF_REPO_ID"  # first choice for the --discussion_repo argument
HF_SPACE_ID = "SPACE_ID"  # fallback for --discussion_repo when HF_REPO_ID is unset/empty
HF_WRITE_TOKEN = "HF_WRITE_TOKEN"  # value is passed as --hf_token
def check_model(model_id):
    """Look up ``model_id`` on the Hub and try to build a pipeline for it.

    Returns a 2-tuple:

    * ``(None, None)`` when the model info cannot be fetched at all;
    * ``(model_id, pipeline)`` when the pipeline was created;
    * ``(model_id, exception)`` when importing or creating the pipeline failed.
    """
    try:
        model_details = huggingface_hub.model_info(model_id)
        pipeline_tag = model_details.pipeline_tag
    except Exception:
        return None, None

    try:
        # Imported lazily; an import failure is reported like any other load error.
        from transformers import pipeline as build_pipeline

        loaded_pipeline = build_pipeline(task=pipeline_tag, model=model_id)
    except Exception as load_error:
        return model_id, load_error
    return model_id, loaded_pipeline
def check_dataset(dataset_id, dataset_config="default", dataset_split="test"):
    """Check that a dataset, config and split combination can be used.

    Returns a ``(dataset_id, config, split)`` triple whose shape encodes the
    outcome:

    * ``(None, dataset_config, dataset_split)`` - config names could not be fetched;
    * ``(dataset_id, [config names], dataset_split)`` - the config is not one of
      the dataset's configs, the list holds the valid choices;
    * ``(dataset_id, None, [split names])`` - the split is missing from the
      loaded dataset, the list holds the valid choices;
    * ``(dataset_id, None, None)`` - the loaded object is neither a
      ``DatasetDict`` nor a ``Dataset``;
    * ``(dataset_id, dataset_config, dataset_split)`` - everything checks out.
    """
    try:
        available_configs = datasets.get_dataset_config_names(dataset_id)
    except Exception:
        # Dataset may not exist
        return None, dataset_config, dataset_split

    if dataset_config not in available_configs:
        # Caller has to pick one of the existing subsets (configs)
        return dataset_id, available_configs, dataset_split

    loaded = datasets.load_dataset(dataset_id, dataset_config)

    if isinstance(loaded, datasets.DatasetDict):
        split_names = loaded.keys()
        if dataset_split not in split_names:
            # Caller has to pick one of the existing splits
            return dataset_id, None, list(split_names)
        return dataset_id, dataset_config, dataset_split

    if isinstance(loaded, datasets.Dataset):
        return dataset_id, dataset_config, dataset_split

    # Unknown type
    return dataset_id, None, None
def _validation_blocked_updates():
    """Build the component updates used whenever validation cannot proceed."""
    return (
        gr.update(interactive=False),  # Submit button
        gr.update(visible=True),  # Loading row
        gr.update(visible=False),  # Preview row
        gr.update(visible=False),  # Model prediction input
        gr.update(visible=False),  # Model prediction preview
        gr.update(visible=False),  # Label mapping preview
        gr.update(visible=False),  # feature mapping preview
    )


def try_validate(
    m_id, ppl, dataset_id, dataset_config, dataset_split, column_mapping="{}"
):
    """Validate the model/dataset pair and compute the UI state to display.

    Args:
        m_id: Model id, or ``None`` when the model could not be reached.
        ppl: The loaded pipeline, or the exception raised while loading it.
        dataset_id: Hugging Face dataset id.
        dataset_config: Dataset config (subset) name.
        dataset_split: Dataset split name.
        column_mapping: JSON string with the user-provided column mapping;
            anything that cannot be parsed is treated as an empty mapping.

    Returns:
        A 7-tuple of ``gr.update`` objects, in this order: submit button,
        loading row, preview row, model prediction input, model prediction
        preview, label mapping preview, feature mapping preview.
    """
    # Validate model
    if m_id is None:
        gr.Warning(
            "Model is not accessible. Please set your HF_TOKEN if it is a private model."
        )
        return _validation_blocked_updates()
    if isinstance(ppl, Exception):
        gr.Warning(f"Failed to load model: {ppl}")
        return _validation_blocked_updates()

    # Validate dataset
    d_id, config, split = check_dataset(
        dataset_id=dataset_id,
        dataset_config=dataset_config,
        dataset_split=dataset_split,
    )

    if d_id is None:
        gr.Warning(
            f'Dataset "{dataset_id}" is not accessible. Please set your HF_TOKEN if it is a private dataset.'
        )
        return _validation_blocked_updates()
    if isinstance(config, list):
        # check_dataset hands back the list of valid configs in this case.
        gr.Warning(
            f'Dataset "{dataset_id}" does not have "{dataset_config}" config. Please choose a valid config.'
        )
        return _validation_blocked_updates()
    if isinstance(split, list):
        # check_dataset hands back the list of valid splits in this case.
        gr.Warning(
            f'Dataset "{dataset_id}" does not have "{dataset_split}" split. Please choose a valid split.'
        )
        return _validation_blocked_updates()
    # NOTE(review): check_dataset can also return (dataset_id, None, None) for
    # an unrecognised dataset object; that case is still treated as valid here,
    # as before - confirm whether it should be rejected.

    # TODO: Validate column mapping by running once
    # All four values are pre-initialised so the return tuples below can be
    # built even when `ppl` is not a text-classification pipeline.
    prediction_input = None
    prediction_result = None
    id2label_df = None
    feature_df = None
    if isinstance(ppl, TextClassificationPipeline):
        try:
            parsed_mapping = json.loads(column_mapping)
        except (TypeError, ValueError):
            # Not a string, or not valid JSON: start from an empty mapping.
            parsed_mapping = {}

        (
            _fixed_mapping,  # the corrected mapping is not used any further here
            prediction_input,
            prediction_result,
            id2label_df,
            feature_df,
        ) = text_classification_fix_column_mapping(
            parsed_mapping, ppl, d_id, config, split
        )

    # NOTE(review): CONFIRM_MAPPING_DETAILS_MD is passed to gr.update
    # positionally below, exactly as before - confirm which parameter that
    # binds to in the installed Gradio version.
    if prediction_result is None and id2label_df is not None:
        gr.Warning(
            'The model failed to predict with the first row in the dataset. Please provide feature mappings in "Advance" settings.'
        )
        return (
            gr.update(interactive=False),  # Submit button
            gr.update(visible=False),  # Loading row
            gr.update(CONFIRM_MAPPING_DETAILS_MD, visible=True),  # Preview row
            gr.update(
                value=f"**Sample Input**: {prediction_input}", visible=True
            ),  # Model prediction input
            gr.update(visible=False),  # Model prediction preview
            gr.update(
                value=id2label_df, visible=True, interactive=True
            ),  # Label mapping preview
            gr.update(
                value=feature_df, visible=True, interactive=True
            ),  # feature mapping preview
        )
    elif id2label_df is None:
        gr.Warning(
            'The prediction result does not conform the labels in the dataset. Please provide label mappings in "Advance" settings.'
        )
        return (
            gr.update(interactive=False),  # Submit button
            gr.update(visible=False),  # Loading row
            gr.update(CONFIRM_MAPPING_DETAILS_MD, visible=True),  # Preview row
            gr.update(
                value=f"**Sample Input**: {prediction_input}", visible=True
            ),  # Model prediction input
            gr.update(
                value=prediction_result, visible=True
            ),  # Model prediction preview
            gr.update(visible=True, interactive=True),  # Label mapping preview
            gr.update(visible=True, interactive=True),  # feature mapping preview
        )

    gr.Info(
        "Model and dataset validations passed. Your can submit the evaluation task."
    )
    return (
        gr.update(interactive=True),  # Submit button
        gr.update(visible=False),  # Loading row
        gr.update(CONFIRM_MAPPING_DETAILS_MD, visible=True),  # Preview row
        gr.update(
            value=f"**Sample Input**: {prediction_input}", visible=True
        ),  # Model prediction input
        gr.update(value=prediction_result, visible=True),  # Model prediction preview
        gr.update(
            value=id2label_df, visible=True, interactive=True
        ),  # Label mapping preview
        gr.update(
            value=feature_df, visible=True, interactive=True
        ),  # feature mapping preview
    )
def try_submit(
    m_id,
    d_id,
    config,
    split,
    id2label_mapping_dataframe,
    feature_mapping_dataframe,
    local,
):
    """Submit an evaluation for the given model and dataset.

    When ``local`` is truthy the ``giskard_scanner`` executable is run as a
    child process from the ``cicd`` directory next to this file, and the call
    blocks until it exits. Otherwise only a placeholder notice is shown.

    Args:
        m_id: Hugging Face model id.
        d_id: Hugging Face dataset id.
        config: Dataset config name.
        split: Dataset split name.
        id2label_mapping_dataframe: Table with a "Model Prediction Labels"
            column; its index (stringified) becomes the label-mapping key.
        feature_mapping_dataframe: Table with "Model Input Features" and
            "Dataset Features" columns.
        local: Whether to run the scan in this process' environment.

    Returns:
        A ``gr.update`` that re-enables the submit button.
    """
    label_mapping = {
        str(row_label): label
        for row_label, label in id2label_mapping_dataframe[
            "Model Prediction Labels"
        ].items()
    }

    model_features = feature_mapping_dataframe["Model Input Features"]
    feature_mapping = {
        model_features[row_label]: dataset_feature
        for row_label, dataset_feature in feature_mapping_dataframe[
            "Dataset Features"
        ].items()
    }

    # TODO: Set column mapping for some dataset such as `amazon_polarity`
    if not local:
        gr.Info("TODO: Submit task to an endpoint")
        return gr.update(interactive=True)  # Submit button

    hf_token = os.environ.get(HF_WRITE_TOKEN)
    discussion_repo = os.environ.get(HF_REPO_ID) or os.environ.get(HF_SPACE_ID)
    # A None entry in the argv list makes the process launch fail with a
    # TypeError, so report the missing configuration instead.
    if hf_token is None or discussion_repo is None:
        gr.Warning(
            f"Cannot start local evaluation: {HF_WRITE_TOKEN} and one of "
            f"{HF_REPO_ID}/{HF_SPACE_ID} must be set in the environment."
        )
        return gr.update(interactive=True)  # Submit button

    # NOTE(review): the token is handed over as a command-line argument, which
    # is typically visible to other local processes - confirm this is acceptable.
    command = [
        "giskard_scanner",
        "--loader",
        "huggingface",
        "--model",
        m_id,
        "--dataset",
        d_id,
        "--dataset_config",
        config,
        "--dataset_split",
        split,
        "--hf_token",
        hf_token,
        "--discussion_repo",
        discussion_repo,
        "--output_format",
        "markdown",
        "--output_portal",
        "huggingface",
        "--feature_mapping",
        json.dumps(feature_mapping),
        "--label_mapping",
        json.dumps(label_mapping),
        "--scan_config",
        "../config.yaml",
    ]

    eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
    start = time.time()
    logging.info(f"Start local evaluation on {eval_str}")

    # subprocess.run waits for the child and cleans up the process handle.
    completed = subprocess.run(
        command,
        cwd=os.path.join(os.path.dirname(os.path.realpath(__file__)), "cicd"),
        stderr=subprocess.STDOUT,
        check=False,
    )
    result = completed.returncode

    # Build the message once so the log line and the UI notice agree.
    elapsed = time.time() - start
    summary = (
        f"Finished local evaluation exit code {result} on {eval_str}: {elapsed:.2f}s"
    )
    logging.info(summary)
    gr.Info(summary)

    return gr.update(interactive=True)  # Submit button
def get_demo():
    """Create the evaluator's Gradio components and register their event handlers.

    Nothing is returned: every component is instantiated directly in the body,
    so it is attached to whatever Gradio layout context is active at call time
    (presumably a surrounding ``gr.Blocks()`` - verify against the caller).
    """
    # gr.themes.Soft(
    # primary_hue="green",
    # )

    def check_dataset_and_get_config(dataset_id):
        """Return a visible config dropdown for ``dataset_id`` with the first config selected.

        Any exception is swallowed, in which case ``None`` is returned implicitly.
        """
        try:
            configs = datasets.get_dataset_config_names(dataset_id)
            return gr.Dropdown(configs, value=configs[0], visible=True)
        except Exception:
            # Dataset may not exist
            pass

    def check_dataset_and_get_split(dataset_config, dataset_id):
        """Return a visible split dropdown for the dataset/config with the first split selected.

        On any exception a warning is shown and ``None`` is returned implicitly.
        """
        try:
            splits = list(datasets.load_dataset(dataset_id, dataset_config).keys())
            return gr.Dropdown(splits, value=splits[0], visible=True)
        except Exception as e:
            # Dataset may not exist
            gr.Warning(
                f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}"
            )

    def clear_column_mapping_tables():
        """Return updates that empty and hide the two mapping tables.

        NOTE(review): three updates are returned, but both ``.blur``
        registrations of this handler below list only two outputs - confirm
        the intended pairing.
        """
        return [
            gr.update(CONFIRM_MAPPING_DETAILS_FAIL_MD, visible=True),
            gr.update(value=[], visible=False, interactive=True),
            gr.update(value=[], visible=False, interactive=True),
        ]

    def gate_validate_btn(
        model_id,
        dataset_id,
        dataset_config,
        dataset_split,
        id2label_mapping_dataframe=None,
        feature_mapping_dataframe=None,
    ):
        """Re-run validation and return the seven updates for the handler's outputs.

        The two dataframe arguments are only supplied by the handlers attached
        to the mapping tables; the dropdown handlers pass four inputs, leaving
        them at ``None`` so the default empty mapping ``"{}"`` is used.
        """
        column_mapping = "{}"
        # Only the pipeline (or load error) is needed here; the id is discarded.
        _, ppl = check_model(model_id=model_id)

        if id2label_mapping_dataframe is not None:
            # NOTE(review): `.value` is read from the handler inputs - confirm
            # the type Gradio passes for a DataFrame input exposes it.
            labels = convert_column_mapping_to_json(
                id2label_mapping_dataframe.value, label="data"
            )
            features = convert_column_mapping_to_json(
                feature_mapping_dataframe.value, label="text"
            )
            column_mapping = json.dumps({**labels, **features}, indent=2)

        if check_column_mapping_keys_validity(column_mapping, ppl) is False:
            gr.Warning("Label mapping table has invalid contents. Please check again.")
            # Disable submission; the last five outputs receive empty updates.
            return (
                gr.update(interactive=False),
                gr.update(CONFIRM_MAPPING_DETAILS_FAIL_MD, visible=True),
                gr.update(),
                gr.update(),
                gr.update(),
                gr.update(),
                gr.update(),
            )
        else:
            if model_id and dataset_id and dataset_config and dataset_split:
                return try_validate(
                    model_id,
                    ppl,
                    dataset_id,
                    dataset_config,
                    dataset_split,
                    column_mapping,
                )
            else:
                # Some field is still empty: keep the loading row, hide the rest.
                return (
                    gr.update(interactive=False),
                    gr.update(visible=True),
                    gr.update(visible=False),
                    gr.update(visible=False),
                    gr.update(visible=False),
                    gr.update(visible=False),
                    gr.update(visible=False),
                )

    # --- Layout -----------------------------------------------------------
    with gr.Row():
        gr.Markdown(CONFIRM_MAPPING_DETAILS_MD)
    with gr.Row():
        run_local = gr.Checkbox(value=True, label="Run in this Space")
        # Initial checkbox state comes from the inference type stored in config.yaml.
        use_inference = read_inference_type("./config.yaml") == "hf_inference_api"
        run_inference = gr.Checkbox(value=use_inference, label="Run with Inference API")

    with gr.Row():
        selected = read_scanners("./config.yaml")
        # Offer the configured scanners plus "data_leakage" as choices.
        scan_config = selected + ["data_leakage"]
        scanners = gr.CheckboxGroup(
            choices=scan_config, value=selected, label="Scan Settings", visible=True
        )

    with gr.Row():
        model_id_input = gr.Textbox(
            label="Hugging Face model id",
            placeholder="cardiffnlp/twitter-roberta-base-sentiment-latest",
        )

        dataset_id_input = gr.Textbox(
            label="Hugging Face Dataset id",
            placeholder="tweet_eval",
        )
    with gr.Row():
        # Both dropdowns start hidden; the dataset handlers below return visible ones.
        dataset_config_input = gr.Dropdown(label="Dataset Config", visible=False)
        dataset_split_input = gr.Dropdown(label="Dataset Split", visible=False)

    with gr.Row(visible=True) as loading_row:
        gr.Markdown(
            """
<p style="text-align: center;">
🚀🐢Please validate your model and dataset first...
</p>
"""
        )

    with gr.Row(visible=False) as preview_row:
        gr.Markdown(
            """
<h1 style="text-align: center;">
Confirm Pre-processing Details
</h1>
Base on your model and dataset, we inferred this label mapping and feature mapping. <b>If the mapping is incorrect, please modify it in the table below.</b>
"""
        )

    with gr.Row():
        id2label_mapping_dataframe = gr.DataFrame(
            label="Preview of label mapping", interactive=True, visible=False
        )
        feature_mapping_dataframe = gr.DataFrame(
            label="Preview of feature mapping", interactive=True, visible=False
        )
    with gr.Row():
        example_input = gr.Markdown("Sample Input: ", visible=False)

    with gr.Row():
        example_labels = gr.Label(label="Model Prediction Sample", visible=False)

    # Starts disabled; the validation handlers return the update that enables it.
    run_btn = gr.Button(
        "Get Evaluation Result",
        variant="primary",
        interactive=False,
        size="lg",
    )

    # --- Event wiring -----------------------------------------------------
    # Leaving the model id field resets the mapping tables.
    model_id_input.blur(
        clear_column_mapping_tables,
        outputs=[id2label_mapping_dataframe, feature_mapping_dataframe],
    )

    # Leaving or submitting the dataset id field refreshes the config dropdown.
    dataset_id_input.blur(
        check_dataset_and_get_config, dataset_id_input, dataset_config_input
    )
    dataset_id_input.submit(
        check_dataset_and_get_config, dataset_id_input, dataset_config_input
    )

    # Choosing a config refreshes the split dropdown.
    dataset_config_input.change(
        check_dataset_and_get_split,
        inputs=[dataset_config_input, dataset_id_input],
        outputs=[dataset_split_input],
    )

    # Leaving the dataset id field also resets the mapping tables.
    dataset_id_input.blur(
        clear_column_mapping_tables,
        outputs=[id2label_mapping_dataframe, feature_mapping_dataframe],
    )
    # model_id_input.blur(gate_validate_btn,
    # inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
    # outputs=[run_btn, loading_row, preview_row, example_input, example_labels, id2label_mapping_dataframe, feature_mapping_dataframe])
    # dataset_id_input.blur(gate_validate_btn,
    # inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
    # outputs=[run_btn, loading_row, preview_row, example_input, example_labels, id2label_mapping_dataframe, feature_mapping_dataframe])

    # Changing config or split re-validates with the default (empty) column mapping.
    dataset_config_input.change(
        gate_validate_btn,
        inputs=[
            model_id_input,
            dataset_id_input,
            dataset_config_input,
            dataset_split_input,
        ],
        outputs=[
            run_btn,
            loading_row,
            preview_row,
            example_input,
            example_labels,
            id2label_mapping_dataframe,
            feature_mapping_dataframe,
        ],
    )
    dataset_split_input.change(
        gate_validate_btn,
        inputs=[
            model_id_input,
            dataset_id_input,
            dataset_config_input,
            dataset_split_input,
        ],
        outputs=[
            run_btn,
            loading_row,
            preview_row,
            example_input,
            example_labels,
            id2label_mapping_dataframe,
            feature_mapping_dataframe,
        ],
    )

    # Input events on either mapping table re-validate with both tables passed in.
    id2label_mapping_dataframe.input(
        gate_validate_btn,
        inputs=[
            model_id_input,
            dataset_id_input,
            dataset_config_input,
            dataset_split_input,
            id2label_mapping_dataframe,
            feature_mapping_dataframe,
        ],
        outputs=[
            run_btn,
            loading_row,
            preview_row,
            example_input,
            example_labels,
            id2label_mapping_dataframe,
            feature_mapping_dataframe,
        ],
    )
    feature_mapping_dataframe.input(
        gate_validate_btn,
        inputs=[
            model_id_input,
            dataset_id_input,
            dataset_config_input,
            dataset_split_input,
            id2label_mapping_dataframe,
            feature_mapping_dataframe,
        ],
        outputs=[
            run_btn,
            loading_row,
            preview_row,
            example_input,
            example_labels,
            id2label_mapping_dataframe,
            feature_mapping_dataframe,
        ],
    )

    # Changes to the scanner selection and inference checkbox go to the io_utils writers.
    scanners.change(write_scanners, inputs=scanners)
    run_inference.change(write_inference_type, inputs=[run_inference])

    # Clicking the button submits the evaluation; its single output is the button itself.
    run_btn.click(
        try_submit,
        inputs=[
            model_id_input,
            dataset_id_input,
            dataset_config_input,
            dataset_split_input,
            id2label_mapping_dataframe,
            feature_mapping_dataframe,
            run_local,
        ],
        outputs=[
            run_btn,
        ],
    )