GSK-2509 Fix non-standard label columns (go_emotions)

#29
app.py CHANGED
@@ -10,7 +10,7 @@ from run_jobs import start_process_run_job, stop_thread
 try:
     with gr.Blocks(theme=gr.themes.Soft(primary_hue="green")) as demo:
         with gr.Tab("Text Classification"):
-            get_demo_text_classification(demo)
+            get_demo_text_classification()
         with gr.Tab("Leaderboard"):
             get_demo_leaderboard()
         with gr.Tab("Logs(Debug)"):
app_text_classification.py CHANGED
@@ -2,17 +2,17 @@ import uuid
 
 import gradio as gr
 
-from io_utils import (get_logs_file, read_inference_type, read_scanners,
-                      write_inference_type, write_scanners)
+from io_utils import (get_logs_file, read_scanners, write_scanners)
 from text_classification_ui_helpers import (check_dataset_and_get_config,
                                             check_dataset_and_get_split,
-                                            check_model_and_show_prediction,
+                                            align_columns_and_show_prediction,
                                             deselect_run_inference,
                                             select_run_mode, try_submit,
-                                            write_column_mapping_to_config)
+                                            write_column_mapping_to_config,
+                                            precheck_model_ds_enable_example_btn)
 from wordings import CONFIRM_MAPPING_DETAILS_MD, INTRODUCTION_MD
 
-MAX_LABELS = 20
+MAX_LABELS = 40
 MAX_FEATURES = 20
 
 EXAMPLE_MODEL_ID = "cardiffnlp/twitter-roberta-base-sentiment-latest"
@@ -20,7 +20,7 @@ EXAMPLE_DATA_ID = "tweet_eval"
 CONFIG_PATH = "./config.yaml"
 
 
-def get_demo(demo):
+def get_demo():
     with gr.Row():
         gr.Markdown(INTRODUCTION_MD)
     uid_label = gr.Textbox(
@@ -41,6 +41,13 @@ def get_demo(demo):
     dataset_config_input = gr.Dropdown(label="Dataset Config", visible=False)
     dataset_split_input = gr.Dropdown(label="Dataset Split", visible=False)
 
+    with gr.Row():
+        example_btn = gr.Button(
+            "Auto-align Columns & Get Sample Prediction",
+            visible=True,
+            variant="primary",
+            interactive=False)
+
     with gr.Row():
         example_input = gr.HTML(visible=False)
     with gr.Row():
@@ -55,23 +62,17 @@ def get_demo(demo):
     column_mappings = []
     with gr.Row():
         with gr.Column():
+            gr.Markdown("# Label Mapping")
             for _ in range(MAX_LABELS):
                 column_mappings.append(gr.Dropdown(visible=False))
         with gr.Column():
+            gr.Markdown("# Feature Mapping")
             for _ in range(MAX_LABELS, MAX_LABELS + MAX_FEATURES):
                 column_mappings.append(gr.Dropdown(visible=False))
 
     with gr.Accordion(label="Model Wrap Advance Config (optional)", open=False):
         run_local = gr.Checkbox(value=True, label="Run in this Space")
-        run_inference = gr.Checkbox(value="False", label="Run with Inference API")
-
-        @gr.on(triggers=[uid_label.change], inputs=[uid_label], outputs=[run_inference])
-        def get_run_mode(uid):
-            return gr.update(
-                value=read_inference_type(uid) == "hf_inference_api"
-                and not run_local.value
-            )
-
+        run_inference = gr.Checkbox(value=False, label="Run with Inference API")
         inference_token = gr.Textbox(
             value="",
             label="HF Token for Inference API",
@@ -97,13 +98,12 @@ def get_demo(demo):
     run_btn = gr.Button(
         "Get Evaluation Result",
         variant="primary",
-        interactive=True,
+        interactive=False,
         size="lg",
     )
 
     with gr.Row():
-        logs = gr.Textbox(label="Giskard Bot Evaluation Log:", visible=False)
-        demo.load(get_logs_file, None, logs, every=0.5)
+        logs = gr.Textbox(value=get_logs_file, label="Giskard Bot Evaluation Log:", visible=False, every=0.5)
 
     dataset_id_input.change(
         check_dataset_and_get_config,
@@ -121,7 +121,7 @@ def get_demo(demo):
 
     run_inference.change(
         select_run_mode,
-        inputs=[run_inference, inference_token, uid_label],
+        inputs=[run_inference],
         outputs=[inference_token, run_local],
     )
 
@@ -131,17 +131,10 @@ def get_demo(demo):
         outputs=[inference_token, run_inference],
     )
 
-    inference_token.change(
-        write_inference_type, inputs=[run_inference, inference_token, uid_label]
-    )
-
     gr.on(
         triggers=[label.change for label in column_mappings],
         fn=write_column_mapping_to_config,
         inputs=[
-            dataset_id_input,
-            dataset_config_input,
-            dataset_split_input,
             uid_label,
             *column_mappings,
         ],
@@ -152,9 +145,6 @@ def get_demo(demo):
         triggers=[label.input for label in column_mappings],
         fn=write_column_mapping_to_config,
         inputs=[
-            dataset_id_input,
-            dataset_config_input,
-            dataset_split_input,
            uid_label,
            *column_mappings,
         ],
@@ -165,19 +155,33 @@ def get_demo(demo):
             model_id_input.change,
             dataset_id_input.change,
             dataset_config_input.change,
-            dataset_split_input.change,
+            dataset_split_input.change],
+        fn=precheck_model_ds_enable_example_btn,
+        inputs=[
+            model_id_input,
+            dataset_id_input,
+            dataset_config_input,
+            dataset_split_input,
+        ],
+        outputs=[example_btn])
+
+    gr.on(
+        triggers=[
+            example_btn.click,
         ],
-        fn=check_model_and_show_prediction,
+        fn=align_columns_and_show_prediction,
         inputs=[
             model_id_input,
             dataset_id_input,
             dataset_config_input,
             dataset_split_input,
+            uid_label,
         ],
         outputs=[
             example_input,
             example_prediction,
             column_mapping_accordion,
+            run_btn,
             *column_mappings,
         ],
     )
@@ -193,6 +197,8 @@ def get_demo(demo):
             dataset_config_input,
             dataset_split_input,
             run_local,
+            run_inference,
+            inference_token,
             uid_label,
         ],
         outputs=[run_btn, logs],
@@ -203,12 +209,10 @@ def get_demo(demo):
 
     gr.on(
         triggers=[
-            model_id_input.change,
-            dataset_config_input.change,
-            dataset_split_input.change,
-            run_inference.change,
-            run_local.change,
-            scanners.change,
+            run_inference.input,
+            run_local.input,
+            inference_token.input,
+            scanners.input,
         ],
         fn=enable_run_btn,
         inputs=None,
@@ -216,8 +220,8 @@ def get_demo(demo):
     )
 
     gr.on(
-        triggers=[label.change for label in column_mappings],
+        triggers=[label.input for label in column_mappings],
         fn=enable_run_btn,
-        inputs=None,
+        inputs=column_mappings,
        outputs=[run_btn],
    )
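
Note: the UI flow becomes two explicit steps here: model/dataset changes only run a precheck that unlocks the new example button, and the button click triggers the heavier column alignment and sample prediction. A compact sketch of that wiring (names mirror the PR; the handler body is a simplified stand-in):

```python
import gradio as gr

def precheck(model_id, dataset_id):
    # simplified stand-in for precheck_model_ds_enable_example_btn
    return gr.update(interactive=bool(model_id) and bool(dataset_id))

with gr.Blocks() as demo:
    model_id = gr.Textbox(label="Model")
    dataset_id = gr.Textbox(label="Dataset")
    example_btn = gr.Button("Auto-align Columns & Get Sample Prediction",
                            interactive=False)

    # gr.on lets several triggers share one handler, as in the diff above
    gr.on(
        triggers=[model_id.change, dataset_id.change],
        fn=precheck,
        inputs=[model_id, dataset_id],
        outputs=[example_btn],
    )
```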
io_utils.py CHANGED
@@ -1,4 +1,5 @@
 import os
+from pathlib import Path
 import subprocess
 
 import yaml
@@ -6,6 +7,7 @@ import yaml
 import pipe
 
 YAML_PATH = "./cicd/configs"
+LOG_FILE = "temp_log"
 
 
 class Dumper(yaml.Dumper):
@@ -28,7 +30,6 @@ def read_scanners(uid):
     with open(get_yaml_path(uid), "r") as f:
         config = yaml.load(f, Loader=yaml.FullLoader)
         scanners = config.get("detectors", [])
-        f.close()
     return scanners
 
 
@@ -38,11 +39,9 @@ def write_scanners(scanners, uid):
         config = yaml.load(f, Loader=yaml.FullLoader)
         if config:
             config["detectors"] = scanners
-        f.close()
     # save scanners to detectors in yaml
     with open(get_yaml_path(uid), "w") as f:
         yaml.dump(config, f, Dumper=Dumper)
-        f.close()
 
 
 # read model_type from yaml file
@@ -51,7 +50,6 @@ def read_inference_type(uid):
     with open(get_yaml_path(uid), "r") as f:
         config = yaml.load(f, Loader=yaml.FullLoader)
         inference_type = config.get("inference_type", "")
-        f.close()
     return inference_type
 
 
@@ -66,11 +64,9 @@ def write_inference_type(use_inference, inference_token, uid):
         config["inference_type"] = "hf_pipeline"
         # FIXME: A quick and temp fix for missing token
         config["inference_token"] = ""
-        f.close()
     # save inference_type to inference_type in yaml
     with open(get_yaml_path(uid), "w") as f:
         yaml.dump(config, f, Dumper=Dumper)
-        f.close()
 
 
 # read column mapping from yaml file
@@ -80,7 +76,6 @@ def read_column_mapping(uid):
         config = yaml.load(f, Loader=yaml.FullLoader)
         if config:
             column_mapping = config.get("column_mapping", dict())
-        f.close()
     return column_mapping
 
 
@@ -88,7 +83,6 @@ def read_column_mapping(uid):
 def write_column_mapping(mapping, uid):
     with open(get_yaml_path(uid), "r") as f:
         config = yaml.load(f, Loader=yaml.FullLoader)
-        f.close()
 
     if config is None:
         return
@@ -96,10 +90,9 @@ def write_column_mapping(mapping, uid):
         del config["column_mapping"]
     else:
         config["column_mapping"] = mapping
-
     with open(get_yaml_path(uid), "w") as f:
-        yaml.dump(config, f, Dumper=Dumper)
-        f.close()
+        # yaml Dumper will by default sort the keys
+        yaml.dump(config, f, Dumper=Dumper, sort_keys=False)
 
 
 # convert column mapping dataframe to json
@@ -113,21 +106,20 @@ def convert_column_mapping_to_json(df, label=""):
 
 def get_logs_file():
     try:
-        file = open(f"./tmp/temp_log", "r")
-        return file.read()
+        with open(LOG_FILE, "r") as file:
+            return file.read()
     except Exception:
         return "Log file does not exist"
 
 
-def write_log_to_user_file(id, log):
-    with open(f"./tmp/temp_log", "a") as f:
+def write_log_to_user_file(task_id, log):
+    with open(f"./tmp/{task_id}.log", "a") as f:
         f.write(log)
-        f.close()
 
 
-def save_job_to_pipe(id, job, description, lock):
+def save_job_to_pipe(task_id, job, description, lock):
     with lock:
-        pipe.jobs.append((id, job, description))
+        pipe.jobs.append((task_id, job, description))
 
 
 def pop_job_from_pipe():
@@ -135,14 +127,17 @@ def pop_job_from_pipe():
         return
     job_info = pipe.jobs.pop()
    pipe.current = job_info[2]
-    write_log_to_user_file(job_info[0], f"Running job id {job_info[0]}\n")
+    task_id = job_info[0]
+    write_log_to_user_file(task_id, f"Running job id {task_id}\n")
     command = job_info[1]
 
-    log_file = open(f"./tmp/temp_log", "a")
-    p = subprocess.Popen(
-        command,
-        stdout=log_file,
-        stderr=log_file,
-    )
-    p.wait()
+    # Link to LOG_FILE
+    log_file_path = Path(LOG_FILE)
+    if log_file_path.exists():
+        log_file_path.unlink()
+    os.symlink(f"./tmp/{task_id}.log", LOG_FILE)
+
+    with open(f"./tmp/{task_id}.log", "a") as log_file:
+        p = subprocess.Popen(command, stdout=log_file, stderr=subprocess.STDOUT)
+        p.wait()
     pipe.current = None
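
Note on the logging change: each job now appends to its own ./tmp/<task_id>.log, and the temp_log symlink is re-pointed at the active job so the UI keeps polling a single fixed path. A minimal sketch of that idea (the helper name here is hypothetical):

```python
import os
from pathlib import Path

LOG_FILE = "temp_log"  # fixed path the Gradio log textbox polls

def point_log_at_task(task_id: str) -> None:
    # hypothetical helper: re-point the shared symlink at the active job's log
    link = Path(LOG_FILE)
    if link.exists() or link.is_symlink():  # also clean up a dangling link
        link.unlink()
    os.symlink(f"./tmp/{task_id}.log", LOG_FILE)
```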
requirements.txt CHANGED
@@ -1,4 +1,4 @@
-giskard >= 2.1.0, < 2.3.0
+giskard==2.1.2
 huggingface_hub
 torch==2.0.1
 transformers
text_classification.py CHANGED
@@ -15,8 +15,17 @@ def get_labels_and_features_from_dataset(dataset_id, dataset_config, split):
     try:
         ds = datasets.load_dataset(dataset_id, dataset_config)[split]
         dataset_features = ds.features
-        labels = dataset_features["label"].names
-        features = [f for f in dataset_features.keys() if f != "label"]
+        label_keys = [i for i in dataset_features.keys() if i.startswith('label')]
+        if len(label_keys) == 0:  # no labels found
+            # return everything for post processing
+            return list(dataset_features.keys()), list(dataset_features.keys())
+        if not isinstance(dataset_features[label_keys[0]], datasets.ClassLabel):
+            if hasattr(dataset_features[label_keys[0]], 'feature'):
+                label_feat = dataset_features[label_keys[0]].feature
+                labels = label_feat.names
+            else:
+                labels = [dataset_features[label_keys[0]].names]
+        features = [f for f in dataset_features.keys() if not f.startswith("label")]
         return labels, features
     except Exception as e:
         logging.warning(
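
Context for the fix: go_emotions does not expose a plain "label" ClassLabel column; its "labels" column is a Sequence of ClassLabel, so the old dataset_features["label"].names lookup fails and the class names have to be read one level deeper. A quick check with the datasets library (outputs shown as comments are indicative):

```python
import datasets

# go_emotions ("simplified" config) keeps its targets in a "labels" column
# of type Sequence(ClassLabel); there is no "label" column at all.
features = datasets.load_dataset("go_emotions", "simplified")["train"].features

print(features.keys())                       # e.g. dict_keys(['text', 'labels', 'id'])
print(features["labels"].feature.names[:3])  # e.g. ['admiration', 'amusement', 'anger']
```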
text_classification_ui_helpers.py CHANGED
@@ -10,7 +10,7 @@ from transformers.pipelines import TextClassificationPipeline
 from wordings import get_styled_input
 
 from io_utils import (get_yaml_path, read_column_mapping, save_job_to_pipe,
-                      write_column_mapping, write_inference_type,
+                      write_column_mapping,
                       write_log_to_user_file)
 from text_classification import (check_model, get_example_prediction,
                                  get_labels_and_features_from_dataset)
@@ -18,7 +18,7 @@ from wordings import (CHECK_CONFIG_OR_SPLIT_RAW,
                       CONFIRM_MAPPING_DETAILS_FAIL_RAW,
                       MAPPING_STYLED_ERROR_WARNING)
 
-MAX_LABELS = 20
+MAX_LABELS = 40
 MAX_FEATURES = 20
 
 HF_REPO_ID = "HF_REPO_ID"
@@ -51,10 +51,8 @@ def check_dataset_and_get_split(dataset_id, dataset_config):
         pass
 
 
-def select_run_mode(run_inf, inf_token, uid):
+def select_run_mode(run_inf):
     if run_inf:
-        if len(inf_token) > 0:
-            write_inference_type(run_inf, inf_token, uid)
         return (gr.update(visible=True), gr.update(value=False))
     else:
         return (gr.update(visible=False), gr.update(value=True))
@@ -68,46 +66,62 @@ def deselect_run_inference(run_local):
 
 
 def write_column_mapping_to_config(
-    dataset_id, dataset_config, dataset_split, uid, *labels
+    uid, *labels
 ):
     # TODO: Substitute 'text' with more features for zero-shot
     # we are not using ds features because we only support "text" for now
-    ds_labels, _ = get_labels_and_features_from_dataset(
-        dataset_id, dataset_config, dataset_split
-    )
+    all_mappings = read_column_mapping(uid)
+
     if labels is None:
         return
+    all_mappings = export_mappings(all_mappings, "labels", None, labels[:MAX_LABELS])
+    all_mappings = export_mappings(all_mappings, "features", ["text"], labels[MAX_LABELS : (MAX_LABELS + MAX_FEATURES)])
 
-    all_mappings = dict()
-
-    if "labels" not in all_mappings.keys():
-        all_mappings["labels"] = dict()
-    for i, label in enumerate(labels[:MAX_LABELS]):
-        if label:
-            all_mappings["labels"][label] = ds_labels[i % len(ds_labels)]
-    if "features" not in all_mappings.keys():
-        all_mappings["features"] = dict()
-    for _, feat in enumerate(labels[MAX_LABELS : (MAX_LABELS + MAX_FEATURES)]):
-        if feat:
-            # TODO: Substitute 'text' with more features for zero-shot
-            all_mappings["features"]["text"] = feat
     write_column_mapping(all_mappings, uid)
 
-
-def list_labels_and_features_from_dataset(ds_labels, ds_features, model_id2label):
+
+def export_mappings(all_mappings, key, subkeys, values):
+    if key not in all_mappings.keys():
+        all_mappings[key] = dict()
+    if subkeys is None:
+        subkeys = list(all_mappings[key].keys())
+
+    if not subkeys:
+        logging.debug(f"subkeys is empty for {key}")
+        return all_mappings
+
+    for i, subkey in enumerate(subkeys):
+        if subkey:
+            all_mappings[key][subkey] = values[i % len(values)]
+    return all_mappings
+
+
+def list_labels_and_features_from_dataset(ds_labels, ds_features, model_id2label, uid):
     model_labels = list(model_id2label.values())
-    len_model_labels = len(model_labels)
+    all_mappings = read_column_mapping(uid)
+    # For flattened raw datasets with no labels
+    # check if there are shared labels between model and dataset
+    shared_labels = set(model_labels).intersection(set(ds_labels))
+    if shared_labels:
+        ds_labels = list(shared_labels)
+    if len(ds_labels) > MAX_LABELS:
+        ds_labels = ds_labels[:MAX_LABELS]
+        gr.Warning(f"The number of labels is truncated to length {MAX_LABELS}")
+
+    ds_labels.sort()
+    model_labels.sort()
+
     lables = [
         gr.Dropdown(
             label=f"{label}",
             choices=model_labels,
-            value=model_id2label[i % len_model_labels],
+            value=model_id2label[i % len(model_labels)],
             interactive=True,
             visible=True,
         )
-        for i, label in enumerate(ds_labels[:MAX_LABELS])
+        for i, label in enumerate(ds_labels)
     ]
     lables += [gr.Dropdown(visible=False) for _ in range(MAX_LABELS - len(lables))]
+    all_mappings = export_mappings(all_mappings, "labels", ds_labels, model_labels)
+
     # TODO: Substitute 'text' with more features for zero-shot
     features = [
         gr.Dropdown(
@@ -122,11 +136,27 @@ def list_labels_and_features_from_dataset(ds_labels, ds_features, model_id2label
     features += [
         gr.Dropdown(visible=False) for _ in range(MAX_FEATURES - len(features))
     ]
+    all_mappings = export_mappings(all_mappings, "features", ["text"], ds_features)
+    write_column_mapping(all_mappings, uid)
+
     return lables + features
 
+
+def precheck_model_ds_enable_example_btn(model_id, dataset_id, dataset_config, dataset_split):
+    ppl = check_model(model_id)
+    if ppl is None or not isinstance(ppl, TextClassificationPipeline):
+        gr.Warning("Please check your model.")
+        return gr.update(interactive=False)
+    ds_labels, ds_features = get_labels_and_features_from_dataset(
+        dataset_id, dataset_config, dataset_split
+    )
+    if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
+        gr.Warning(CHECK_CONFIG_OR_SPLIT_RAW)
+        return gr.update(interactive=False)
+
+    return gr.update(interactive=True)
 
-def check_model_and_show_prediction(
-    model_id, dataset_id, dataset_config, dataset_split
+
+def align_columns_and_show_prediction(
+    model_id, dataset_id, dataset_config, dataset_split, uid
 ):
     ppl = check_model(model_id)
     if ppl is None or not isinstance(ppl, TextClassificationPipeline):
@@ -134,6 +164,8 @@ def check_model_and_show_prediction(
         return (
             gr.update(visible=False),
             gr.update(visible=False),
+            gr.update(visible=False, open=False),
+            gr.update(interactive=False),
             *[gr.update(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)],
         )
 
@@ -147,6 +179,7 @@ def check_model_and_show_prediction(
             gr.update(visible=False),
             gr.update(visible=False),
             gr.update(visible=False, open=False),
+            gr.update(interactive=False),
             *dropdown_placement,
         )
     model_id2label = ppl.model.config.id2label
@@ -161,6 +194,7 @@ def check_model_and_show_prediction(
             gr.update(visible=False),
             gr.update(visible=False),
             gr.update(visible=False, open=False),
+            gr.update(interactive=False),
             *dropdown_placement,
         )
 
@@ -168,6 +202,7 @@ def check_model_and_show_prediction(
         ds_labels,
         ds_features,
         model_id2label,
+        uid,
     )
 
     # when labels or features are not aligned
@@ -180,6 +215,7 @@ def check_model_and_show_prediction(
             gr.update(value=MAPPING_STYLED_ERROR_WARNING, visible=True),
             gr.update(visible=False),
             gr.update(visible=True, open=True),
+            gr.update(interactive=True),
             *column_mappings,
         )
 
@@ -190,13 +226,11 @@ def check_model_and_show_prediction(
         gr.update(value=get_styled_input(prediction_input), visible=True),
         gr.update(value=prediction_output, visible=True),
         gr.update(visible=True, open=False),
+        gr.update(interactive=True),
         *column_mappings,
     )
 
-
-def try_submit(m_id, d_id, config, split, local, uid):
-    all_mappings = read_column_mapping(uid)
-
+def check_column_mapping_keys_validity(all_mappings):
     if all_mappings is None:
         gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
         return (gr.update(interactive=True), gr.update(visible=False))
@@ -204,6 +238,8 @@ def try_submit(m_id, d_id, config, split, local, uid):
     if "labels" not in all_mappings.keys():
         gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
         return (gr.update(interactive=True), gr.update(visible=False))
+
+def construct_label_and_feature_mapping(all_mappings):
     label_mapping = {}
     for i, label in zip(
         range(len(all_mappings["labels"].keys())), all_mappings["labels"].keys()
@@ -214,73 +250,88 @@ def try_submit(m_id, d_id, config, split, local, uid):
         gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
         return (gr.update(interactive=True), gr.update(visible=False))
     feature_mapping = all_mappings["features"]
+    return label_mapping, feature_mapping
+
+
+def try_submit(m_id, d_id, config, split, local, inference, inference_token, uid):
+    all_mappings = read_column_mapping(uid)
+    check_column_mapping_keys_validity(all_mappings)
+    label_mapping, feature_mapping = construct_label_and_feature_mapping(all_mappings)
 
     leaderboard_dataset = None
     if os.environ.get("SPACE_ID") == "giskardai/giskard-evaluator":
         leaderboard_dataset = "ZeroCommand/test-giskard-report"
+
+    if local:
+        inference_type = "hf_pipeline"
+    if inference and inference_token:
+        inference_type = "hf_inference_api"
 
     # TODO: Set column mapping for some dataset such as `amazon_polarity`
-    if local:
-        command = [
-            "giskard_scanner",
-            "--loader",
-            "huggingface",
-            "--model",
-            m_id,
-            "--dataset",
-            d_id,
-            "--dataset_config",
-            config,
-            "--dataset_split",
-            split,
-            "--hf_token",
-            os.environ.get(HF_WRITE_TOKEN),
-            "--discussion_repo",
-            os.environ.get(HF_REPO_ID) or os.environ.get(HF_SPACE_ID),
-            "--output_format",
-            "markdown",
-            "--output_portal",
-            "huggingface",
-            "--feature_mapping",
-            json.dumps(feature_mapping),
-            "--label_mapping",
-            json.dumps(label_mapping),
-            "--scan_config",
-            get_yaml_path(uid),
-            "--leaderboard_dataset",
-            leaderboard_dataset,
-        ]
-        if os.environ.get(HF_GSK_HUB_KEY):
-            command.append("--giskard_hub_api_key")
-            command.append(os.environ.get(HF_GSK_HUB_KEY))
-        if os.environ.get(HF_GSK_HUB_URL):
-            command.append("--giskard_hub_url")
-            command.append(os.environ.get(HF_GSK_HUB_URL))
-        if os.environ.get(HF_GSK_HUB_PROJECT_KEY):
-            command.append("--giskard_hub_project_key")
-            command.append(os.environ.get(HF_GSK_HUB_PROJECT_KEY))
-        if os.environ.get(HF_GSK_HUB_HF_TOKEN):
-            command.append("--giskard_hub_hf_token")
-            command.append(os.environ.get(HF_GSK_HUB_HF_TOKEN))
-        if os.environ.get(HF_GSK_HUB_UNLOCK_TOKEN):
-            command.append("--giskard_hub_unlock_token")
-            command.append(os.environ.get(HF_GSK_HUB_UNLOCK_TOKEN))
-
-        eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
-        logging.info(f"Start local evaluation on {eval_str}")
-        save_job_to_pipe(uid, command, eval_str, threading.Lock())
-        write_log_to_user_file(
-            uid,
-            f"Start local evaluation on {eval_str}. Please wait for your job to start...\n",
-        )
-        gr.Info(f"Start local evaluation on {eval_str}")
-
-        return (
-            gr.update(interactive=False),
-            gr.update(lines=5, visible=True, interactive=False),
-        )
-
-    else:
-        gr.Info("TODO: Submit task to an endpoint")
-
-        return (gr.update(interactive=True), gr.update(visible=False)) # Submit button
+    command = [
+        "giskard_scanner",
+        "--loader",
+        "huggingface",
+        "--model",
+        m_id,
+        "--dataset",
+        d_id,
+        "--dataset_config",
+        config,
+        "--dataset_split",
+        split,
+        "--hf_token",
+        os.environ.get(HF_WRITE_TOKEN),
+        "--discussion_repo",
+        os.environ.get(HF_REPO_ID) or os.environ.get(HF_SPACE_ID),
+        "--output_format",
+        "markdown",
+        "--output_portal",
+        "huggingface",
+        "--feature_mapping",
+        json.dumps(feature_mapping),
+        "--label_mapping",
+        json.dumps(label_mapping),
+        "--scan_config",
+        get_yaml_path(uid),
+        "--leaderboard_dataset",
+        leaderboard_dataset,
+        "--inference_type",
+        inference_type,
+        "--inference_token",
+        inference_token,
+    ]
+    if os.environ.get(HF_GSK_HUB_KEY):
+        command.append("--giskard_hub_api_key")
+        command.append(os.environ.get(HF_GSK_HUB_KEY))
+    if os.environ.get(HF_GSK_HUB_URL):
+        command.append("--giskard_hub_url")
+        command.append(os.environ.get(HF_GSK_HUB_URL))
+    if os.environ.get(HF_GSK_HUB_PROJECT_KEY):
+        command.append("--giskard_hub_project_key")
+        command.append(os.environ.get(HF_GSK_HUB_PROJECT_KEY))
+    if os.environ.get(HF_GSK_HUB_HF_TOKEN):
+        command.append("--giskard_hub_hf_token")
+        command.append(os.environ.get(HF_GSK_HUB_HF_TOKEN))
+    if os.environ.get(HF_GSK_HUB_UNLOCK_TOKEN):
+        command.append("--giskard_hub_unlock_token")
+        command.append(os.environ.get(HF_GSK_HUB_UNLOCK_TOKEN))
+
+    eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
+    logging.info(f"Start local evaluation on {eval_str}")
+    save_job_to_pipe(uid, command, eval_str, threading.Lock())
+    print(command)
+    write_log_to_user_file(
+        uid,
+        f"Start local evaluation on {eval_str}. Please wait for your job to start...\n",
+    )
+    gr.Info(f"Start local evaluation on {eval_str}")
 
+    return (
+        gr.update(interactive=False),
+        gr.update(lines=5, visible=True, interactive=False),
+    )
+
+    # TODO: Submit task to an endpoint")
+
+    # return (gr.update(interactive=True), gr.update(visible=False)) # Submit button
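
For readers of the new helper: export_mappings cycles values across subkeys (values[i % len(values)]), which is how the label and feature dropdown selections end up in the per-user config. A small self-contained illustration, with the function condensed from this diff and made-up sample data:

```python
def export_mappings(all_mappings, key, subkeys, values):
    # condensed from the PR: map each subkey to a value, cycling when needed
    if key not in all_mappings.keys():
        all_mappings[key] = dict()
    if subkeys is None:
        subkeys = list(all_mappings[key].keys())
    if not subkeys:
        return all_mappings
    for i, subkey in enumerate(subkeys):
        if subkey:
            all_mappings[key][subkey] = values[i % len(values)]
    return all_mappings

mappings = export_mappings({}, "labels",
                           ["anger", "joy", "sadness"],        # dataset class names
                           ["LABEL_0", "LABEL_1", "LABEL_2"])  # model id2label values
mappings = export_mappings(mappings, "features", ["text"], ["text"])
# mappings == {"labels": {"anger": "LABEL_0", "joy": "LABEL_1", "sadness": "LABEL_2"},
#              "features": {"text": "text"}}
```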