ZeroCommand committed on
Commit
f04482d
1 Parent(s): be473e6

update log area

Files changed (6):
  1. app.py +23 -8
  2. app_text_classification.py +12 -179
  3. io_utils.py +44 -1
  4. run_jobs.py +29 -0
  5. text_classification_ui_helpers.py +184 -0
  6. tmp/pipe +0 -0
app.py CHANGED
@@ -3,15 +3,30 @@
  # from pathlib import Path

  import gradio as gr
-
+ import atexit
  from app_text_classification import get_demo as get_demo_text_classification
  from app_leaderboard import get_demo as get_demo_leaderboard
+ from run_jobs import start_process_run_job, stop_thread
+ import threading
+
+ if threading.current_thread() is not threading.main_thread():
+     t = threading.current_thread()
+     print(t.do_run)
+ try:
+     with gr.Blocks(theme=gr.themes.Soft(primary_hue="green")) as demo:
+         with gr.Tab("Text Classification"):
+             get_demo_text_classification()
+         with gr.Tab("Leaderboard"):
+             get_demo_leaderboard()
+
+     start_process_run_job()
+
+     demo.queue(max_size=100)
+     demo.launch(share=False)
+     atexit.register(stop_thread)
+
+ except Exception:
+     print("stop background thread")
+     stop_thread()

- with gr.Blocks(theme=gr.themes.Soft(primary_hue="green")) as demo:
-     with gr.Tab("Text Classification"):
-         get_demo_text_classification()
-     with gr.Tab("Leaderboard"):
-         get_demo_leaderboard()

-     demo.queue(max_size=100)
-     demo.launch(share=False)
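Note: the reworked entrypoint starts a background job runner beside the Gradio app and flags it to stop when the process exits. A minimal sketch of the same lifecycle, assuming a hypothetical poll_jobs worker in place of the Space's actual run_jobs module:

import atexit
import threading
import time

import gradio as gr

def poll_jobs(stop: threading.Event):
    # Hypothetical worker loop: check a job queue until asked to stop.
    while not stop.is_set():
        time.sleep(10)

stop = threading.Event()
worker = threading.Thread(target=poll_jobs, args=(stop,), daemon=True)
worker.start()
atexit.register(stop.set)  # signal the worker when the interpreter shuts down

with gr.Blocks() as demo:
    gr.Markdown("tabs go here")

demo.queue(max_size=100)
demo.launch(share=False)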
app_text_classification.py CHANGED
@@ -1,22 +1,7 @@
  import gradio as gr
- import datasets
- import os
- import time
- import subprocess
- import logging
- import collections
-
- import json
-
- from transformers.pipelines import TextClassificationPipeline
-
- from text_classification import get_labels_and_features_from_dataset, check_model, get_example_prediction
- from io_utils import read_scanners, write_scanners, read_inference_type, read_column_mapping, write_column_mapping, write_inference_type
- from wordings import INTRODUCTION_MD, CONFIRM_MAPPING_DETAILS_MD, CONFIRM_MAPPING_DETAILS_FAIL_RAW
-
- HF_REPO_ID = 'HF_REPO_ID'
- HF_SPACE_ID = 'SPACE_ID'
- HF_WRITE_TOKEN = 'HF_WRITE_TOKEN'
+ from io_utils import read_scanners, write_scanners, read_inference_type, write_inference_type
+ from wordings import INTRODUCTION_MD, CONFIRM_MAPPING_DETAILS_MD
+ from text_classification_ui_helpers import try_submit, check_dataset_and_get_config, check_dataset_and_get_split, check_model_and_show_prediction, write_column_mapping_to_config

  MAX_LABELS = 20
  MAX_FEATURES = 20
@@ -25,75 +10,6 @@ EXAMPLE_MODEL_ID = 'cardiffnlp/twitter-roberta-base-sentiment-latest'
  EXAMPLE_DATA_ID = 'tweet_eval'
  CONFIG_PATH='./config.yaml'

- def try_submit(m_id, d_id, config, split, local):
-     all_mappings = read_column_mapping(CONFIG_PATH)
-
-     if "labels" not in all_mappings.keys():
-         gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
-         return gr.update(interactive=True)
-     label_mapping = all_mappings["labels"]
-
-     if "features" not in all_mappings.keys():
-         gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
-         return gr.update(interactive=True)
-     feature_mapping = all_mappings["features"]
-
-     # TODO: Set column mapping for some dataset such as `amazon_polarity`
-     if local:
-         command = [
-             "python",
-             "cli.py",
-             "--loader", "huggingface",
-             "--model", m_id,
-             "--dataset", d_id,
-             "--dataset_config", config,
-             "--dataset_split", split,
-             "--hf_token", os.environ.get(HF_WRITE_TOKEN),
-             "--discussion_repo", os.environ.get(HF_REPO_ID) or os.environ.get(HF_SPACE_ID),
-             "--output_format", "markdown",
-             "--output_portal", "huggingface",
-             "--feature_mapping", json.dumps(feature_mapping),
-             "--label_mapping", json.dumps(label_mapping),
-             "--scan_config", "../config.yaml",
-         ]
-
-         eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
-         start = time.time()
-         logging.info(f"Start local evaluation on {eval_str}")
-
-         evaluator = subprocess.Popen(
-             command,
-             cwd=os.path.join(os.path.dirname(os.path.realpath(__file__)), "cicd"),
-             stderr=subprocess.STDOUT,
-         )
-         result = evaluator.wait()
-
-         logging.info(f"Finished local evaluation exit code {result} on {eval_str}: {time.time() - start:.2f}s")
-
-         gr.Info(f"Finished local evaluation exit code {result} on {eval_str}: {time.time() - start:.2f}s")
-     else:
-         gr.Info("TODO: Submit task to an endpoint")
-
-     return gr.update(interactive=True)  # Submit button
-
-
- def check_dataset_and_get_config(dataset_id):
-     try:
-         configs = datasets.get_dataset_config_names(dataset_id)
-         return gr.Dropdown(configs, value=configs[0], visible=True)
-     except Exception:
-         # Dataset may not exist
-         pass
-
- def check_dataset_and_get_split(dataset_id, dataset_config):
-     try:
-         splits = list(datasets.load_dataset(dataset_id, dataset_config).keys())
-         return gr.Dropdown(splits, value=splits[0], visible=True)
-     except Exception:
-         # Dataset may not exist
-         # gr.Warning(f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}")
-         pass
-
  def get_demo():
      with gr.Row():
          gr.Markdown(INTRODUCTION_MD)
@@ -147,102 +63,18 @@ def get_demo():
              interactive=True,
              size="lg",
          )
+
+     with gr.Row():
+         logs = gr.Textbox(label="Giskard Bot Evaluation Log:", visible=False)

-     @gr.on(triggers=[label.change for label in column_mappings],
+     gr.on(triggers=[label.change for label in column_mappings],
+           fn=write_column_mapping_to_config,
           inputs=[dataset_id_input, dataset_config_input, dataset_split_input, *column_mappings])
-     def write_column_mapping_to_config(dataset_id, dataset_config, dataset_split, *labels):
-         ds_labels, ds_features = get_labels_and_features_from_dataset(dataset_id, dataset_config, dataset_split)
-         if labels is None:
-             return
-         labels = [*labels]
-         all_mappings = read_column_mapping(CONFIG_PATH)
-
-         if "labels" not in all_mappings.keys():
-             all_mappings["labels"] = dict()
-         for i, label in enumerate(labels[:MAX_LABELS]):
-             if label:
-                 all_mappings["labels"][label] = ds_labels[i]
-
-         if "features" not in all_mappings.keys():
-             all_mappings["features"] = dict()
-         for i, feat in enumerate(labels[MAX_LABELS:(MAX_LABELS + MAX_FEATURES)]):
-             if feat:
-                 all_mappings["features"][feat] = ds_features[i]
-         write_column_mapping(all_mappings)
-
-     def list_labels_and_features_from_dataset(ds_labels, ds_features, model_id2label):
-         model_labels = list(model_id2label.values())
-         lables = [gr.Dropdown(label=f"{label}", choices=model_labels, value=model_id2label[i], interactive=True, visible=True) for i, label in enumerate(ds_labels[:MAX_LABELS])]
-         lables += [gr.Dropdown(visible=False) for _ in range(MAX_LABELS - len(lables))]
-         # TODO: Substitute 'text' with more features for zero-shot
-         features = [gr.Dropdown(label=f"{feature}", choices=ds_features, value=ds_features[0], interactive=True, visible=True) for feature in ['text']]
-         features += [gr.Dropdown(visible=False) for _ in range(MAX_FEATURES - len(features))]
-         return lables + features
-
-     @gr.on(triggers=[model_id_input.change, dataset_config_input.change])
-     def clear_column_mapping_config():
-         write_column_mapping(None)
-
-     @gr.on(triggers=[model_id_input.change, dataset_config_input.change, dataset_split_input.change],
+
+     gr.on(triggers=[model_id_input.change, dataset_config_input.change, dataset_split_input.change],
+           fn=check_model_and_show_prediction,
           inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
           outputs=[example_input, example_prediction, column_mapping_accordion, *column_mappings])
-     def check_model_and_show_prediction(model_id, dataset_id, dataset_config, dataset_split):
-         ppl = check_model(model_id)
-         if ppl is None or not isinstance(ppl, TextClassificationPipeline):
-             gr.Warning("Please check your model.")
-             return (
-                 gr.update(visible=False),
-                 gr.update(visible=False),
-                 *[gr.update(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)]
-             )
-
-         dropdown_placement = [gr.Dropdown(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)]
-
-         if ppl is None:  # pipeline not found
-             gr.Warning("Model not found")
-             return (
-                 gr.update(visible=False),
-                 gr.update(visible=False),
-                 gr.update(visible=False, open=False),
-                 *dropdown_placement
-             )
-         model_id2label = ppl.model.config.id2label
-         ds_labels, ds_features = get_labels_and_features_from_dataset(dataset_id, dataset_config, dataset_split)
-
-         # when dataset does not have labels or features
-         if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
-             gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
-             return (
-                 gr.update(visible=False),
-                 gr.update(visible=False),
-                 gr.update(visible=False, open=False),
-                 *dropdown_placement
-             )
-
-         column_mappings = list_labels_and_features_from_dataset(
-             ds_labels,
-             ds_features,
-             model_id2label,
-         )
-
-         # when labels or features are not aligned
-         # show manually column mapping
-         if collections.Counter(model_id2label.items()) != collections.Counter(ds_labels) or ds_features[0] != 'text':
-             gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
-             return (
-                 gr.update(visible=False),
-                 gr.update(visible=False),
-                 gr.update(visible=True, open=True),
-                 *column_mappings
-             )
-
-         prediction_input, prediction_output = get_example_prediction(ppl, dataset_id, dataset_config, dataset_split)
-         return (
-             gr.update(value=prediction_input, visible=True),
-             gr.update(value=prediction_output, visible=True),
-             gr.update(visible=True, open=False),
-             *column_mappings
-         )

      dataset_id_input.blur(check_dataset_and_get_config, dataset_id_input, dataset_config_input)

@@ -267,4 +99,5 @@ def get_demo():
          ],
          fn=try_submit,
          inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input, run_local],
-         outputs=[run_btn])
+         outputs=[run_btn, logs])
+
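Note: the handlers that moved to text_classification_ui_helpers.py are now attached with gr.on(..., fn=...) instead of the @gr.on decorator, and the new hidden logs textbox joins run_btn in the submit outputs. A minimal sketch of that wiring style, with placeholder component names:

import gradio as gr

def echo(model, dataset):
    # Placeholder handler standing in for the helpers module's functions.
    return gr.update(value=f"{model} / {dataset}", visible=True)

with gr.Blocks() as demo:
    model_box = gr.Textbox(label="model")
    data_box = gr.Textbox(label="dataset")
    logs = gr.Textbox(label="Log", visible=False)
    # One fn bound to several triggers, passed explicitly rather than decorated.
    gr.on(triggers=[model_box.change, data_box.change],
          fn=echo,
          inputs=[model_box, data_box],
          outputs=[logs])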
io_utils.py CHANGED
@@ -1,6 +1,9 @@
  import yaml
+ import subprocess
+ import os

  YAML_PATH = "./config.yaml"
+ PIPE_PATH = "./tmp/pipe"

  class Dumper(yaml.Dumper):
      def increase_indent(self, flow=False, *args, **kwargs):
@@ -56,7 +59,9 @@ def read_column_mapping(path):
  def write_column_mapping(mapping):
      with open(YAML_PATH, "r") as f:
          config = yaml.load(f, Loader=yaml.FullLoader)
-     if mapping is None:
+     if config is None:
+         return
+     if mapping is None and "column_mapping" in config.keys():
          del config["column_mapping"]
      else:
          config["column_mapping"] = mapping
@@ -71,3 +76,41 @@
      for _, row in df.iterrows():
          column_mapping[label].append(row.tolist())
      return column_mapping
+
+ def write_log_to_user_file(id, log):
+     with open(f"./tmp/{id}_log", "a") as f:
+         f.write(log)
+
+ def save_job_to_pipe(id, job, lock):
+     if not os.path.exists('./tmp'):
+         os.makedirs('./tmp')
+     job = [str(i) for i in job]
+     job = ",".join(job)
+     print(job)
+     with lock:
+         with open(PIPE_PATH, "a") as f:
+             # write each element in job
+             f.write(f'{id}@{job}\n')
+
+ def pop_job_from_pipe():
+     if not os.path.exists(PIPE_PATH):
+         return
+     with open(PIPE_PATH, "r+") as f:
+         jobs = f.readlines()
+         f.write("\n".join(jobs[1:]))
+         f.close()
+     if len(jobs) == 0:
+         return
+     job_info = jobs[0].split('\n')[0].split("@")
+     if len(job_info) != 2:
+         raise ValueError("Invalid job info: ", job_info)
+     print(f"Running job {job_info}")
+     command = job_info[1].split(",")
+     print(command)
+     log_file = open(f"./tmp/{job_info[0]}_log", "w")
+     subprocess.Popen(
+         command,
+         cwd=os.path.join(os.path.dirname(os.path.realpath(__file__)), "cicd"),
+         stdout=log_file,
+         stderr=log_file,
+     )
run_jobs.py ADDED
@@ -0,0 +1,29 @@
+ from io_utils import pop_job_from_pipe
+ import time
+ import threading
+
+ def start_process_run_job():
+     try:
+         print("Running jobs in thread")
+         global thread
+         thread = threading.Thread(target=run_job)
+         thread.daemon = True
+         thread.do_run = True
+         thread.start()
+
+     except Exception as e:
+         print("Failed to start thread: ", e)
+ def stop_thread():
+     print("Stop thread")
+     thread.do_run = False
+
+ def run_job():
+     while True:
+         print(thread.do_run)
+         try:
+             pop_job_from_pipe()
+             time.sleep(10)
+         except KeyboardInterrupt:
+             print("KeyboardInterrupt stop background thread")
+             stop_thread()
+             break
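Note: the runner polls pop_job_from_pipe() every 10 seconds on a daemon thread and is stopped through a do_run attribute set on the thread object (which run_job prints but never tests; in practice the daemon flag ends it). A threading.Event the loop checks each pass is the more conventional equivalent; a short sketch with the pipe call stubbed out:

import threading

stop_event = threading.Event()

def run_job():
    while not stop_event.is_set():
        # pop_job_from_pipe() would go here
        stop_event.wait(10)  # sleeps up to 10 s but wakes as soon as set() is called

thread = threading.Thread(target=run_job, daemon=True)
thread.start()
stop_event.set()        # ask the loop to finish its current pass and exit
thread.join(timeout=15)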
text_classification_ui_helpers.py ADDED
@@ -0,0 +1,184 @@
+ import gradio as gr
+ from wordings import CONFIRM_MAPPING_DETAILS_FAIL_RAW
+ import json
+ import os
+ import logging
+ import uuid
+ import threading
+ from io_utils import read_column_mapping, write_column_mapping, save_job_to_pipe, write_log_to_user_file
+ import datasets
+ import collections
+ from text_classification import get_labels_and_features_from_dataset, check_model, get_example_prediction
+ from transformers.pipelines import TextClassificationPipeline
+
+ MAX_LABELS = 20
+ MAX_FEATURES = 20
+
+ HF_REPO_ID = 'HF_REPO_ID'
+ HF_SPACE_ID = 'SPACE_ID'
+ HF_WRITE_TOKEN = 'HF_WRITE_TOKEN'
+ CONFIG_PATH = "./config.yaml"
+
+ def check_dataset_and_get_config(dataset_id):
+     try:
+         write_column_mapping(None)
+         configs = datasets.get_dataset_config_names(dataset_id)
+         return gr.Dropdown(configs, value=configs[0], visible=True)
+     except Exception:
+         # Dataset may not exist
+         pass
+
+ def check_dataset_and_get_split(dataset_id, dataset_config):
+     try:
+         splits = list(datasets.load_dataset(dataset_id, dataset_config).keys())
+         return gr.Dropdown(splits, value=splits[0], visible=True)
+     except Exception:
+         # Dataset may not exist
+         # gr.Warning(f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}")
+         pass
+
+ def write_column_mapping_to_config(dataset_id, dataset_config, dataset_split, *labels):
+     ds_labels, ds_features = get_labels_and_features_from_dataset(dataset_id, dataset_config, dataset_split)
+     if labels is None:
+         return
+     labels = [*labels]
+     all_mappings = read_column_mapping(CONFIG_PATH)
+
+     if "labels" not in all_mappings.keys():
+         all_mappings["labels"] = dict()
+     for i, label in enumerate(labels[:MAX_LABELS]):
+         if label:
+             all_mappings["labels"][label] = ds_labels[i]
+
+     if "features" not in all_mappings.keys():
+         all_mappings["features"] = dict()
+     for i, feat in enumerate(labels[MAX_LABELS:(MAX_LABELS + MAX_FEATURES)]):
+         if feat:
+             all_mappings["features"][feat] = ds_features[i]
+     write_column_mapping(all_mappings)
+
+ def list_labels_and_features_from_dataset(ds_labels, ds_features, model_id2label):
+     model_labels = list(model_id2label.values())
+     lables = [gr.Dropdown(label=f"{label}", choices=model_labels, value=model_id2label[i], interactive=True, visible=True) for i, label in enumerate(ds_labels[:MAX_LABELS])]
+     lables += [gr.Dropdown(visible=False) for _ in range(MAX_LABELS - len(lables))]
+     # TODO: Substitute 'text' with more features for zero-shot
+     features = [gr.Dropdown(label=f"{feature}", choices=ds_features, value=ds_features[0], interactive=True, visible=True) for feature in ['text']]
+     features += [gr.Dropdown(visible=False) for _ in range(MAX_FEATURES - len(features))]
+     return lables + features
+
+ def check_model_and_show_prediction(model_id, dataset_id, dataset_config, dataset_split):
+     ppl = check_model(model_id)
+     if ppl is None or not isinstance(ppl, TextClassificationPipeline):
+         gr.Warning("Please check your model.")
+         return (
+             gr.update(visible=False),
+             gr.update(visible=False),
+             *[gr.update(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)]
+         )
+
+     dropdown_placement = [gr.Dropdown(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)]
+
+     if ppl is None:  # pipeline not found
+         gr.Warning("Model not found")
+         return (
+             gr.update(visible=False),
+             gr.update(visible=False),
+             gr.update(visible=False, open=False),
+             *dropdown_placement
+         )
+     model_id2label = ppl.model.config.id2label
+     ds_labels, ds_features = get_labels_and_features_from_dataset(dataset_id, dataset_config, dataset_split)
+
+     # when dataset does not have labels or features
+     if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
+         gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
+         return (
+             gr.update(visible=False),
+             gr.update(visible=False),
+             gr.update(visible=False, open=False),
+             *dropdown_placement
+         )
+
+     column_mappings = list_labels_and_features_from_dataset(
+         ds_labels,
+         ds_features,
+         model_id2label,
+     )
+
+     # when labels or features are not aligned
+     # show manually column mapping
+     if collections.Counter(model_id2label.values()) != collections.Counter(ds_labels) or ds_features[0] != 'text':
+         gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
+         return (
+             gr.update(visible=False),
+             gr.update(visible=False),
+             gr.update(visible=True, open=True),
+             *column_mappings
+         )
+
+     prediction_input, prediction_output = get_example_prediction(ppl, dataset_id, dataset_config, dataset_split)
+     return (
+         gr.update(value=prediction_input, visible=True),
+         gr.update(value=prediction_output, visible=True),
+         gr.update(visible=True, open=False),
+         *column_mappings
+     )
+
+ def get_logs_file(uid):
+     file = open(f"./tmp/{uid}_log")
+     contents = file.readlines()
+     file.close()
+     return '\n'.join(contents)
+
+ def try_submit(m_id, d_id, config, split, local):
+     all_mappings = read_column_mapping(CONFIG_PATH)
+
+     if all_mappings is None:
+         gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
+         return gr.update(interactive=True)
+
+     if "labels" not in all_mappings.keys():
+         gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
+         return gr.update(interactive=True)
+     label_mapping = all_mappings["labels"]
+
+     if "features" not in all_mappings.keys():
+         gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
+         return gr.update(interactive=True)
+     feature_mapping = all_mappings["features"]
+
+     # TODO: Set column mapping for some dataset such as `amazon_polarity`
+     if local:
+         command = [
+             "python",
+             "cli.py",
+             "--loader", "huggingface",
+             "--model", m_id,
+             "--dataset", d_id,
+             "--dataset_config", config,
+             "--dataset_split", split,
+             "--hf_token", os.environ.get(HF_WRITE_TOKEN),
+             "--discussion_repo", os.environ.get(HF_REPO_ID) or os.environ.get(HF_SPACE_ID),
+             "--output_format", "markdown",
+             "--output_portal", "huggingface",
+             "--feature_mapping", json.dumps(feature_mapping),
+             "--label_mapping", json.dumps(label_mapping),
+             "--scan_config", "../config.yaml",
+         ]
+
+         eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
+         logging.info(f"Start local evaluation on {eval_str}")
+         uid = uuid.uuid4()
+         save_job_to_pipe(uid, command, threading.Lock())
+         write_log_to_user_file(uid, f"Start local evaluation on {eval_str}. Please wait for your job to start...\n")
+         gr.Info(f"Start local evaluation on {eval_str}")
+
+         return (
+             gr.update(interactive=False),
+             gr.update(value=get_logs_file(uid), visible=True, interactive=False))
+
+     else:
+         gr.Info("TODO: Submit task to an endpoint")
+
+         return (gr.update(interactive=True),  # Submit button
+                 gr.update(visible=False))
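Note: try_submit ties the new pieces together: it validates the saved column mapping, queues the cli.py argv under a fresh uuid, and seeds ./tmp/<uid>_log, the same path the worker's subprocess writes its stdout/stderr to and get_logs_file re-reads for the log textbox. A small sketch of that per-job log convention (illustrative only):

import os
import uuid

os.makedirs("./tmp", exist_ok=True)

def log_path(uid):
    return f"./tmp/{uid}_log"

uid = uuid.uuid4()
# submit side: seed the log so the UI has something to show immediately
with open(log_path(uid), "w") as f:
    f.write("Start local evaluation. Please wait for your job to start...\n")

# UI side: roughly what get_logs_file(uid) does on each refresh
with open(log_path(uid)) as f:
    print(f.read())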
tmp/pipe ADDED
The diff for this file is too large to render. See raw diff