inoki-giskard committed on
Commit
4641c89
2 Parent(s): 35be7f4 ed3fe33

Merge branch 'feature/gsk-2457-secure-scanner-running-based-on-virtualenv' into giskard-main

Browse files
app_env.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+
2
# Names of the environment variables read by the evaluator app.
# Each constant holds the *variable name* looked up via os.environ, not a value.
HF_REPO_ID = "HF_REPO_ID"
HF_SPACE_ID = "SPACE_ID"
HF_WRITE_TOKEN = "HF_WRITE_TOKEN"
HF_GSK_HUB_URL = "GSK_HUB_URL"
HF_GSK_HUB_PROJECT_KEY = "GSK_HUB_PROJECT_KEY"
HF_GSK_HUB_KEY = "GSK_API_KEY"
HF_GSK_HUB_HF_TOKEN = "GSK_HF_TOKEN"
HF_GSK_HUB_UNLOCK_TOKEN = "GSK_HUB_UNLOCK_TOKEN"
app_leaderboard.py CHANGED
@@ -5,7 +5,6 @@ import gradio as gr
5
 
6
  from fetch_utils import (check_dataset_and_get_config,
7
  check_dataset_and_get_split)
8
- from text_classification_ui_helpers import LEADERBOARD
9
 
10
  import leaderboard
11
 
@@ -75,7 +74,7 @@ def get_display_df(df):
75
 
76
 
77
  def get_demo():
78
- leaderboard.records = get_records_from_dataset_repo(LEADERBOARD)
79
  records = leaderboard.records
80
 
81
  model_ids = get_model_ids(records)
 
5
 
6
  from fetch_utils import (check_dataset_and_get_config,
7
  check_dataset_and_get_split)
 
8
 
9
  import leaderboard
10
 
 
74
 
75
 
76
  def get_demo():
77
+ leaderboard.records = get_records_from_dataset_repo(leaderboard.LEADERBOARD)
78
  records = leaderboard.records
79
 
80
  model_ids = get_model_ids(records)
io_utils.py CHANGED
@@ -1,11 +1,7 @@
1
  import os
2
- from pathlib import Path
3
- import subprocess
4
 
5
  import yaml
6
 
7
- import pipe
8
-
9
  YAML_PATH = "./cicd/configs"
10
  LOG_FILE = "temp_log"
11
 
@@ -104,6 +100,15 @@ def convert_column_mapping_to_json(df, label=""):
104
  return column_mapping
105
 
106
 
 
 
 
 
 
 
 
 
 
107
  def get_logs_file():
108
  try:
109
  with open(LOG_FILE, "r") as file:
@@ -115,29 +120,3 @@ def get_logs_file():
115
  def write_log_to_user_file(task_id, log):
116
  with open(f"./tmp/{task_id}.log", "a") as f:
117
  f.write(log)
118
-
119
-
120
- def save_job_to_pipe(task_id, job, description, lock):
121
- with lock:
122
- pipe.jobs.append((task_id, job, description))
123
-
124
-
125
- def pop_job_from_pipe():
126
- if len(pipe.jobs) == 0:
127
- return
128
- job_info = pipe.jobs.pop()
129
- pipe.current = job_info[2]
130
- task_id = job_info[0]
131
- write_log_to_user_file(task_id, f"Running job id {task_id}\n")
132
- command = job_info[1]
133
-
134
- # Link to LOG_FILE
135
- log_file_path = Path(LOG_FILE)
136
- if log_file_path.exists():
137
- log_file_path.unlink()
138
- os.symlink(f"./tmp/{task_id}.log", LOG_FILE)
139
-
140
- with open(f"./tmp/{task_id}.log", "a") as log_file:
141
- p = subprocess.Popen(command, stdout=log_file, stderr=subprocess.STDOUT)
142
- p.wait()
143
- pipe.current = None
 
1
  import os
 
 
2
 
3
  import yaml
4
 
 
 
5
  YAML_PATH = "./cicd/configs"
6
  LOG_FILE = "temp_log"
7
 
 
100
  return column_mapping
101
 
102
 
103
def get_log_file_with_uid(uid):
    """Return the contents of ./tmp/<uid>.log for the given execution uid.

    Returns a placeholder message when the file is missing or unreadable.
    """
    try:
        print(f"Loading {uid}.log")
        # BUG FIX: the file must be opened for reading. Mode "a" is
        # write-only, so file.read() always raised io.UnsupportedOperation
        # and every call fell through to the except branch.
        with open(f"./tmp/{uid}.log", "r") as file:
            return file.read()
    except Exception:
        return "Log file does not exist"
110
+
111
+
112
  def get_logs_file():
113
  try:
114
  with open(LOG_FILE, "r") as file:
 
120
def write_log_to_user_file(task_id, log):
    """Append a log message to the per-task log file under ./tmp."""
    log_path = f"./tmp/{task_id}.log"
    with open(log_path, "a") as handle:
        handle.write(log)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
isolated_env.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+
4
+ from io_utils import write_log_to_user_file
5
+
6
+
7
def prepare_venv(execution_id, deps):
    """Create an isolated virtualenv for one scan execution and install deps.

    Args:
        execution_id: id used both for the venv directory and the user log.
        deps: requirements-file content, as a single string.

    Returns:
        Path to the ``giskard_scanner`` executable inside the new venv.

    Raises:
        RuntimeError: if any of the underlying commands exits non-zero.
    """
    # NOTE(review): relies on a plain "python" being on PATH — presumably the
    # Space's interpreter; confirm this matches the deployment image.
    python_executable = "python"
    venv_base = f"tmp/venvs/{execution_id}"
    pip_executable = os.path.join(venv_base, "bin", "pip")

    def _run_and_log(step_message, command):
        # Run one setup command, mirror its output to the user log, and
        # fail fast on a non-zero exit code (same pattern repeated three
        # times in the original).
        write_log_to_user_file(execution_id, step_message)
        p = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        write_log_to_user_file(execution_id, p.stdout.decode())
        if p.returncode != 0:
            raise RuntimeError(f"{p.args} ended with {p.returncode}")

    # Check pyver
    _run_and_log("Checking Python version\n", [python_executable, "--version"])
    # Create venv (--clear wipes any leftover venv for this id)
    _run_and_log(
        "Creating virtual environment\n",
        [python_executable, "-m", "venv", venv_base, "--clear"],
    )

    # Output requirements.txt inside the venv directory
    requirement_file = os.path.join(venv_base, "requirements.txt")
    with open(requirement_file, "w") as f:
        # deps is one string: write() it verbatim instead of writelines(),
        # which iterates a str character by character.
        f.write(deps)

    # Install deps
    _run_and_log("Installing dependencies\n", [pip_executable, "install", "-r", requirement_file])
    return os.path.join(venv_base, "bin", "giskard_scanner")
leaderboard.py CHANGED
@@ -1,3 +1,5 @@
1
  import pandas as pd
2
 
3
- records = pd.DataFrame()
 
 
 
1
import pandas as pd

# In-memory leaderboard records; starts empty and is populated elsewhere
# (e.g. at app start-up via get_records_from_dataset_repo).
records = pd.DataFrame()

# Dataset repo where evaluation results are published for ranking.
LEADERBOARD = "giskard-bot/evaluator-leaderboard"
run_jobs.py CHANGED
@@ -1,11 +1,30 @@
 
1
  import logging
 
 
2
  import threading
3
  import time
 
4
 
5
- from io_utils import pop_job_from_pipe
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  is_running = False
8
 
 
 
9
 
10
  def start_process_run_job():
11
  try:
@@ -26,6 +45,134 @@ def stop_thread():
26
  is_running = False
27
 
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  def run_job():
30
  global is_running
31
  while is_running:
 
1
+ import json
2
  import logging
3
+ import os
4
+ import subprocess
5
  import threading
6
  import time
7
+ from pathlib import Path
8
 
9
+ import pipe
10
+ from app_env import (
11
+ HF_GSK_HUB_HF_TOKEN,
12
+ HF_GSK_HUB_KEY,
13
+ HF_GSK_HUB_PROJECT_KEY,
14
+ HF_GSK_HUB_UNLOCK_TOKEN,
15
+ HF_GSK_HUB_URL,
16
+ HF_REPO_ID,
17
+ HF_SPACE_ID,
18
+ HF_WRITE_TOKEN,
19
+ )
20
+ from io_utils import LOG_FILE, get_yaml_path, write_log_to_user_file
21
+ from isolated_env import prepare_venv
22
+ from leaderboard import LEADERBOARD
23
 
24
  is_running = False
25
 
26
+ logger = logging.getLogger(__file__)
27
+
28
 
29
  def start_process_run_job():
30
  try:
 
45
  is_running = False
46
 
47
 
48
def prepare_env_and_get_command(
    m_id,
    d_id,
    config,
    split,
    inference,
    inference_token,
    uid,
    label_mapping,
    feature_mapping,
):
    """Prepare an isolated venv and build the scanner command line.

    Returns the argv list for the ``giskard_scanner`` run. Optional flags
    are appended only when the matching environment variable is set. Falls
    back to the current environment's ``giskard_scanner`` when the venv
    cannot be created.
    """
    leaderboard_dataset = None
    if os.environ.get("SPACE_ID") == "giskardai/giskard-evaluator":
        # Only the official evaluator Space publishes to the leaderboard.
        leaderboard_dataset = LEADERBOARD

    inference_type = "hf_pipeline"
    if inference and inference_token:
        inference_type = "hf_inference_api"

    executable = "giskard_scanner"
    try:
        # Copy the current requirements (might be changed)
        with open("requirements.txt", "r") as f:
            # BUG FIX: f.read() preserves the file verbatim. The original
            # "\n".join(f.readlines()) doubled every newline, because
            # readlines() already keeps the trailing "\n" on each line.
            executable = prepare_venv(uid, f.read())
        logger.info(f"Using {executable} as executable")
    except Exception as e:
        # logger.warn() is deprecated; warning() is the supported spelling.
        logger.warning(f"Create env failed due to {e}, using the current env as fallback.")
        executable = "giskard_scanner"

    command = [
        executable,
        "--loader",
        "huggingface",
        "--model",
        m_id,
        "--dataset",
        d_id,
        "--dataset_config",
        config,
        "--dataset_split",
        split,
        "--output_format",
        "markdown",
        "--output_portal",
        "huggingface",
        "--feature_mapping",
        json.dumps(feature_mapping),
        "--label_mapping",
        json.dumps(label_mapping),
        "--scan_config",
        get_yaml_path(uid),
        "--inference_type",
        inference_type,
        "--inference_api_token",
        inference_token,
    ]
    # The token to publish post
    if os.environ.get(HF_WRITE_TOKEN):
        command.extend(["--hf_token", os.environ.get(HF_WRITE_TOKEN)])

    # The repo to publish post
    if os.environ.get(HF_REPO_ID) or os.environ.get(HF_SPACE_ID):
        # TODO: Replace by the model id
        command.extend(
            ["--discussion_repo", os.environ.get(HF_REPO_ID) or os.environ.get(HF_SPACE_ID)]
        )

    # The repo to publish for ranking
    if leaderboard_dataset:
        command.extend(["--leaderboard_dataset", leaderboard_dataset])

    # The info to upload to Giskard hub
    if os.environ.get(HF_GSK_HUB_KEY):
        command.extend(["--giskard_hub_api_key", os.environ.get(HF_GSK_HUB_KEY)])
    if os.environ.get(HF_GSK_HUB_URL):
        command.extend(["--giskard_hub_url", os.environ.get(HF_GSK_HUB_URL)])
    if os.environ.get(HF_GSK_HUB_PROJECT_KEY):
        command.extend(["--giskard_hub_project_key", os.environ.get(HF_GSK_HUB_PROJECT_KEY)])
    if os.environ.get(HF_GSK_HUB_HF_TOKEN):
        command.extend(["--giskard_hub_hf_token", os.environ.get(HF_GSK_HUB_HF_TOKEN)])
    if os.environ.get(HF_GSK_HUB_UNLOCK_TOKEN):
        command.extend(["--giskard_hub_unlock_token", os.environ.get(HF_GSK_HUB_UNLOCK_TOKEN)])

    eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"

    write_log_to_user_file(
        uid,
        f"Start local evaluation on {eval_str}. Please wait for your job to start...\n",
    )

    return command
+
149
+
150
+ def save_job_to_pipe(task_id, job, description, lock):
151
+ with lock:
152
+ pipe.jobs.append((task_id, job, description))
153
+
154
+
155
+ def pop_job_from_pipe():
156
+ if len(pipe.jobs) == 0:
157
+ return
158
+ job_info = pipe.jobs.pop()
159
+ pipe.current = job_info[2]
160
+ task_id = job_info[0]
161
+ write_log_to_user_file(task_id, f"Running job id {task_id}\n")
162
+ command = prepare_env_and_get_command(*job_info[1])
163
+
164
+ # Link to LOG_FILE
165
+ log_file_path = Path(LOG_FILE)
166
+ if log_file_path.exists():
167
+ log_file_path.unlink()
168
+ os.symlink(f"./tmp/{task_id}.log", LOG_FILE)
169
+
170
+ with open(f"./tmp/{task_id}.log", "a") as log_file:
171
+ p = subprocess.Popen(command, stdout=log_file, stderr=subprocess.STDOUT)
172
+ p.wait()
173
+ pipe.current = None
174
+
175
+
176
  def run_job():
177
  global is_running
178
  while is_running:
text_classification_ui_helpers.py CHANGED
@@ -1,22 +1,15 @@
1
  import collections
2
- import json
3
  import logging
4
- import os
5
  import threading
6
  import uuid
7
- import leaderboard
8
 
9
  import datasets
10
  import gradio as gr
11
  import pandas as pd
12
 
13
- from io_utils import (
14
- get_yaml_path,
15
- read_column_mapping,
16
- save_job_to_pipe,
17
- write_column_mapping,
18
- write_log_to_user_file,
19
- )
20
  from text_classification import (
21
  check_model_task,
22
  get_example_prediction,
@@ -32,21 +25,10 @@ from wordings import (
32
  MAX_LABELS = 40
33
  MAX_FEATURES = 20
34
 
35
- HF_REPO_ID = "HF_REPO_ID"
36
- HF_SPACE_ID = "SPACE_ID"
37
- HF_WRITE_TOKEN = "HF_WRITE_TOKEN"
38
- HF_GSK_HUB_URL = "GSK_HUB_URL"
39
- HF_GSK_HUB_PROJECT_KEY = "GSK_HUB_PROJECT_KEY"
40
- HF_GSK_HUB_KEY = "GSK_API_KEY"
41
- HF_GSK_HUB_HF_TOKEN = "GSK_HF_TOKEN"
42
- HF_GSK_HUB_UNLOCK_TOKEN = "GSK_HUB_UNLOCK_TOKEN"
43
-
44
- LEADERBOARD = "giskard-bot/evaluator-leaderboard"
45
-
46
- global ds_dict, ds_config
47
  ds_dict = None
48
  ds_config = None
49
 
 
50
  def get_related_datasets_from_leaderboard(model_id):
51
  records = leaderboard.records
52
  model_records = records[records["model_id"] == model_id]
@@ -203,7 +185,13 @@ def precheck_model_ds_enable_example_btn(
203
 
204
 
205
  def align_columns_and_show_prediction(
206
- model_id, dataset_id, dataset_config, dataset_split, uid, run_inference, inference_token
 
 
 
 
 
 
207
  ):
208
  model_task = check_model_task(model_id)
209
  if model_task is None or model_task != "text-classification":
@@ -303,85 +291,24 @@ def try_submit(m_id, d_id, config, split, inference, inference_token, uid):
303
  check_column_mapping_keys_validity(all_mappings)
304
  label_mapping, feature_mapping = construct_label_and_feature_mapping(all_mappings)
305
 
306
- leaderboard_dataset = None
307
- if os.environ.get("SPACE_ID") == "giskardai/giskard-evaluator":
308
- leaderboard_dataset = LEADERBOARD
309
-
310
- if inference:
311
- inference_type = "hf_inference_api"
312
-
313
-
314
- # TODO: Set column mapping for some dataset such as `amazon_polarity`
315
- command = [
316
- "giskard_scanner",
317
- "--loader",
318
- "huggingface",
319
- "--model",
320
- m_id,
321
- "--dataset",
322
- d_id,
323
- "--dataset_config",
324
- config,
325
- "--dataset_split",
326
- split,
327
- "--output_format",
328
- "markdown",
329
- "--output_portal",
330
- "huggingface",
331
- "--feature_mapping",
332
- json.dumps(feature_mapping),
333
- "--label_mapping",
334
- json.dumps(label_mapping),
335
- "--scan_config",
336
- get_yaml_path(uid),
337
- "--inference_type",
338
- inference_type,
339
- "--inference_api_token",
340
- inference_token,
341
- ]
342
-
343
- # The token to publish post
344
- if os.environ.get(HF_WRITE_TOKEN):
345
- command.append("--hf_token")
346
- command.append(os.environ.get(HF_WRITE_TOKEN))
347
-
348
- # The repo to publish post
349
- if os.environ.get(HF_REPO_ID) or os.environ.get(HF_SPACE_ID):
350
- command.append("--discussion_repo")
351
- # TODO: Replace by the model id
352
- command.append(os.environ.get(HF_REPO_ID) or os.environ.get(HF_SPACE_ID))
353
-
354
- # The repo to publish for ranking
355
- if leaderboard_dataset:
356
- command.append("--leaderboard_dataset")
357
- command.append(leaderboard_dataset)
358
-
359
- # The info to upload to Giskard hub
360
- if os.environ.get(HF_GSK_HUB_KEY):
361
- command.append("--giskard_hub_api_key")
362
- command.append(os.environ.get(HF_GSK_HUB_KEY))
363
- if os.environ.get(HF_GSK_HUB_URL):
364
- command.append("--giskard_hub_url")
365
- command.append(os.environ.get(HF_GSK_HUB_URL))
366
- if os.environ.get(HF_GSK_HUB_PROJECT_KEY):
367
- command.append("--giskard_hub_project_key")
368
- command.append(os.environ.get(HF_GSK_HUB_PROJECT_KEY))
369
- if os.environ.get(HF_GSK_HUB_HF_TOKEN):
370
- command.append("--giskard_hub_hf_token")
371
- command.append(os.environ.get(HF_GSK_HUB_HF_TOKEN))
372
- if os.environ.get(HF_GSK_HUB_UNLOCK_TOKEN):
373
- command.append("--giskard_hub_unlock_token")
374
- command.append(os.environ.get(HF_GSK_HUB_UNLOCK_TOKEN))
375
-
376
  eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
377
- logging.info(f"Start local evaluation on {eval_str}")
378
- save_job_to_pipe(uid, command, eval_str, threading.Lock())
379
-
380
- write_log_to_user_file(
381
  uid,
382
- f"Start local evaluation on {eval_str}. Please wait for your job to start...\n",
 
 
 
 
 
 
 
 
 
 
 
 
383
  )
384
- gr.Info(f"Start local evaluation on {eval_str}")
385
 
386
  return (
387
  gr.update(interactive=False), # Submit button
 
1
  import collections
 
2
  import logging
 
3
  import threading
4
  import uuid
 
5
 
6
  import datasets
7
  import gradio as gr
8
  import pandas as pd
9
 
10
+ import leaderboard
11
+ from io_utils import read_column_mapping, write_column_mapping
12
+ from run_jobs import save_job_to_pipe
 
 
 
 
13
  from text_classification import (
14
  check_model_task,
15
  get_example_prediction,
 
25
  MAX_LABELS = 40
26
  MAX_FEATURES = 20
27
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  ds_dict = None
29
  ds_config = None
30
 
31
+
32
  def get_related_datasets_from_leaderboard(model_id):
33
  records = leaderboard.records
34
  model_records = records[records["model_id"] == model_id]
 
185
 
186
 
187
  def align_columns_and_show_prediction(
188
+ model_id,
189
+ dataset_id,
190
+ dataset_config,
191
+ dataset_split,
192
+ uid,
193
+ run_inference,
194
+ inference_token,
195
  ):
196
  model_task = check_model_task(model_id)
197
  if model_task is None or model_task != "text-classification":
 
291
  check_column_mapping_keys_validity(all_mappings)
292
  label_mapping, feature_mapping = construct_label_and_feature_mapping(all_mappings)
293
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294
  eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
295
+ save_job_to_pipe(
 
 
 
296
  uid,
297
+ (
298
+ m_id,
299
+ d_id,
300
+ config,
301
+ split,
302
+ inference,
303
+ inference_token,
304
+ uid,
305
+ label_mapping,
306
+ feature_mapping,
307
+ ),
308
+ eval_str,
309
+ threading.Lock(),
310
  )
311
+ gr.Info("Your evaluation is submitted")
312
 
313
  return (
314
  gr.update(interactive=False), # Submit button
tmp/venvs/.gitkeep ADDED
File without changes