Fix file loading in the debug section

#27
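The core of the fix: the Logs(Debug) tab no longer streams files by passing a callback as a component `value` with `every=...`; instead each tab builder receives the `demo` Blocks object and polls through `demo.load(...)`. A minimal, self-contained sketch of that polling pattern (assuming the Gradio version pinned in this PR; the log path and helper body are taken from the io_utils.py diff below):

```python
import gradio as gr

def get_logs_file():
    # Same logic as io_utils.get_logs_file in this PR: read the shared log file.
    try:
        file = open("./tmp/temp_log", "r")
        return file.read()
    except Exception:
        return "Log file does not exist"

with gr.Blocks() as demo:
    logs = gr.Textbox(lines=10, visible=True, label="Log File")
    # Re-run get_logs_file every 0.5 seconds and push the result into the textbox.
    demo.load(get_logs_file, None, logs, every=0.5)

demo.launch()
```

With `every=0.5` the callback re-runs twice a second while the page is open, so the textbox tracks the shared log file without a manual refresh.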
README.md CHANGED
@@ -1,10 +1,10 @@
 ---
 title: Giskard Evaluator
-emoji: 🐢🔍
+emoji: 🦀
 colorFrom: blue
 colorTo: indigo
 sdk: gradio
-sdk_version: 4.44.1
+sdk_version: 4.7.1
 app_file: app.py
 pinned: false
 ---
app.py CHANGED
@@ -1,5 +1,4 @@
 import atexit
-import traceback
 
 import gradio as gr
 
@@ -7,18 +6,15 @@ from app_debug import get_demo as get_demo_debug
 from app_leaderboard import get_demo as get_demo_leaderboard
 from app_text_classification import get_demo as get_demo_text_classification
 from run_jobs import start_process_run_job, stop_thread
-from wordings import TITLE_MD, FOOTER_HTML
 
 try:
     with gr.Blocks(theme=gr.themes.Soft(primary_hue="green")) as demo:
-        gr.Markdown(TITLE_MD)
         with gr.Tab("Text Classification"):
-            get_demo_text_classification()
-        with gr.Tab("Leaderboard") as leaderboard_tab:
-            get_demo_leaderboard(leaderboard_tab)
+            get_demo_text_classification(demo)
+        with gr.Tab("Leaderboard"):
+            get_demo_leaderboard()
         with gr.Tab("Logs(Debug)"):
-            get_demo_debug()
-        gr.HTML(FOOTER_HTML)
+            get_demo_debug(demo)
 
     start_process_run_job()
 
@@ -27,7 +23,5 @@ try:
     atexit.register(stop_thread)
 
 except Exception as e:
-    # Print stack trace back
-    print(traceback.format_exc())
     print("stop background thread: ", e)
     stop_thread()
app_debug.py CHANGED
@@ -1,20 +1,17 @@
 from os import listdir
 from os.path import isfile, join
-import html
 
 import gradio as gr
-import os
 import pipe
 from io_utils import get_logs_file
-from app_env import HF_WRITE_TOKEN
 
 LOG_PATH = "./tmp"
-CONFIG_PATH = "./cicd/configs/submitted/"
+CONFIG_PATH = "./cicd/configs/"
 MAX_FILES_NUM = 20
 
 
 def get_accordions_of_files(path, files):
-    components = [None for _ in range(0, MAX_FILES_NUM)]
+    components = [None for _ in range (0, MAX_FILES_NUM)]
     for i in range(0, len(files)):
         if i >= MAX_FILES_NUM:
             break
@@ -38,94 +35,17 @@ def get_accordions_of_config_files():
     ]
     return get_accordions_of_files(CONFIG_PATH, config_files)
 
-
 def get_config_files():
-    config_files = [
-        join(CONFIG_PATH, f)
-        for f in listdir(CONFIG_PATH)
-        if isfile(join(CONFIG_PATH, f)) and f.endswith(".yaml")
-    ]
+    config_files = [join(CONFIG_PATH, f) for f in listdir(CONFIG_PATH) if isfile(join(CONFIG_PATH, f)) and f.endswith(".yaml")]
     return config_files
 
-
-def get_log_files():
-    return [
-        join(LOG_PATH, f)
-        for f in listdir(LOG_PATH)
-        if isfile(join(LOG_PATH, f)) and f.endswith("log")
-    ]
-
-
-def get_jobs_info_in_queue():
-    return [
-        f"⌛️job id {html.escape(job[0])}: {html.escape(job[2])}<br/>"
-        for job in pipe.jobs
-    ]
-
-
-def get_queue_status():
-    if len(pipe.jobs) > 0 or pipe.current is not None:
-        current = pipe.current
-        if current is None:
-            current = "None"
-        return f'<div style="padding-top: 5%">Current job: {html.escape(current)} <br/> Job queue: <br/> {"".join(get_jobs_info_in_queue())}</div>'
-    else:
-        return '<div style="padding-top: 5%">No jobs waiting, please submit an evaluation task from Text-Classification tab.</div>'
-
-
-def can_write_this_space(hf_token):
-    # Only the user owning `HF_WRITE_TOKEN` is able to manage this space
-    if hf_token == os.getenv(HF_WRITE_TOKEN, ""):
-        return True
-    return False
-
-
-def stop_current_job(hf_token):
-    if not can_write_this_space(hf_token):
-        gr.Warning(
-            "You cannot stop the current job, "
-            "because your token does not match `HF_WRITE_TOKEN` in this space."
-        )
-        return
-
-    task_uuid = pipe.current
-    if not task_uuid:
-        gr.Warning("No job in progress")
-        return
-
-    # Interrupt and stop the task
-    pipe.current = None
-    gr.Info(f"Job {task_uuid} interrupted by admin.")
-
-
-def get_demo():
-    if not os.path.exists(CONFIG_PATH):
-        os.makedirs(CONFIG_PATH)
+def get_demo(demo):
     with gr.Row():
-        gr.HTML(
-            value=get_queue_status,
-            every=5,
-        )
-    with gr.Accordion(label="Admin", open=False):
-        with gr.Row():
-            hf_write_token_input = gr.Textbox(
-                label="HF write token",
-                type="password",
-                placeholder="Please input HF_WRITE_TOKEN configured in the current space",
-            )
-        with gr.Row():
-            stop_job_btn = gr.Button(value="Stop current job", variant="stop")
-            stop_job_btn.click(stop_current_job, inputs=hf_write_token_input)
-    with gr.Accordion(label="Log Files", open=True):
-        with gr.Row():
-            gr.Textbox(
-                value=get_logs_file,
-                every=0.5,
-                lines=10,
-                visible=True,
-                label="Current Log File",
-            )
-        with gr.Row():
-            gr.Files(value=get_log_files, label="Log Files", every=10)
+        # check if jobs is an attribute of pipe
+        if hasattr(pipe, "jobs"):
+            gr.Markdown(f"current jobs in queue: {len(pipe.jobs)}")
+    with gr.Accordion(label="Log Files", open=False):
+        logs = gr.Textbox(lines=10, visible=True, label="Log File")
+        demo.load(get_logs_file, None, logs, every=0.5)
     with gr.Accordion(label="Config Files", open=False):
         gr.Files(value=get_config_files, label="Config Files", every=10)
app_env.py DELETED
@@ -1,9 +0,0 @@
-
-HF_REPO_ID = "HF_REPO_ID"
-HF_SPACE_ID = "SPACE_ID"
-HF_WRITE_TOKEN = "HF_WRITE_TOKEN"
-HF_GSK_HUB_URL = "GSK_HUB_URL"
-HF_GSK_HUB_PROJECT_KEY = "GSK_HUB_PROJECT_KEY"
-HF_GSK_HUB_KEY = "GSK_API_KEY"
-HF_GSK_HUB_HF_TOKEN = "GSK_HF_TOKEN"
-HF_GSK_HUB_UNLOCK_TOKEN = "GSK_HUB_UNLOCK_TOKEN"
app_leaderboard.py CHANGED
@@ -2,35 +2,29 @@ import logging
 
 import datasets
 import gradio as gr
-import pandas as pd
-import datetime
 
 from fetch_utils import (check_dataset_and_get_config,
                          check_dataset_and_get_split)
 
-import leaderboard
-logger = logging.getLogger(__name__)
-global update_time
-update_time = datetime.datetime.fromtimestamp(0)
 
 def get_records_from_dataset_repo(dataset_id):
     dataset_config = check_dataset_and_get_config(dataset_id)
 
-    logger.info(f"Dataset {dataset_id} has configs {dataset_config}")
+    logging.info(f"Dataset {dataset_id} has configs {dataset_config}")
    dataset_split = check_dataset_and_get_split(dataset_id, dataset_config[0])
-    logger.info(f"Dataset {dataset_id} has splits {dataset_split}")
+    logging.info(f"Dataset {dataset_id} has splits {dataset_split}")
 
     try:
-        ds = datasets.load_dataset(dataset_id, dataset_config[0], split=dataset_split[0])
+        ds = datasets.load_dataset(dataset_id, dataset_config[0])[dataset_split[0]]
         df = ds.to_pandas()
         return df
     except Exception as e:
-        logger.warning(
+        logging.warning(
             f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}"
         )
-        return pd.DataFrame()
+        return None
+
 
 def get_model_ids(ds):
     logging.info(f"Dataset {ds} column names: {ds['model_id']}")
     models = ds["model_id"].tolist()
@@ -63,58 +57,37 @@ def get_display_df(df):
     columns = display_df.columns.tolist()
     if "model_id" in columns:
         display_df["model_id"] = display_df["model_id"].apply(
-            lambda x: f'<a href="https://huggingface.co/{x}" target="_blank" style="color:blue">🔗{x}</a>'
+            lambda x: f'<p href="https://huggingface.co/{x}" style="color:blue">🔗{x}</p>'
         )
     # style all elements in the dataset_id column
     if "dataset_id" in columns:
         display_df["dataset_id"] = display_df["dataset_id"].apply(
-            lambda x: f'<a href="https://huggingface.co/datasets/{x}" target="_blank" style="color:blue">🔗{x}</a>'
+            lambda x: f'<p href="https://huggingface.co/datasets/{x}" style="color:blue">🔗{x}</p>'
        )
     # style all elements in the report_link column
     if "report_link" in columns:
         display_df["report_link"] = display_df["report_link"].apply(
-            lambda x: f'<a href="{x}" target="_blank" style="color:blue">🔗{x}</a>'
+            lambda x: f'<p href="{x}" style="color:blue">🔗{x}</p>'
         )
     return display_df
 
-def get_demo(leaderboard_tab):
-    global update_time
-    update_time = datetime.datetime.now()
-    logger.info("Loading leaderboard records")
-    leaderboard.records = get_records_from_dataset_repo(leaderboard.LEADERBOARD)
-    records = leaderboard.records
+
+def get_demo():
+    records = get_records_from_dataset_repo("ZeroCommand/test-giskard-report")
 
     model_ids = get_model_ids(records)
     dataset_ids = get_dataset_ids(records)
 
     column_names = records.columns.tolist()
-    issue_columns = column_names[:11]
-    info_columns = column_names[15:]
     default_columns = ["model_id", "dataset_id", "total_issues", "report_link"]
     default_df = records[default_columns]  # extract columns selected
     types = get_types(default_df)
     display_df = get_display_df(default_df)  # the styled dataframe to display
 
-    with gr.Row():
-        with gr.Column():
-            info_columns_select = gr.CheckboxGroup(
-                label="Info Columns",
-                choices=info_columns,
-                value=default_columns,
-                interactive=True,
-            )
-        with gr.Column():
-            issue_columns_select = gr.CheckboxGroup(
-                label="Issue Columns",
-                choices=issue_columns,
-                value=[],
-                interactive=True,
-            )
-
     with gr.Row():
         task_select = gr.Dropdown(
             label="Task",
-            choices=["text_classification"],
+            choices=["text_classification", "tabular"],
             value="text_classification",
             interactive=True,
         )
@@ -129,36 +102,27 @@ def get_demo(leaderboard_tab):
         )
 
     with gr.Row():
-        leaderboard_df = gr.DataFrame(display_df, datatype=types, interactive=False)
-
-    def update_leaderboard_records(model_id, dataset_id, issue_columns, info_columns, task):
-        global update_time
-        if datetime.datetime.now() - update_time < datetime.timedelta(minutes=10):
-            return gr.update()
-        update_time = datetime.datetime.now()
-        logger.info("Updating leaderboard records")
-        leaderboard.records = get_records_from_dataset_repo(leaderboard.LEADERBOARD)
-        return filter_table(model_id, dataset_id, issue_columns, info_columns, task)
+        columns_select = gr.CheckboxGroup(
+            label="Show columns",
+            choices=column_names,
+            value=default_columns,
+            interactive=True,
+        )
 
-    leaderboard_tab.select(
-        fn=update_leaderboard_records,
-        inputs=[model_select, dataset_select, issue_columns_select, info_columns_select, task_select],
-        outputs=[leaderboard_df])
+    with gr.Row():
+        leaderboard_df = gr.DataFrame(display_df, datatype=types, interactive=False)
 
     @gr.on(
         triggers=[
             model_select.change,
             dataset_select.change,
-            issue_columns_select.change,
-            info_columns_select.change,
+            columns_select.change,
             task_select.change,
         ],
-        inputs=[model_select, dataset_select, issue_columns_select, info_columns_select, task_select],
+        inputs=[model_select, dataset_select, columns_select, task_select],
         outputs=[leaderboard_df],
     )
-    def filter_table(model_id, dataset_id, issue_columns, info_columns, task):
-        logger.info("Filtering leaderboard records")
-        records = leaderboard.records
+    def filter_table(model_id, dataset_id, columns, task):
         # filter the table based on task
         df = records[(records["task"] == task)]
         # filter the table based on the model_id and dataset_id
@@ -167,9 +131,8 @@ def get_demo(leaderboard_tab):
         if dataset_id and dataset_id != "Any":
             df = df[(df["dataset_id"] == dataset_id)]
 
         # filter the table based on the columns
-        issue_columns.sort()
-        df = df[info_columns + issue_columns]
+        df = df[columns]
         types = get_types(df)
         display_df = get_display_df(df)
         return gr.update(value=display_df, datatype=types, interactive=False)
app_text_classification.py CHANGED
@@ -2,90 +2,49 @@ import uuid
 
 import gradio as gr
 
-from io_utils import read_scanners, write_scanners
-from text_classification_ui_helpers import (
-    get_related_datasets_from_leaderboard,
-    align_columns_and_show_prediction,
-    get_dataset_splits,
-    check_dataset,
-    show_hf_token_info,
-    precheck_model_ds_enable_example_btn,
-    try_submit,
-    empty_column_mapping,
-    write_column_mapping_to_config,
-    enable_run_btn,
-)
-
-import logging
-from wordings import (
-    EXAMPLE_MODEL_ID,
-    CONFIRM_MAPPING_DETAILS_MD,
-    INTRODUCTION_MD,
-    USE_INFERENCE_API_TIP,
-    USE_INFERENCE_API_NOTICE,
-    CHECK_LOG_SECTION_RAW,
-    HF_TOKEN_INVALID_STYLED,
-)
-
-MAX_LABELS = 40
+from io_utils import (get_logs_file, read_inference_type, read_scanners,
+                      write_inference_type, write_scanners)
+from text_classification_ui_helpers import (check_dataset_and_get_config,
+                                            check_dataset_and_get_split,
+                                            check_model_and_show_prediction,
+                                            deselect_run_inference,
+                                            select_run_mode, try_submit,
+                                            write_column_mapping_to_config)
+from wordings import CONFIRM_MAPPING_DETAILS_MD, INTRODUCTION_MD
+
+MAX_LABELS = 20
 MAX_FEATURES = 20
 
+EXAMPLE_MODEL_ID = "cardiffnlp/twitter-roberta-base-sentiment-latest"
+EXAMPLE_DATA_ID = "tweet_eval"
 CONFIG_PATH = "./config.yaml"
-logger = logging.getLogger(__name__)
 
 
-def get_demo():
+def get_demo(demo):
     with gr.Row():
         gr.Markdown(INTRODUCTION_MD)
     uid_label = gr.Textbox(
         label="Evaluation ID:", value=uuid.uuid4, visible=False, interactive=False
     )
     with gr.Row():
-        with gr.Column():
-            with gr.Row():
-                model_id_input = gr.Textbox(
-                    label="Hugging Face Model id",
-                    placeholder=f"e.g. {EXAMPLE_MODEL_ID}",
-                )
-            with gr.Row():
-                model_id_error_info = gr.HTML(visible=False)
-
-        with gr.Column():
-            dataset_id_input = gr.Dropdown(
-                choices=[],
-                value="",
-                allow_custom_value=True,
-                label="Hugging Face Dataset id",
-            )
-
-    with gr.Row():
-        dataset_config_input = gr.Dropdown(
-            label="Dataset Config", visible=False, allow_custom_value=True
-        )
-        dataset_split_input = gr.Dropdown(
-            label="Dataset Split", visible=False, allow_custom_value=True
+        model_id_input = gr.Textbox(
+            label="Hugging Face model id",
+            placeholder=EXAMPLE_MODEL_ID + " (press enter to confirm)",
         )
 
-    with gr.Row():
-        first_line_ds = gr.DataFrame(label="Dataset Preview", visible=False)
-    with gr.Row():
-        loading_dataset_info = gr.HTML(visible=True)
-    with gr.Row():
-        example_btn = gr.Button(
-            "Validate Model & Dataset",
-            visible=True,
-            variant="primary",
-            interactive=False,
+        dataset_id_input = gr.Textbox(
+            label="Hugging Face Dataset id",
+            placeholder=EXAMPLE_DATA_ID + " (press enter to confirm)",
         )
+
     with gr.Row():
-        loading_validation = gr.HTML(visible=True)
+        dataset_config_input = gr.Dropdown(label="Dataset Config", visible=False)
+        dataset_split_input = gr.Dropdown(label="Dataset Split", visible=False)
+
     with gr.Row():
-        validation_result = gr.HTML(visible=False)
+        example_input = gr.HTML(visible=False)
     with gr.Row():
-        example_input = gr.Textbox(
-            label="Example Input", visible=False, interactive=False
-        )
-        example_prediction = gr.Label(label="Model Sample Prediction", visible=False)
+        example_prediction = gr.Label(label="Model Prediction Sample", visible=False)
 
     with gr.Row():
         with gr.Accordion(
@@ -96,54 +55,40 @@ def get_demo():
             column_mappings = []
             with gr.Row():
                 with gr.Column():
-                    gr.Markdown("# Label Mapping")
                     for _ in range(MAX_LABELS):
                         column_mappings.append(gr.Dropdown(visible=False))
                 with gr.Column():
-                    gr.Markdown("# Feature Mapping")
                     for _ in range(MAX_LABELS, MAX_LABELS + MAX_FEATURES):
                         column_mappings.append(gr.Dropdown(visible=False))
 
-    with gr.Accordion(label="Model Wrap Advanced Config", open=True):
-        gr.HTML(USE_INFERENCE_API_TIP)
+    with gr.Accordion(label="Model Wrap Advance Config (optional)", open=False):
+        run_local = gr.Checkbox(value=True, label="Run in this Space")
+        run_inference = gr.Checkbox(value="False", label="Run with Inference API")
+
+        @gr.on(triggers=[uid_label.change], inputs=[uid_label], outputs=[run_inference])
+        def get_run_mode(uid):
+            return gr.update(
+                value=read_inference_type(uid) == "hf_inference_api"
+                and not run_local.value
+            )
+
         inference_token = gr.Textbox(
-            placeholder="hf_xxxxxxxxxxxxxxxxxxxx",
             value="",
             label="HF Token for Inference API",
-            visible=True,
+            visible=False,
             interactive=True,
         )
-        inference_token_info = gr.HTML(value=HF_TOKEN_INVALID_STYLED, visible=False)
-        gr.HTML(USE_INFERENCE_API_NOTICE)
-
-        inference_token.change(
-            fn=show_hf_token_info,
-            inputs=[inference_token],
-            outputs=[inference_token_info],
-        )
-
-    with gr.Accordion(label="Scanner Advanced Config (optional)", open=False):
-        with gr.Group():
-            verbose_mode_checkbox = gr.Checkbox(label="Verbose mode")
 
-            scanners = gr.CheckboxGroup(visible=True)
+    with gr.Accordion(label="Scanner Advance Config (optional)", open=False):
+        scanners = gr.CheckboxGroup(label="Scan Settings", visible=True)
 
         @gr.on(triggers=[uid_label.change], inputs=[uid_label], outputs=[scanners])
         def get_scanners(uid):
            selected = read_scanners(uid)
-            # we remove data_leakage from the default scanners
+            # currently we remove data_leakage from the default scanners
            # Reason: data_leakage barely raises any issues and takes too many requests
            # when using inference API, causing rate limit error
-            scan_config = [
-                "ethical_bias",
-                "text_perturbation",
-                "robustness",
-                "performance",
-                "underconfidence",
-                "overconfidence",
-                "spurious_correlation",
-                "data_leakage",
-            ]
+            scan_config = selected + ["data_leakage"]
            return gr.update(
                choices=scan_config, value=selected, label="Scan Settings", visible=True
            )
@@ -152,58 +97,51 @@ def get_demo():
     run_btn = gr.Button(
         "Get Evaluation Result",
         variant="primary",
-        interactive=False,
+        interactive=True,
         size="lg",
     )
 
     with gr.Row():
-        logs = gr.Textbox(
-            value=CHECK_LOG_SECTION_RAW,
-            label="Log",
-            visible=False,
-            every=0.5,
-        )
-
-    scanners.change(write_scanners, inputs=[scanners, uid_label])
-
-    gr.on(
-        triggers=[model_id_input.change],
-        fn=get_related_datasets_from_leaderboard,
-        inputs=[model_id_input, dataset_id_input],
-        outputs=[dataset_id_input],
-    ).then(
-        fn=check_dataset,
-        inputs=[dataset_id_input],
-        outputs=[dataset_config_input, dataset_split_input, loading_dataset_info],
-    )
+        logs = gr.Textbox(label="Giskard Bot Evaluation Log:", visible=False)
+        demo.load(get_logs_file, None, logs, every=0.5)
 
-    gr.on(
-        triggers=[dataset_id_input.input, dataset_id_input.select],
-        fn=check_dataset,
+    dataset_id_input.change(
+        check_dataset_and_get_config,
         inputs=[dataset_id_input],
-        outputs=[dataset_config_input, dataset_split_input, loading_dataset_info],
+        outputs=[dataset_config_input],
     )
 
     dataset_config_input.change(
-        fn=get_dataset_splits,
+        check_dataset_and_get_split,
         inputs=[dataset_id_input, dataset_config_input],
         outputs=[dataset_split_input],
     )
 
-    gr.on(
-        triggers=[
-            model_id_input.change,
-            dataset_id_input.change,
-            dataset_config_input.change,
-        ],
-        fn=empty_column_mapping,
-        inputs=[uid_label],
+    scanners.change(write_scanners, inputs=[scanners, uid_label])
+
+    run_inference.change(
+        select_run_mode,
+        inputs=[run_inference, inference_token, uid_label],
+        outputs=[inference_token, run_local],
+    )
+
+    run_local.change(
+        deselect_run_inference,
+        inputs=[run_local],
+        outputs=[inference_token, run_inference],
+    )
+
+    inference_token.change(
+        write_inference_type, inputs=[run_inference, inference_token, uid_label]
     )
 
     gr.on(
         triggers=[label.change for label in column_mappings],
         fn=write_column_mapping_to_config,
         inputs=[
+            dataset_id_input,
+            dataset_config_input,
+            dataset_split_input,
             uid_label,
             *column_mappings,
         ],
@@ -214,6 +152,9 @@ def get_demo():
         triggers=[label.input for label in column_mappings],
         fn=write_column_mapping_to_config,
         inputs=[
+            dataset_id_input,
+            dataset_config_input,
+            dataset_split_input,
             uid_label,
             *column_mappings,
         ],
@@ -226,44 +167,17 @@ def get_demo():
             dataset_config_input.change,
             dataset_split_input.change,
         ],
-        fn=precheck_model_ds_enable_example_btn,
-        inputs=[
-            model_id_input,
-            dataset_id_input,
-            dataset_config_input,
-            dataset_split_input,
-        ],
-        outputs=[
-            example_btn,
-            first_line_ds,
-            validation_result,
-            example_input,
-            example_prediction,
-            column_mapping_accordion,
-            model_id_error_info,
-        ],
-    )
-
-    gr.on(
-        triggers=[
-            example_btn.click,
-        ],
-        fn=align_columns_and_show_prediction,
+        fn=check_model_and_show_prediction,
         inputs=[
             model_id_input,
             dataset_id_input,
            dataset_config_input,
            dataset_split_input,
-            uid_label,
-            inference_token,
        ],
        outputs=[
-            validation_result,
            example_input,
            example_prediction,
            column_mapping_accordion,
-            run_btn,
-            loading_validation,
            *column_mappings,
        ],
    )
@@ -278,48 +192,32 @@ def get_demo():
             dataset_id_input,
             dataset_config_input,
             dataset_split_input,
-            inference_token,
+            run_local,
             uid_label,
-            verbose_mode_checkbox,
-        ],
-        outputs=[
-            run_btn,
-            logs,
-            uid_label,
-            validation_result,
-            example_input,
-            example_prediction,
-            column_mapping_accordion,
         ],
+        outputs=[run_btn, logs],
     )
 
+    def enable_run_btn():
+        return gr.update(interactive=True)
+
     gr.on(
         triggers=[
-            inference_token.input,
-            scanners.input,
+            model_id_input.change,
+            dataset_config_input.change,
+            dataset_split_input.change,
+            run_inference.change,
+            run_local.change,
+            scanners.change,
         ],
         fn=enable_run_btn,
-        inputs=[
-            uid_label,
-            inference_token,
-            model_id_input,
-            dataset_id_input,
-            dataset_config_input,
-            dataset_split_input,
-        ],
+        inputs=None,
        outputs=[run_btn],
    )
 
    gr.on(
-        triggers=[label.input for label in column_mappings],
+        triggers=[label.change for label in column_mappings],
        fn=enable_run_btn,
-        inputs=[
-            uid_label,
-            inference_token,
-            model_id_input,
-            dataset_id_input,
-            dataset_config_input,
-            dataset_split_input,
-        ],  # FIXME
+        inputs=None,
        outputs=[run_btn],
    )
config.yaml CHANGED
@@ -1,8 +1,6 @@
 configuration:
   ethical_bias:
-    threshold: 0.05
-  performance:
-    alpha: 0.05
+    threshold: 0.01
 detectors:
   - ethical_bias
   - text_perturbation
@@ -11,3 +9,4 @@ detectors:
   - underconfidence
   - overconfidence
   - spurious_correlation
+inference_type: hf_pipeline
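For context, this is the template that the `read_scanners`/`write_scanners` helpers in io_utils.py (changed below) round-trip for each evaluation. A hedged sketch of that round-trip with plain PyYAML (the PR itself uses a custom `Dumper` subclass to control indentation):

```python
import yaml

# Read the scan config shown above.
with open("config.yaml", "r") as f:
    config = yaml.load(f, Loader=yaml.FullLoader)

scanners = config.get("detectors", [])  # what read_scanners() returns

# Write an updated detector list back, as write_scanners() does.
config["detectors"] = scanners
with open("config.yaml", "w") as f:
    yaml.dump(config, f)
```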
fetch_utils.py CHANGED
@@ -5,7 +5,7 @@ import datasets
 
 def check_dataset_and_get_config(dataset_id):
     try:
-        configs = datasets.get_dataset_config_names(dataset_id, trust_remote_code=True)
+        configs = datasets.get_dataset_config_names(dataset_id)
         return configs
     except Exception:
         # Dataset may not exist
@@ -14,7 +14,7 @@ def check_dataset_and_get_config(dataset_id):
 
 def check_dataset_and_get_split(dataset_id, dataset_config):
     try:
-        ds = datasets.load_dataset(dataset_id, dataset_config, trust_remote_code=True)
+        ds = datasets.load_dataset(dataset_id, dataset_config)
     except Exception as e:
         # Dataset may not exist
         logging.warning(
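Both `load_dataset` call shapes that this diff switches between are valid `datasets` APIs: with `split=...` you get a single `Dataset`; without it you get a `DatasetDict` that you index by split name. A quick illustration using `tweet_eval`, the example dataset referenced elsewhere in this PR:

```python
import datasets

# Without `split`: a DatasetDict keyed by split name.
ds_dict = datasets.load_dataset("tweet_eval", "emotion")
train_a = ds_dict["train"]

# With `split`: the same data as a single Dataset.
train_b = datasets.load_dataset("tweet_eval", "emotion", split="train")

assert train_a.num_rows == train_b.num_rows
```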
io_utils.py CHANGED
@@ -1,25 +1,17 @@
 import os
-import logging
+import subprocess
 
 import yaml
 
+import pipe
+
 YAML_PATH = "./cicd/configs"
-LOG_FILE = "temp_log"
 
-logger = logging.getLogger(__name__)
 
 class Dumper(yaml.Dumper):
     def increase_indent(self, flow=False, *args, **kwargs):
         return super().increase_indent(flow=flow, indentless=False)
 
-def get_submitted_yaml_path(uid):
-    if not os.path.exists(f"{YAML_PATH}/submitted"):
-        os.makedirs(f"{YAML_PATH}/submitted")
-    if not os.path.exists(f"{YAML_PATH}/{uid}_config.yaml"):
-        logger.error(f"config.yaml does not exist for {uid}")
-        os.system(f"cp config.yaml {YAML_PATH}/{uid}_config.yaml")
-    if not os.path.exists(f"{YAML_PATH}/submitted/{uid}_config.yaml"):
-        os.system(f"cp {YAML_PATH}/{uid}_config.yaml {YAML_PATH}/submitted/{uid}_config.yaml")
-    return f"{YAML_PATH}/submitted/{uid}_config.yaml"
 
 def get_yaml_path(uid):
     if not os.path.exists(YAML_PATH):
@@ -36,6 +28,7 @@ def read_scanners(uid):
     with open(get_yaml_path(uid), "r") as f:
         config = yaml.load(f, Loader=yaml.FullLoader)
         scanners = config.get("detectors", [])
+        f.close()
     return scanners
 
 
@@ -45,9 +38,11 @@ def write_scanners(scanners, uid):
         config = yaml.load(f, Loader=yaml.FullLoader)
         if config:
             config["detectors"] = scanners
+        f.close()
     # save scanners to detectors in yaml
     with open(get_yaml_path(uid), "w") as f:
         yaml.dump(config, f, Dumper=Dumper)
+        f.close()
 
 
 # read model_type from yaml file
@@ -56,6 +51,7 @@ def read_inference_type(uid):
     with open(get_yaml_path(uid), "r") as f:
         config = yaml.load(f, Loader=yaml.FullLoader)
         inference_type = config.get("inference_type", "")
+        f.close()
     return inference_type
 
 
@@ -70,9 +66,11 @@ def write_inference_type(use_inference, inference_token, uid):
             config["inference_type"] = "hf_pipeline"
             # FIXME: A quick and temp fix for missing token
             config["inference_token"] = ""
+        f.close()
     # save inference_type to inference_type in yaml
     with open(get_yaml_path(uid), "w") as f:
         yaml.dump(config, f, Dumper=Dumper)
+        f.close()
 
 
 # read column mapping from yaml file
@@ -82,8 +80,7 @@ def read_column_mapping(uid):
         config = yaml.load(f, Loader=yaml.FullLoader)
         if config:
             column_mapping = config.get("column_mapping", dict())
-    if column_mapping is None:
-        column_mapping = {}
+        f.close()
     return column_mapping
 
 
@@ -91,6 +88,7 @@ def write_column_mapping(mapping, uid):
     with open(get_yaml_path(uid), "r") as f:
         config = yaml.load(f, Loader=yaml.FullLoader)
+        f.close()
 
     if config is None:
         return
@@ -98,9 +96,10 @@ def write_column_mapping(mapping, uid):
         del config["column_mapping"]
     else:
         config["column_mapping"] = mapping
+
     with open(get_yaml_path(uid), "w") as f:
-        # yaml Dumper will by default sort the keys
-        yaml.dump(config, f, Dumper=Dumper, sort_keys=False)
+        yaml.dump(config, f, Dumper=Dumper)
+        f.close()
 
 
 # convert column mapping dataframe to json
@@ -112,23 +111,35 @@ def convert_column_mapping_to_json(df, label=""):
     return column_mapping
 
 
-def get_log_file_with_uid(uid):
-    try:
-        print(f"Loading {uid}.log")
-        with open(f"./tmp/{uid}.log", "a") as file:
-            return file.read()
-    except Exception:
-        return "Log file does not exist"
-
-
 def get_logs_file():
     try:
-        with open(LOG_FILE, "r") as file:
-            return file.read()
+        file = open(f"./tmp/temp_log", "r")
+        return file.read()
     except Exception:
         return "Log file does not exist"
 
 
-def write_log_to_user_file(task_id, log):
-    with open(f"./tmp/{task_id}.log", "a") as f:
+def write_log_to_user_file(id, log):
+    with open(f"./tmp/temp_log", "a") as f:
         f.write(log)
+        f.close()
+
+
+def save_job_to_pipe(id, job, lock):
+    with lock:
+        pipe.jobs.append((id, job))
+
+
+def pop_job_from_pipe():
+    if len(pipe.jobs) == 0:
+        return
+    job_info = pipe.jobs.pop()
+    write_log_to_user_file(job_info[0], f"Running job id {job_info[0]}\n")
+    command = job_info[1]
+
+    log_file = open(f"./tmp/temp_log", "a")
+    subprocess.Popen(
+        command,
+        stdout=log_file,
+        stderr=log_file,
+    )
isolated_env.py DELETED
@@ -1,34 +0,0 @@
-import os
-import subprocess
-
-from io_utils import write_log_to_user_file
-
-
-def prepare_venv(execution_id, deps):
-    python_executable = "python"
-    venv_base = f"tmp/venvs/{execution_id}"
-
-    pip_executable = os.path.join(venv_base, "bin", "pip")
-    # Check pyver
-    write_log_to_user_file(execution_id, "Checking Python version\n")
-    p = subprocess.run([python_executable, "--version"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
-    write_log_to_user_file(execution_id, p.stdout.decode())
-    if p.returncode != 0:
-        raise RuntimeError(f"{p.args} ended with {p.returncode}")
-    # Create venv
-    write_log_to_user_file(execution_id, "Creating virtual environment\n")
-    p = subprocess.run([python_executable, "-m", "venv", venv_base, "--clear"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
-    write_log_to_user_file(execution_id, p.stdout.decode())
-    if p.returncode != 0:
-        raise RuntimeError(f"{p.args} ended with {p.returncode}")
-    # Output requirements.txt
-    requirement_file = os.path.join(venv_base, "requirements.txt")
-    with open(requirement_file, "w") as f:
-        f.writelines(deps)
-    # Install deps
-    write_log_to_user_file(execution_id, "Installing dependencies\n")
-    p = subprocess.run([pip_executable, "install", "-r", requirement_file], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
-    write_log_to_user_file(execution_id, p.stdout.decode())
-    if p.returncode != 0:
-        raise RuntimeError(f"{p.args} ended with {p.returncode}")
-    return os.path.join(venv_base, "bin", "giskard_scanner")
leaderboard.py DELETED
@@ -1,5 +0,0 @@
-import pandas as pd
-
-records = pd.DataFrame()
-
-LEADERBOARD = "giskard-bot/evaluator-leaderboard"
pipe.py CHANGED
@@ -1,3 +1,3 @@
-
-jobs = list()
-current = None
+def init():
+    global jobs
+    jobs = list()
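After this change the queue only exists once `pipe.init()` has run, which is why run_jobs.py (below) calls it before starting the worker thread and app_debug.py guards with `hasattr(pipe, "jobs")`. A hedged sketch of how the pieces introduced in this PR fit together; the job id and command are hypothetical placeholders, and the `./tmp` directory is assumed to exist:

```python
import threading

import pipe
from io_utils import save_job_to_pipe, pop_job_from_pipe

pipe.init()  # creates the module-level `jobs` list
lock = threading.Lock()

# Enqueue an (id, command) pair; the command is what subprocess.Popen will run.
save_job_to_pipe("demo-job", ["echo", "hello"], lock)  # hypothetical job

# The background loop in run_jobs.run_job drains the queue: it pops a job,
# logs "Running job id ...", and spawns the command with output appended
# to ./tmp/temp_log.
pop_job_from_pipe()
```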
requirements.txt CHANGED
@@ -1,11 +1,6 @@
-giskard<2.15.0
+giskard >= 2.1.0, < 2.3.0
 huggingface_hub
-hf-transfer
 torch==2.0.1
 transformers
 datasets
-tabulate
-avidtools
 -e git+https://github.com/Giskard-AI/cicd.git#egg=giskard-cicd
-numpy<2.0
-python-multipart==0.0.12
run_jobs.py CHANGED
@@ -1,30 +1,12 @@
-import json
 import logging
-import os
-import subprocess
 import threading
 import time
-from pathlib import Path
 
 import pipe
-from app_env import (
-    HF_GSK_HUB_HF_TOKEN,
-    HF_GSK_HUB_KEY,
-    HF_GSK_HUB_PROJECT_KEY,
-    HF_GSK_HUB_UNLOCK_TOKEN,
-    HF_GSK_HUB_URL,
-    HF_REPO_ID,
-    HF_SPACE_ID,
-    HF_WRITE_TOKEN,
-)
-from io_utils import LOG_FILE, get_submitted_yaml_path, write_log_to_user_file
-from isolated_env import prepare_venv
-from leaderboard import LEADERBOARD
+from io_utils import pop_job_from_pipe
 
 is_running = False
 
-logger = logging.getLogger(__file__)
-
 
 def start_process_run_job():
     try:
@@ -33,6 +15,7 @@ def start_process_run_job():
         thread = threading.Thread(target=run_job)
         thread.daemon = True
         is_running = True
+        pipe.init()
         thread.start()
 
     except Exception as e:
@@ -45,145 +28,6 @@ def stop_thread():
     is_running = False
 
 
-def prepare_env_and_get_command(
-    m_id,
-    d_id,
-    config,
-    split,
-    inference_token,
-    uid,
-    label_mapping,
-    feature_mapping,
-    verbose,
-):
-    leaderboard_dataset = None
-    if os.environ.get("SPACE_ID") == "giskardai/giskard-evaluator":
-        leaderboard_dataset = LEADERBOARD
-
-    executable = "giskard_scanner"
-    try:
-        # Copy the current requirements (might be changed)
-        with open("requirements.txt", "r") as f:
-            executable = prepare_venv(
-                uid,
-                "\n".join(f.readlines()),
-            )
-        logger.info(f"Using {executable} as executable")
-    except Exception as e:
-        logger.warning(f"Create env failed due to {e}, using the current env as fallback.")
-        executable = "giskard_scanner"
-
-    command = [
-        executable,
-        "--loader",
-        "huggingface",
-        "--model",
-        m_id,
-        "--dataset",
-        d_id,
-        "--dataset_config",
-        config,
-        "--dataset_split",
-        split,
-        "--output_format",
-        "markdown",
-        "--output_portal",
-        "huggingface",
-        "--feature_mapping",
-        json.dumps(feature_mapping),
-        "--label_mapping",
-        json.dumps(label_mapping),
-        "--scan_config",
-        get_submitted_yaml_path(uid),
-        "--inference_type",
-        "hf_inference_api",
-        "--inference_api_token",
-        inference_token,
-        "--persist_scan",
-    ]
-    # The token to publish post
-    if os.environ.get(HF_WRITE_TOKEN):
-        command.append("--hf_token")
-        command.append(os.environ.get(HF_WRITE_TOKEN))
-
-    # The repo to publish for ranking
-    if leaderboard_dataset:
-        command.append("--leaderboard_dataset")
-        command.append(leaderboard_dataset)
-
-    # The info to upload to Giskard hub
-    if os.environ.get(HF_GSK_HUB_KEY):
-        command.append("--giskard_hub_api_key")
-        command.append(os.environ.get(HF_GSK_HUB_KEY))
-    if os.environ.get(HF_GSK_HUB_URL):
-        command.append("--giskard_hub_url")
-        command.append(os.environ.get(HF_GSK_HUB_URL))
-    if os.environ.get(HF_GSK_HUB_PROJECT_KEY):
-        command.append("--giskard_hub_project_key")
-        command.append(os.environ.get(HF_GSK_HUB_PROJECT_KEY))
-    if os.environ.get(HF_GSK_HUB_HF_TOKEN):
-        command.append("--giskard_hub_hf_token")
-        command.append(os.environ.get(HF_GSK_HUB_HF_TOKEN))
-    if os.environ.get(HF_GSK_HUB_UNLOCK_TOKEN):
-        command.append("--giskard_hub_unlock_token")
-        command.append(os.environ.get(HF_GSK_HUB_UNLOCK_TOKEN))
-
-    if verbose:
-        command.append("--verbose")
-
-    eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
-
-    write_log_to_user_file(
-        uid,
-        f"Start local evaluation on {eval_str}. Please wait for your job to start...\n",
-    )
-
-    return command
-
-
-def save_job_to_pipe(task_id, job, description, lock):
-    with lock:
-        pipe.jobs.append((task_id, job, description))
-
-
-def pop_job_from_pipe():
-    if len(pipe.jobs) == 0:
-        return
-    job_info = pipe.jobs.pop()
-    pipe.current = job_info[2]
-    task_id = job_info[0]
-
-    # Link to LOG_FILE
-    log_file_path = Path(LOG_FILE)
-    if log_file_path.exists():
-        log_file_path.unlink()
-    os.symlink(f"./tmp/{task_id}.log", LOG_FILE)
-
-    write_log_to_user_file(task_id, f"Running job id {task_id}\n")
-    command = prepare_env_and_get_command(*job_info[1])
-
-    with open(f"./tmp/{task_id}.log", "a") as log_file:
-        return_code = None
-        p = subprocess.Popen(command, stdout=log_file, stderr=subprocess.STDOUT)
-        while pipe.current and return_code is None:
-            # Wait for finishing
-            try:
-                return_code = p.wait(timeout=1)
-            except subprocess.TimeoutExpired:
-                return_code = None
-
-        if not pipe.current:
-            # Job interrupted before finishing
-            p.kill()
-
-            log_file.write(f"\nJob interrupted by admin at {time.asctime()}\n")
-
-        if return_code:
-            log_file.write(f"\nJob finished with {return_code} at {time.asctime()}\n")
-
-        pipe.current = None
-
-
 def run_job():
     global is_running
     while is_running:
text_classification.py CHANGED
@@ -5,119 +5,27 @@ import datasets
 import huggingface_hub
 import pandas as pd
 from transformers import pipeline
-import requests
-import os
-from app_env import HF_WRITE_TOKEN
 
-logger = logging.getLogger(__name__)
-AUTH_CHECK_URL = "https://huggingface.co/api/whoami-v2"
 
-logger = logging.getLogger(__file__)
-
-
-class HuggingFaceInferenceAPIResponse:
-    def __init__(self, message):
-        self.message = message
-
-
-def get_labels_and_features_from_dataset(ds):
+def get_labels_and_features_from_dataset(dataset_id, dataset_config, split):
+    if not dataset_config:
+        dataset_config = "default"
+    if not split:
+        split = "train"
     try:
+        ds = datasets.load_dataset(dataset_id, dataset_config)[split]
         dataset_features = ds.features
-        label_keys = [i for i in dataset_features.keys() if i.startswith("label")]
-        features = [f for f in dataset_features.keys() if not f.startswith("label")]
-
-        if len(label_keys) == 0:  # no labels found
-            # return everything for post processing
-            return list(dataset_features.keys()), list(dataset_features.keys()), None
-
-        labels = None
-        if not isinstance(dataset_features[label_keys[0]], datasets.ClassLabel):
-            if hasattr(dataset_features[label_keys[0]], "feature"):
-                label_feat = dataset_features[label_keys[0]].feature
-                labels = label_feat.names
-            else:
-                labels = ds.unique(label_keys[0])
-        else:
-            labels = dataset_features[label_keys[0]].names
-        return labels, features, label_keys
+        labels = dataset_features["label"].names
+        features = [f for f in dataset_features.keys() if f != "label"]
+        return labels, features
     except Exception as e:
-        logging.warning(f"Get Labels/Features Failed for dataset: {e}")
-        return None, None, None
-
-
-def check_model_task(model_id):
-    # check if model is valid on huggingface
-    try:
-        task = huggingface_hub.model_info(model_id).pipeline_tag
-        if task is None:
-            return None
-        return task
-    except Exception:
-        return None
-
-
-def get_model_labels(model_id, example_input):
-    hf_token = os.environ.get(HF_WRITE_TOKEN, default="")
-    payload = {"inputs": example_input, "options": {"use_cache": True}}
-    response = hf_inference_api(model_id, hf_token, payload)
-    if "error" in response:
-        return None
-    return extract_from_response(response, "label")
-
-
-def extract_from_response(data, key):
-    results = []
-
-    if isinstance(data, dict):
-        res = data.get(key)
-        if res is not None:
-            results.append(res)
-
-        for value in data.values():
-            results.extend(extract_from_response(value, key))
-
-    elif isinstance(data, list):
-        for element in data:
-            results.extend(extract_from_response(element, key))
-
-    return results
-
-
-def hf_inference_api(model_id, hf_token, payload):
-    hf_inference_api_endpoint = os.environ.get(
-        "HF_INFERENCE_ENDPOINT", default="https://api-inference.huggingface.co"
-    )
-    url = f"{hf_inference_api_endpoint}/models/{model_id}"
-    headers = {"Authorization": f"Bearer {hf_token}"}
-    response = requests.post(url, headers=headers, json=payload)
-
-    if not hasattr(response, "status_code") or response.status_code != 200:
-        logger.warning(f"Request to inference API returns {response}")
-
-    try:
-        output = response.json()
-        if "error" in output and "Input is too long" in output["error"]:
-            payload.update({"parameters": {"truncation": True, "max_length": 512}})
-            response = requests.post(url, headers=headers, json=payload)
-            if not hasattr(response, "status_code") or response.status_code != 200:
-                logger.warning(f"Request to inference API returns {response}")
-        return response.json()
-    except Exception:
-        return {"error": response.content}
-
-
-def preload_hf_inference_api(model_id):
-    payload = {
-        "inputs": "This is a test",
-        "options": {
-            "use_cache": True,
-        },
-    }
-    hf_token = os.environ.get(HF_WRITE_TOKEN, default="")
-    hf_inference_api(model_id, hf_token, payload)
+        logging.warning(
+            f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}"
+        )
+        return None, None
 
 
-def check_model_pipeline(model_id):
+def check_model(model_id):
     try:
         task = huggingface_hub.model_info(model_id).pipeline_tag
     except Exception:
@@ -278,7 +186,7 @@ def infer_output_label_column(
 
 def check_dataset_features_validity(d_id, config, split):
     # We assume dataset is ok here
-    ds = datasets.load_dataset(d_id, config, split=split, trust_remote_code=True)
+    ds = datasets.load_dataset(d_id, config)[split]
     try:
         dataset_features = ds.features
     except AttributeError:
@@ -290,52 +198,27 @@ def check_dataset_features_validity(d_id, config, split):
     return df, dataset_features
 
 
-def select_the_first_string_column(ds):
-    for feature in ds.features.keys():
-        if isinstance(ds[0][feature], str):
-            return feature
-    return None
-
-
-def get_example_prediction(
-    model_id, dataset_id, dataset_config, dataset_split, hf_token
-):
+def get_example_prediction(ppl, dataset_id, dataset_config, dataset_split):
     # get a sample prediction from the model on the dataset
     prediction_input = None
     prediction_result = None
     try:
         # Use the first item to test prediction
-        ds = datasets.load_dataset(
-            dataset_id, dataset_config, split=dataset_split, trust_remote_code=True
-        )
+        ds = datasets.load_dataset(dataset_id, dataset_config)[dataset_split]
         if "text" not in ds.features.keys():
             # Dataset does not have text column
-            prediction_input = ds[0][select_the_first_string_column(ds)]
+            prediction_input = ds[0][ds.features.keys()[0]]
         else:
             prediction_input = ds[0]["text"]
 
-        payload = {"inputs": prediction_input, "options": {"use_cache": True}}
-        results = hf_inference_api(model_id, hf_token, payload)
-
-        if isinstance(results, dict) and "error" in results.keys():
-            if "estimated_time" in results.keys():
-                return prediction_input, HuggingFaceInferenceAPIResponse(
-                    f"Estimated time: {int(results['estimated_time'])}s. Please try again later."
-                )
-            return prediction_input, HuggingFaceInferenceAPIResponse(
-                f"Inference Error: {results['error']}."
-            )
-
-        while isinstance(results, list):
-            if isinstance(results[0], dict):
-                break
-            results = results[0]
+        print("prediction_input", prediction_input)
+        results = ppl(prediction_input, top_k=None)
+        # Display results in original label and mapped label
         prediction_result = {
             f'{result["label"]}': result["score"] for result in results
         }
-    except Exception as e:
-        # inference api prediction failed, show the error message
-        logger.error(f"Get example prediction failed {e}")
+    except Exception:
+        # Pipeline prediction failed, need to provide labels
         return prediction_input, None
 
     return prediction_input, prediction_result
@@ -407,16 +290,3 @@ def text_classification_fix_column_mapping(column_mapping, ppl, d_id, config, sp
         id2label_df,
         feature_map_df,
     )
-
-
-def check_hf_token_validity(hf_token):
-    if hf_token == "":
-        return False
-    if not isinstance(hf_token, str):
-        return False
-    # use huggingface api to check the token
-    headers = {"Authorization": f"Bearer {hf_token}"}
-    response = requests.get(AUTH_CHECK_URL, headers=headers)
-    if response.status_code != 200:
-        return False
-    return True
text_classification_ui_helpers.py CHANGED
@@ -1,181 +1,108 @@
 import collections
 import logging
 import threading
-import uuid
 
 import datasets
 import gradio as gr
-import pandas as pd
-
-import leaderboard
-from io_utils import (
-    read_column_mapping,
-    write_column_mapping,
-    read_scanners,
-    write_scanners,
-)
-from run_jobs import save_job_to_pipe
-from text_classification import (
-    check_model_task,
-    preload_hf_inference_api,
-    get_example_prediction,
-    get_labels_and_features_from_dataset,
-    check_hf_token_validity,
-    HuggingFaceInferenceAPIResponse,
-)
-from wordings import (
-    EXAMPLE_MODEL_ID,
-    CHECK_CONFIG_OR_SPLIT_RAW,
-    CONFIRM_MAPPING_DETAILS_FAIL_RAW,
-    MAPPING_STYLED_ERROR_WARNING,
-    NOT_FOUND_DATASET_RAW,
-    NOT_FOUND_MODEL_RAW,
-    NOT_TEXT_CLASSIFICATION_MODEL_RAW,
-    UNMATCHED_MODEL_DATASET_STYLED_ERROR,
-    CHECK_LOG_SECTION_RAW,
-    VALIDATED_MODEL_DATASET_STYLED,
-    get_dataset_fetch_error_raw,
-)
-import os
-from app_env import HF_WRITE_TOKEN
-
-MAX_LABELS = 40
 MAX_FEATURES = 20
 
-ds_dict = None
-ds_config = None
-
-
-def get_related_datasets_from_leaderboard(model_id, dataset_id_input):
-    records = leaderboard.records
-    model_records = records[records["model_id"] == model_id]
-    datasets_unique = list(model_records["dataset_id"].unique())
-
-    if len(datasets_unique) == 0:
-        return gr.update(choices=[])
 
-    if dataset_id_input in datasets_unique:
-        return gr.update(choices=datasets_unique)
 
-    return gr.update(choices=datasets_unique, value="")
-
-
-logger = logging.getLogger(__file__)
-
-
-def get_dataset_splits(dataset_id, dataset_config):
     try:
-        splits = datasets.get_dataset_split_names(
-            dataset_id, dataset_config, trust_remote_code=True
-        )
-        return gr.update(choices=splits, value=splits[0], visible=True)
-    except Exception as e:
-        logger.warning(
-            f"Check your dataset {dataset_id} and config {dataset_config}: {e}"
-        )
-        return gr.update(visible=False)
-
 
-def check_dataset(dataset_id):
-    logger.info(f"Loading {dataset_id}")
-    if not dataset_id or len(dataset_id) == 0:
-        return (gr.update(visible=False), gr.update(visible=False), "")
 
     try:
-        configs = datasets.get_dataset_config_names(dataset_id, trust_remote_code=True)
-        if len(configs) == 0:
-            return (gr.update(visible=False), gr.update(visible=False), "")
-        splits = datasets.get_dataset_split_names(
-            dataset_id, configs[0], trust_remote_code=True
-        )
-        return (
-            gr.update(choices=configs, value=configs[0], visible=True),
-            gr.update(choices=splits, value=splits[0], visible=True),
-            "",
-        )
-    except Exception as e:
-        logger.warning(f"Check your dataset {dataset_id}: {e}")
-        if "doesn't exist on the Hub or cannot be accessed" in str(e):
-            gr.Warning(NOT_FOUND_DATASET_RAW)
-        elif "forbidden" in str(e).lower():
-            # GSK-2770: illegal name
-            gr.Warning(get_dataset_fetch_error_raw(e))
-        else:
-            # Unknown error
-            gr.Warning(get_dataset_fetch_error_raw(e))
 
-        return (gr.update(visible=False), gr.update(visible=False), "")
 
-def empty_column_mapping(uid):
-    write_column_mapping(None, uid)
 
-def write_column_mapping_to_config(uid, *labels):
     # TODO: Substitute 'text' with more features for zero-shot
     # we are not using ds features because we only support "text" for now
-    all_mappings = read_column_mapping(uid)
-
     if labels is None:
         return
-    all_mappings = export_mappings(all_mappings, "labels", None, labels[:MAX_LABELS])
-    all_mappings = export_mappings(
-        all_mappings,
-        "features",
-        ["text"],
-        labels[MAX_LABELS : (MAX_LABELS + MAX_FEATURES)],
-    )
-
-    write_column_mapping(all_mappings, uid)
-
-
-def export_mappings(all_mappings, key, subkeys, values):
-    if key not in all_mappings.keys():
-        all_mappings[key] = dict()
-    if subkeys is None:
-        subkeys = list(all_mappings[key].keys())
- subkeys = list(all_mappings[key].keys())
137
 
138
- if not subkeys:
139
- logging.debug(f"subkeys is empty for {key}")
140
- return all_mappings
141
 
142
- for i, subkey in enumerate(subkeys):
143
- if subkey:
144
- all_mappings[key][subkey] = values[i % len(values)]
145
- return all_mappings
146
-
147
-
148
- def list_labels_and_features_from_dataset(ds_labels, ds_features, model_labels, uid):
149
- all_mappings = read_column_mapping(uid)
150
- # For flattened raw datasets with no labels
151
- # check if there are shared labels between model and dataset
152
- shared_labels = set(model_labels).intersection(set(ds_labels))
153
- if shared_labels:
154
- ds_labels = list(shared_labels)
155
- if len(ds_labels) > MAX_LABELS:
156
- ds_labels = ds_labels[:MAX_LABELS]
157
- gr.Warning(
158
- f"Too many labels to display for this spcae. We do not support more than {MAX_LABELS} in this space. You can use cli tool at https://github.com/Giskard-AI/cicd."
159
- )
160
 
161
- # sort labels to make sure the order is consistent
162
- # prediction gives the order based on probability
163
- ds_labels.sort()
164
- model_labels.sort()
165
 
 
 
 
166
  lables = [
167
  gr.Dropdown(
168
  label=f"{label}",
169
  choices=model_labels,
170
- value=model_labels[i % len(model_labels)],
171
  interactive=True,
172
  visible=True,
173
  )
174
- for i, label in enumerate(ds_labels)
175
  ]
176
  lables += [gr.Dropdown(visible=False) for _ in range(MAX_LABELS - len(lables))]
177
- all_mappings = export_mappings(all_mappings, "labels", ds_labels, model_labels)
178
-
179
  # TODO: Substitute 'text' with more features for zero-shot
180
  features = [
181
  gr.Dropdown(
@@ -190,122 +117,18 @@ def list_labels_and_features_from_dataset(ds_labels, ds_features, model_labels,
      features += [
          gr.Dropdown(visible=False) for _ in range(MAX_FEATURES - len(features))
      ]
-     all_mappings = export_mappings(all_mappings, "features", ["text"], ds_features)
-     write_column_mapping(all_mappings, uid)
-
      return lables + features


- def precheck_model_ds_enable_example_btn(
      model_id, dataset_id, dataset_config, dataset_split
  ):
-     model_task = check_model_task(model_id)
-     if not model_task:
-         # Model might be not found
-         error_msg_html = f"<p style='color: red;'>{NOT_FOUND_MODEL_RAW}</p>"
-         if model_id.startswith("http://") or model_id.startswith("https://"):
-             error_msg = f"Please input your model id, such as {EXAMPLE_MODEL_ID}, instead of a URL"
-             error_msg_html = f"<p style='color: red;'>{error_msg}</p>"
-
          return (
-             gr.update(interactive=False),
-             gr.update(visible=False),
-             gr.update(visible=False),
-             gr.update(visible=False),
-             gr.update(visible=False),
-             gr.update(visible=False),
-             gr.update(value=error_msg_html, visible=True),
-         )
-
-     if model_task != "text-classification":
-         gr.Warning(NOT_TEXT_CLASSIFICATION_MODEL_RAW)
-         return (
-             gr.update(interactive=False),
-             gr.update(value=df, visible=True),
-             gr.update(visible=False),
-             gr.update(visible=False),
-             gr.update(visible=False),
-             gr.update(visible=False),
-             gr.update(
-                 value=f"<p style='color: red;'>{NOT_TEXT_CLASSIFICATION_MODEL_RAW}",
-                 visible=True,
-             ),
-         )
-
-     preload_hf_inference_api(model_id)
-
-     if dataset_config is None or dataset_split is None or len(dataset_config) == 0:
-         return (
-             gr.update(interactive=False),
-             gr.update(visible=False),
-             gr.update(visible=False),
-             gr.update(visible=False),
-             gr.update(visible=False),
-             gr.update(visible=False),
-             gr.update(visible=False),
-         )
-
-     try:
-         ds = datasets.load_dataset(dataset_id, dataset_config, trust_remote_code=True)
-         df: pd.DataFrame = ds[dataset_split].to_pandas().head(5)
-         ds_labels, ds_features, _ = get_labels_and_features_from_dataset(
-             ds[dataset_split]
-         )
-
-         if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
-             gr.Warning(CHECK_CONFIG_OR_SPLIT_RAW)
-             return (
-                 gr.update(interactive=False),
-                 gr.update(value=df, visible=True),
-                 gr.update(visible=False),
-                 gr.update(visible=False),
-                 gr.update(visible=False),
-                 gr.update(visible=False),
-                 gr.update(visible=False),
-             )
-
-         return (
-             gr.update(interactive=True),
-             gr.update(value=df, visible=True),
-             gr.update(visible=False),
-             gr.update(visible=False),
-             gr.update(visible=False),
-             gr.update(visible=False),
-             gr.update(visible=False),
-         )
-     except Exception as e:
-         # Config or split wrong
-         logger.warning(
-             f"Check your dataset {dataset_id} and config {dataset_config} on split {dataset_split}: {e}"
-         )
-         return (
-             gr.update(interactive=False),
-             gr.update(visible=False),
-             gr.update(visible=False),
-             gr.update(visible=False),
              gr.update(visible=False),
              gr.update(visible=False),
-             gr.update(visible=False),
-         )
-
-
- def align_columns_and_show_prediction(
-     model_id,
-     dataset_id,
-     dataset_config,
-     dataset_split,
-     uid,
-     inference_token,
- ):
-     model_task = check_model_task(model_id)
-     if model_task is None or model_task != "text-classification":
-         gr.Warning(NOT_TEXT_CLASSIFICATION_MODEL_RAW)
-         return (
-             gr.update(visible=False),
-             gr.update(visible=False),
-             gr.update(visible=False, open=False),
-             gr.update(interactive=False),
-             "",
          *[gr.update(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)],
      )
@@ -313,229 +136,131 @@ def align_columns_and_show_prediction(
          gr.Dropdown(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)
      ]

-     hf_token = os.environ.get(HF_WRITE_TOKEN, default="")
-
-     prediction_input, prediction_response = get_example_prediction(
-         model_id, dataset_id, dataset_config, dataset_split, hf_token
-     )
-
-     if prediction_input is None or prediction_response is None:
-         return (
-             gr.update(visible=False),
-             gr.update(visible=False),
-             gr.update(visible=False),
-             gr.update(visible=False, open=False),
-             gr.update(interactive=False),
-             "",
-             *dropdown_placement,
-         )
-
-     if isinstance(prediction_response, HuggingFaceInferenceAPIResponse):
          return (
-             gr.update(visible=False),
              gr.update(visible=False),
              gr.update(visible=False),
              gr.update(visible=False, open=False),
-             gr.update(interactive=False),
-             f"Hugging Face Inference API is loading your model. {prediction_response.message}",
              *dropdown_placement,
          )
-
-     model_labels = list(prediction_response.keys())
-
-     ds = datasets.load_dataset(
-         dataset_id, dataset_config, split=dataset_split, trust_remote_code=True
      )
-     ds_labels, ds_features, _ = get_labels_and_features_from_dataset(ds)

      # when dataset does not have labels or features
      if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
          gr.Warning(CHECK_CONFIG_OR_SPLIT_RAW)
          return (
-             gr.update(visible=False),
-             gr.update(visible=False),
-             gr.update(visible=False),
-             gr.update(visible=False, open=False),
-             gr.update(interactive=False),
-             "",
-             *dropdown_placement,
-         )
-
-     if len(ds_labels) != len(model_labels):
-         return (
-             gr.update(value=UNMATCHED_MODEL_DATASET_STYLED_ERROR, visible=True),
              gr.update(visible=False),
              gr.update(visible=False),
              gr.update(visible=False, open=False),
-             gr.update(interactive=False),
-             "",
              *dropdown_placement,
          )

      column_mappings = list_labels_and_features_from_dataset(
          ds_labels,
          ds_features,
-         model_labels,
-         uid,
      )

      # when labels or features are not aligned
      # show manually column mapping
      if (
-         collections.Counter(model_labels) != collections.Counter(ds_labels)
          or ds_features[0] != "text"
      ):
          return (
              gr.update(value=MAPPING_STYLED_ERROR_WARNING, visible=True),
-             gr.update(
-                 value=prediction_input,
-                 lines=min(len(prediction_input) // 225 + 1, 5),
-                 visible=True,
-             ),
-             gr.update(value=prediction_response, visible=True),
              gr.update(visible=True, open=True),
-             gr.update(interactive=(inference_token != "")),
-             "",
              *column_mappings,
          )

      return (
-         gr.update(value=VALIDATED_MODEL_DATASET_STYLED, visible=True),
-         gr.update(
-             value=prediction_input,
-             lines=min(len(prediction_input) // 225 + 1, 5),
-             visible=True,
-         ),
-         gr.update(value=prediction_response, visible=True),
          gr.update(visible=True, open=False),
-         gr.update(interactive=(inference_token != "")),
-         "",
          *column_mappings,
      )


- def check_column_mapping_keys_validity(all_mappings):
      if all_mappings is None:
-         logger.warning("all_mapping is None")
          gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
-         return False

      if "labels" not in all_mappings.keys():
-         logger.warning(f"Label mapping is not valid, all_mappings: {all_mappings}")
-         return False
-
-     return True
-
-
- def enable_run_btn(
-     uid, inference_token, model_id, dataset_id, dataset_config, dataset_split
- ):
-     if inference_token == "":
-         logger.warning("Inference API is not enabled")
-         return gr.update(interactive=False)
-     if (
-         model_id == ""
-         or dataset_id == ""
-         or dataset_config == ""
-         or dataset_split == ""
-     ):
-         logger.warning("Model id or dataset id is not selected")
-         return gr.update(interactive=False)
-
-     all_mappings = read_column_mapping(uid)
-     if not check_column_mapping_keys_validity(all_mappings):
-         logger.warning("Column mapping is not valid")
-         return gr.update(interactive=False)
-
-     if not check_hf_token_validity(inference_token):
-         logger.warning("HF token is not valid")
-         return gr.update(interactive=False)
-     return gr.update(interactive=True)
-
-
- def construct_label_and_feature_mapping(
-     all_mappings, ds_labels, ds_features, label_keys=None
- ):
      label_mapping = {}
-     if len(all_mappings["labels"].keys()) != len(ds_labels):
-         logger.warning(
-             f"""Label mapping corrupted: {CONFIRM_MAPPING_DETAILS_FAIL_RAW}.
-             \nall_mappings: {all_mappings}\nds_labels: {ds_labels}"""
-         )
-
-     if len(all_mappings["features"].keys()) != len(ds_features):
-         logger.warning(
-             f"""Feature mapping corrupted: {CONFIRM_MAPPING_DETAILS_FAIL_RAW}.
-             \nall_mappings: {all_mappings}\nds_features: {ds_features}"""
-         )
-
-     for i, label in zip(range(len(ds_labels)), ds_labels):
-         # align the saved labels with dataset labels order
-         label_mapping.update({str(i): all_mappings["labels"][label]})

      if "features" not in all_mappings.keys():
-         logger.warning("features not in all_mappings")
          gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
-
-     feature_mapping = all_mappings["features"]
-     if len(label_keys) > 0:
-         feature_mapping.update({"label": label_keys[0]})
-     return label_mapping, feature_mapping
-
-
- def show_hf_token_info(token):
-     valid = check_hf_token_validity(token)
-     if not valid:
-         return gr.update(visible=True)
-     return gr.update(visible=False)
-
-
- def try_submit(m_id, d_id, config, split, inference_token, uid, verbose):
-     all_mappings = read_column_mapping(uid)
-     if not check_column_mapping_keys_validity(all_mappings):
          return (gr.update(interactive=True), gr.update(visible=False))

-     # get ds labels and features again for alignment
-     ds = datasets.load_dataset(d_id, config, split=split, trust_remote_code=True)
-     ds_labels, ds_features, label_keys = get_labels_and_features_from_dataset(ds)
-     label_mapping, feature_mapping = construct_label_and_feature_mapping(
-         all_mappings, ds_labels, ds_features, label_keys
-     )
-
-     eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
-     save_job_to_pipe(
-         uid,
-         (
              m_id,
              d_id,
              config,
              split,
-             inference_token,
              uid,
-             label_mapping,
-             feature_mapping,
-             verbose,
-         ),
-         eval_str,
-         threading.Lock(),
-     )
-     gr.Info("Your evaluation has been submitted")
-
-     new_uid = uuid.uuid4()
-     scanners = read_scanners(uid)
-     write_scanners(scanners, new_uid)
-
-     return (
-         gr.update(interactive=False),  # Submit button
-         gr.update(
-             value=f"{CHECK_LOG_SECTION_RAW}Your job id is: {uid}. ",
-             lines=5,
-             visible=True,
-             interactive=False,
-         ),
-         new_uid,  # Allocate a new uuid
-         gr.update(visible=False),
-         gr.update(visible=False),
-         gr.update(visible=False),
-         gr.update(visible=False),
-     )
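Both the removed handlers above and the added ones below follow the same Gradio convention: an event callback returns one value (usually a `gr.update(...)`) per output component it is bound to, in order, which is why these functions return long tuples of `gr.update` calls. A minimal sketch of that wiring (component names here are illustrative, not from this repo):

```python
import gradio as gr

def toggle(show):
    # One update per bound output, in the order they appear in `outputs`
    return gr.update(visible=show), gr.update(interactive=show)

with gr.Blocks() as demo:
    show_box = gr.Checkbox(label="Show details")
    details = gr.Textbox(visible=False)
    run_btn = gr.Button("Run", interactive=False)
    show_box.change(toggle, inputs=show_box, outputs=[details, run_btn])
```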
 
  import collections
+ import json
  import logging
+ import os
  import threading

  import datasets
  import gradio as gr
+ from transformers.pipelines import TextClassificationPipeline
+ from wordings import get_styled_input
+
+ from io_utils import (get_yaml_path, read_column_mapping, save_job_to_pipe,
+                       write_column_mapping, write_inference_type,
+                       write_log_to_user_file)
+ from text_classification import (check_model, get_example_prediction,
+                                  get_labels_and_features_from_dataset)
+ from wordings import (CHECK_CONFIG_OR_SPLIT_RAW,
+                       CONFIRM_MAPPING_DETAILS_FAIL_RAW,
+                       MAPPING_STYLED_ERROR_WARNING)
+
+ MAX_LABELS = 20
  MAX_FEATURES = 20

+ HF_REPO_ID = "HF_REPO_ID"
+ HF_SPACE_ID = "SPACE_ID"
+ HF_WRITE_TOKEN = "HF_WRITE_TOKEN"


+ def check_dataset_and_get_config(dataset_id):
      try:
+         # write_column_mapping(None, uid)  # reset column mapping
+         configs = datasets.get_dataset_config_names(dataset_id)
+         return gr.Dropdown(configs, value=configs[0], visible=True)
+     except Exception:
+         # Dataset may not exist
+         pass


+ def check_dataset_and_get_split(dataset_id, dataset_config):
      try:
+         splits = list(datasets.load_dataset(dataset_id, dataset_config).keys())
+         return gr.Dropdown(splits, value=splits[0], visible=True)
+     except Exception:
+         # Dataset may not exist
+         # gr.Warning(f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}")
+         pass
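Note that `load_dataset(...).keys()` materializes the dataset just to enumerate its splits; the `datasets` library also offers a metadata-only lookup that could do this more cheaply (a sketch, with illustrative ids):

```python
import datasets

# Metadata-only alternative: lists splits without downloading the dataset.
splits = datasets.get_dataset_split_names("imdb", "plain_text")  # ids illustrative
# e.g. ["train", "test", "unsupervised"]
```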


+ def select_run_mode(run_inf, inf_token, uid):
+     if run_inf:
+         if len(inf_token) > 0:
+             write_inference_type(run_inf, inf_token, uid)
+         return (gr.update(visible=True), gr.update(value=False))
+     else:
+         return (gr.update(visible=False), gr.update(value=True))


+ def deselect_run_inference(run_local):
+     if run_local:
+         return (gr.update(visible=False), gr.update(value=False))
+     else:
+         return (gr.update(visible=True), gr.update(value=True))


+ def write_column_mapping_to_config(
+     dataset_id, dataset_config, dataset_split, uid, *labels
+ ):
      # TODO: Substitute 'text' with more features for zero-shot
      # we are not using ds features because we only support "text" for now
+     ds_labels, _ = get_labels_and_features_from_dataset(
+         dataset_id, dataset_config, dataset_split
+     )
      if labels is None:
          return

+     all_mappings = dict()

+     if "labels" not in all_mappings.keys():
+         all_mappings["labels"] = dict()
+     for i, label in enumerate(labels[:MAX_LABELS]):
+         if label:
+             all_mappings["labels"][label] = ds_labels[i % len(ds_labels)]
+     if "features" not in all_mappings.keys():
+         all_mappings["features"] = dict()
+     for _, feat in enumerate(labels[MAX_LABELS : (MAX_LABELS + MAX_FEATURES)]):
+         if feat:
+             # TODO: Substitute 'text' with more features for zero-shot
+             all_mappings["features"]["text"] = feat
+     write_column_mapping(all_mappings, uid)
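The structure this writes out, for a hypothetical two-class sentiment setup (label and column names are illustrative):

```python
# Shape of the persisted mapping: dropdown selections (model labels) map to
# dataset label names, and the model's "text" input maps to a dataset column.
all_mappings = {
    "labels": {
        "POSITIVE": "pos",
        "NEGATIVE": "neg",
    },
    "features": {
        "text": "review",
    },
}
```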


+ def list_labels_and_features_from_dataset(ds_labels, ds_features, model_id2label):
+     model_labels = list(model_id2label.values())
+     len_model_labels = len(model_labels)
      lables = [
          gr.Dropdown(
              label=f"{label}",
              choices=model_labels,
+             value=model_id2label[i % len_model_labels],
              interactive=True,
              visible=True,
          )
+         for i, label in enumerate(ds_labels[:MAX_LABELS])
      ]
      lables += [gr.Dropdown(visible=False) for _ in range(MAX_LABELS - len(lables))]

      # TODO: Substitute 'text' with more features for zero-shot
      features = [
          gr.Dropdown(

      features += [
          gr.Dropdown(visible=False) for _ in range(MAX_FEATURES - len(features))
      ]

      return lables + features


+ def check_model_and_show_prediction(
      model_id, dataset_id, dataset_config, dataset_split
  ):
+     ppl = check_model(model_id)
+     if ppl is None or not isinstance(ppl, TextClassificationPipeline):
+         gr.Warning("Please check your model.")
          return (
              gr.update(visible=False),
              gr.update(visible=False),
              *[gr.update(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)],
          )

          gr.Dropdown(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)
      ]

+     if ppl is None:  # pipeline not found
+         gr.Warning("Model not found")
          return (
              gr.update(visible=False),
              gr.update(visible=False),
              gr.update(visible=False, open=False),
              *dropdown_placement,
          )
+     model_id2label = ppl.model.config.id2label
+     ds_labels, ds_features = get_labels_and_features_from_dataset(
+         dataset_id, dataset_config, dataset_split
      )

      # when dataset does not have labels or features
      if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
          gr.Warning(CHECK_CONFIG_OR_SPLIT_RAW)
          return (
              gr.update(visible=False),
              gr.update(visible=False),
              gr.update(visible=False, open=False),
              *dropdown_placement,
          )

      column_mappings = list_labels_and_features_from_dataset(
          ds_labels,
          ds_features,
+         model_id2label,
      )

      # when labels or features are not aligned
      # show manually column mapping
      if (
+         collections.Counter(model_id2label.values()) != collections.Counter(ds_labels)
          or ds_features[0] != "text"
      ):
          return (
              gr.update(value=MAPPING_STYLED_ERROR_WARNING, visible=True),
+             gr.update(visible=False),
              gr.update(visible=True, open=True),
              *column_mappings,
          )

+     prediction_input, prediction_output = get_example_prediction(
+         ppl, dataset_id, dataset_config, dataset_split
+     )
      return (
+         gr.update(value=get_styled_input(prediction_input), visible=True),
+         gr.update(value=prediction_output, visible=True),
          gr.update(visible=True, open=False),
          *column_mappings,
      )


+ def try_submit(m_id, d_id, config, split, local, uid):
+     all_mappings = read_column_mapping(uid)
+
      if all_mappings is None:
          gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
+         return (gr.update(interactive=True), gr.update(visible=False))

      if "labels" not in all_mappings.keys():
+         gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
+         return (gr.update(interactive=True), gr.update(visible=False))
      label_mapping = {}
+     for i, label in zip(
+         range(len(all_mappings["labels"].keys())), all_mappings["labels"].keys()
+     ):
+         label_mapping.update({str(i): label})

      if "features" not in all_mappings.keys():
          gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
          return (gr.update(interactive=True), gr.update(visible=False))
+     feature_mapping = all_mappings["features"]

+     leaderboard_dataset = None
+     if os.environ.get("SPACE_ID") == "giskardai/giskard-evaluator":
+         leaderboard_dataset = "ZeroCommand/test-giskard-report"
+
+     # TODO: Set column mapping for some dataset such as `amazon_polarity`
+     if local:
+         command = [
+             "giskard_scanner",
+             "--loader",
+             "huggingface",
+             "--model",
              m_id,
+             "--dataset",
              d_id,
+             "--dataset_config",
              config,
+             "--dataset_split",
              split,
+             "--hf_token",
+             os.environ.get(HF_WRITE_TOKEN),
+             "--discussion_repo",
+             os.environ.get(HF_REPO_ID) or os.environ.get(HF_SPACE_ID),
+             "--output_format",
+             "markdown",
+             "--output_portal",
+             "huggingface",
+             "--feature_mapping",
+             json.dumps(feature_mapping),
+             "--label_mapping",
+             json.dumps(label_mapping),
+             "--scan_config",
+             get_yaml_path(uid),
+             "--leaderboard_dataset",
+             leaderboard_dataset,
+         ]
+
+         eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
+         logging.info(f"Start local evaluation on {eval_str}")
+         save_job_to_pipe(uid, command, threading.Lock())
+         write_log_to_user_file(
              uid,
+             f"Start local evaluation on {eval_str}. Please wait for your job to start...\n",
+         )
+         gr.Info(f"Start local evaluation on {eval_str}")
+
+         return (
+             gr.update(interactive=False),
+             gr.update(lines=5, visible=True, interactive=False),
+         )

+     else:
+         gr.Info("TODO: Submit task to an endpoint")

+         return (gr.update(interactive=True), gr.update(visible=False))  # Submit button
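The `command` list above is only queued via `save_job_to_pipe`; the worker that consumes it lives in `run_jobs.py`/`pipe.py`, which this diff does not show. A plausible sketch of how such an argv-style list gets executed (an assumption, not the Space's actual runner):

```python
import subprocess

def run_job(command):
    # command is the argv-style list built in try_submit, e.g.
    # ["giskard_scanner", "--loader", "huggingface", "--model", ...]
    result = subprocess.run(command, capture_output=True, text=True)
    return result.returncode, result.stdout or result.stderr
```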
tmp/venvs/.gitkeep DELETED
File without changes
wordings.py CHANGED
@@ -1,119 +1,44 @@
- EXAMPLE_MODEL_ID = "cardiffnlp/twitter-roberta-base-sentiment-latest"
-
- TITLE_MD = """
- <div style="display: flex; justify-content: center;">
-     <h1 style="text-align: center;">
-     🐢Giskard Evaluator
-     </h1>
- </div>
- Welcome to the Giskard Evaluator Space!
- Get a model vulnerability report immediately and freely by simply providing the model and dataset id on Hugging Face.
- You can also check out our library documentation <a href="https://docs.giskard.ai/en/latest/getting_started/quickstart/index.html">here</a>.
- <!-- Don’t hesitate to give us a <a href="https://github.com/Giskard-AI/giskard">star on GitHub</a> ⭐️ if you find this work useful! -->
- """
-
- FOOTER_HTML = """
- <div style="display: flex; justify-content: center;">
-     <p style="text-align: center;">
-         This evaluator is built on top of&nbsp;<a href="https://github.com/Giskard-AI/giskard">Giskard Open Source library</a>
-         and <a href="https://github.com/Giskard-AI/cicd">Giskard CI/CD</a>.
-         <br/>
-         If you like our work, please give the projects a ⭐️ star on&nbsp;<a href="https://github.com/Giskard-AI/giskard">GitHub</a>.
-     </p>
- </div>
- """
-
  INTRODUCTION_MD = """
- <div style="display: flex; justify-content: center;">
-     <h2 style="text-align: center;">
-     Text Classification
-     </h2>
- </div>
  """
  CONFIRM_MAPPING_DETAILS_MD = """
  <h1 style="text-align: center;">
      Confirm Pre-processing Details
  </h1>
- Make sure the output variable's labels and the input variable's name are accurately mapped across both the dataset and the model. You can select the output variable's labels from the dropdowns below.
  """
  CONFIRM_MAPPING_DETAILS_FAIL_MD = """
  <h1 style="text-align: center;">
      Confirm Pre-processing Details
  </h1>
- We're unable to automatically map the input variable's name and output variable's labels of your dataset with the model's. Please manually check the mapping below.
  """

  CONFIRM_MAPPING_DETAILS_FAIL_RAW = """
- We're unable to automatically map the input variable's name and output variable's labels of your dataset with the model's. Please manually check the mapping below.
  """

  CHECK_CONFIG_OR_SPLIT_RAW = """
  Please check your dataset config or split.
  """

- CHECK_LOG_SECTION_RAW = """You have successfully submitted a Giskard evaluation job. Further details are available in the Logs tab. You can find your report posted in your model's community discussion section."""
-
  PREDICTION_SAMPLE_MD = """
  <h1 style="text-align: center;">
      Model Prediction Sample
  </h1>
- Here's a sample of your model's prediction on an example from the dataset.
  """

  MAPPING_STYLED_ERROR_WARNING = """
- <h3 style="text-align: center;color: orange; background-color: #fff0f3; border-radius: 8px; padding: 10px; ">
-     ⚠️ We're unable to automatically map the input variable's name and output variable's labels of your dataset with the model's. Please manually check the mapping below.
  </h3>
  """

- UNMATCHED_MODEL_DATASET_STYLED_ERROR = """
- <h3 style="text-align: center;color: #fa5f5f; background-color: #fbe2e2; border-radius: 8px; padding: 10px; ">
-     Your model and dataset have different numbers of labels. Please double check your model and dataset.
- </h3>
- """
-
- NOT_FOUND_MODEL_RAW = """
- We cannot find your model on Hugging Face. Please check that the model id is correct and publicly accessible.
- """
-
- NOT_TEXT_CLASSIFICATION_MODEL_RAW = """
- Your model does not fall under the category of text classification. This page is specifically designated for the evaluation of text classification models.
- """
-
- USE_INFERENCE_API_TIP = """
- We use the <b>free</b>
- <a href="https://huggingface.co/docs/api-inference/detailed_parameters#text-classification-task">
-     Hugging Face Inference API
- </a>
- to evaluate the models with a <a href="https://huggingface.co/docs/hub/security-tokens#user-access-tokens">Hugging Face access token</a>.
- <br/>
- Please <a href="https://huggingface.co/settings/tokens">get your token</a> and input it here. This helps us avoid the API rate limit and makes the evaluation faster.
- <br/>
- """
-
- USE_INFERENCE_API_NOTICE = """
- <b>Notice:</b>
- <ul>
-     <li><b>Your token will only be used for your own evaluation, and will not be saved.</b></li>
-     <li><b>You will not be charged for the free Hugging Face Inference API.</b></li>
- </ul>
- """
-
- HF_TOKEN_INVALID_STYLED = """
- <p style="text-align: left;color: red; ">
-     Your Hugging Face token is invalid. Please double check your token.
- </p>
- """
-
- VALIDATED_MODEL_DATASET_STYLED = """
- <h3 style="text-align: center;color: #4ca154; background-color: #e2fbe8; border-radius: 8px; padding: 10px; ">
-     Your model and dataset have been validated!
  </h3>"""
-
-
- NOT_FOUND_DATASET_RAW = """
- We cannot find your dataset on Hugging Face. Please check that the dataset id is correct and publicly accessible.
- """
-
- def get_dataset_fetch_error_raw(error):
-     return f"""Sorry, you cannot use this dataset because of an unknown error: "{error}"."""

  INTRODUCTION_MD = """
+ <h1 style="text-align: center;">
+     🐢Giskard Evaluator
+ </h1>
+ Welcome to the Giskard Evaluator Space! Get your report immediately by simply entering your model id and dataset id below. Follow our lead and improve your model in no time.
  """
  CONFIRM_MAPPING_DETAILS_MD = """
  <h1 style="text-align: center;">
      Confirm Pre-processing Details
  </h1>
+ Please confirm the pre-processing details below. Align your model's column names with your dataset's in the <b>dropdown</b> menus. If you are not sure, please double check your model and dataset.
  """
  CONFIRM_MAPPING_DETAILS_FAIL_MD = """
  <h1 style="text-align: center;">
      Confirm Pre-processing Details
  </h1>
+ Sorry, we cannot align the input/output of your dataset with the model. <b>Please double check your model and dataset.</b>
  """

  CONFIRM_MAPPING_DETAILS_FAIL_RAW = """
+ Sorry, we cannot align the input/output of your dataset with the model. Please double check your model and dataset.
  """

  CHECK_CONFIG_OR_SPLIT_RAW = """
  Please check your dataset config or split.
  """

  PREDICTION_SAMPLE_MD = """
  <h1 style="text-align: center;">
      Model Prediction Sample
  </h1>
+ Here is a sample prediction from your model based on your dataset.
  """

  MAPPING_STYLED_ERROR_WARNING = """
+ <h3 style="text-align: center;color: coral; background-color: #fff0f3; border-radius: 8px; padding: 10px; ">
+     Sorry, we cannot auto-align the labels/features of your dataset and model. Please double check.
  </h3>
  """

+ def get_styled_input(input):
+     return f"""<h3 style="text-align: center;color: #5ec26a; background-color: #e2fbe8; border-radius: 8px; padding: 10px; ">
+     Sample input: {input}
  </h3>"""