ZeroCommand committed on
Commit
0607989
·
verified ·
1 Parent(s): 5f9a95f

GSK-2774-GSK-2771-GSK-2772 (#101)

Browse files

- remove overused warnings & fix wording & prevent un-matchable models and datasets submissions (f9983aba4d3aaf2e17a17669a4086819e65c09ae)
- add job id and rephrase (ed207aeeb43f12280829553f761cc837273da1ac)
- fix bypassing validation possibility (1ead652bc86135baf9fa7b42b391b649a951960a)
- add trust remote code param for dataset with scripts (52ba35194688f68a903fc477676209e2d3aa2708)
- add trust remote code to get dataset config names (4b5940140b89343e08e95bbe6ce2bb7f0b4c753b)
- add persistent error code when number of labels not matching (c680d9a2f682bf992e6753ec3aebb2ffe9938de3)
- add wording for guiding user to find the report (8a71b006571950be12c658575f5633127dc6fd9d)
- add hf token validation (346fe42776f7ce2da20956a78170c6d81f1820fd)
- add error msg for token invalid (20294008e5d228049c136e3fe9013feb27694bec)
- change hf token valid wording style (0c7a6488ba03aa0b9768118cc9c1e71865448e8c)
- wrap hf dataset error (55c122a303573ff10da3b52213a1158c7e7fc66e)

app_leaderboard.py CHANGED
@@ -21,7 +21,7 @@ def get_records_from_dataset_repo(dataset_id):
21
  logger.info(f"Dataset {dataset_id} has splits {dataset_split}")
22
 
23
  try:
24
- ds = datasets.load_dataset(dataset_id, dataset_config[0])[dataset_split[0]]
25
  df = ds.to_pandas()
26
  return df
27
  except Exception as e:
 
21
  logger.info(f"Dataset {dataset_id} has splits {dataset_split}")
22
 
23
  try:
24
+ ds = datasets.load_dataset(dataset_id, dataset_config[0], split=dataset_split[0])
25
  df = ds.to_pandas()
26
  return df
27
  except Exception as e:
app_text_classification.py CHANGED
@@ -2,7 +2,7 @@ import uuid
2
 
3
  import gradio as gr
4
 
5
- from io_utils import get_logs_file, read_scanners, write_scanners
6
  from text_classification_ui_helpers import (
7
  get_related_datasets_from_leaderboard,
8
  align_columns_and_show_prediction,
@@ -11,7 +11,19 @@ from text_classification_ui_helpers import (
11
  try_submit,
12
  write_column_mapping_to_config,
13
  )
14
- from wordings import CONFIRM_MAPPING_DETAILS_MD, INTRODUCTION_MD, USE_INFERENCE_API_TIP
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  MAX_LABELS = 40
17
  MAX_FEATURES = 20
@@ -89,6 +101,13 @@ def get_demo():
89
  visible=True,
90
  interactive=True,
91
  )
 
 
 
 
 
 
 
92
 
93
  with gr.Accordion(label="Scanner Advance Config (optional)", open=False):
94
  scanners = gr.CheckboxGroup(label="Scan Settings", visible=True)
@@ -96,7 +115,7 @@ def get_demo():
96
  @gr.on(triggers=[uid_label.change], inputs=[uid_label], outputs=[scanners])
97
  def get_scanners(uid):
98
  selected = read_scanners(uid)
99
- # currently we remove data_leakage from the default scanners
100
  # Reason: data_leakage barely raises any issues and takes too many requests
101
  # when using inference API, causing rate limit error
102
  scan_config = selected + ["data_leakage"]
@@ -114,8 +133,8 @@ def get_demo():
114
 
115
  with gr.Row():
116
  logs = gr.Textbox(
117
- value=get_logs_file,
118
- label="Giskard Bot Evaluation Log:",
119
  visible=False,
120
  every=0.5,
121
  )
@@ -135,7 +154,7 @@ def get_demo():
135
  )
136
 
137
  gr.on(
138
- triggers=[dataset_id_input.input],
139
  fn=check_dataset,
140
  inputs=[dataset_id_input],
141
  outputs=[dataset_config_input, dataset_split_input, loading_status]
@@ -223,6 +242,14 @@ def get_demo():
223
  return gr.update(interactive=False)
224
  if model_id == "" or dataset_id == "" or dataset_config == "" or dataset_split == "":
225
  return gr.update(interactive=False)
 
 
 
 
 
 
 
 
226
  return gr.update(interactive=True)
227
 
228
  gr.on(
 
2
 
3
  import gradio as gr
4
 
5
+ from io_utils import read_scanners, write_scanners
6
  from text_classification_ui_helpers import (
7
  get_related_datasets_from_leaderboard,
8
  align_columns_and_show_prediction,
 
11
  try_submit,
12
  write_column_mapping_to_config,
13
  )
14
+
15
+ from text_classification import (
16
+ get_example_prediction,
17
+ check_hf_token_validity,
18
+ HuggingFaceInferenceAPIResponse
19
+ )
20
+ from wordings import (
21
+ CONFIRM_MAPPING_DETAILS_MD,
22
+ INTRODUCTION_MD,
23
+ USE_INFERENCE_API_TIP,
24
+ CHECK_LOG_SECTION_RAW,
25
+ HF_TOKEN_INVALID_STYLED
26
+ )
27
 
28
  MAX_LABELS = 40
29
  MAX_FEATURES = 20
 
101
  visible=True,
102
  interactive=True,
103
  )
104
+ inference_token_info = gr.HTML(value=HF_TOKEN_INVALID_STYLED, visible=False)
105
+
106
+ inference_token.change(
107
+ lambda token: gr.update(visible=lambda: check_hf_token_validity(token)),
108
+ inputs=[inference_token],
109
+ outputs=[inference_token_info],
110
+ )
111
 
112
  with gr.Accordion(label="Scanner Advance Config (optional)", open=False):
113
  scanners = gr.CheckboxGroup(label="Scan Settings", visible=True)
 
115
  @gr.on(triggers=[uid_label.change], inputs=[uid_label], outputs=[scanners])
116
  def get_scanners(uid):
117
  selected = read_scanners(uid)
118
+ # we remove data_leakage from the default scanners
119
  # Reason: data_leakage barely raises any issues and takes too many requests
120
  # when using inference API, causing rate limit error
121
  scan_config = selected + ["data_leakage"]
 
133
 
134
  with gr.Row():
135
  logs = gr.Textbox(
136
+ value=CHECK_LOG_SECTION_RAW,
137
+ label="Giskard Bot Evaluation Guide:",
138
  visible=False,
139
  every=0.5,
140
  )
 
154
  )
155
 
156
  gr.on(
157
+ triggers=[dataset_id_input.change],
158
  fn=check_dataset,
159
  inputs=[dataset_id_input],
160
  outputs=[dataset_config_input, dataset_split_input, loading_status]
 
242
  return gr.update(interactive=False)
243
  if model_id == "" or dataset_id == "" or dataset_config == "" or dataset_split == "":
244
  return gr.update(interactive=False)
245
+ if not column_mapping_accordion.visible:
246
+ return gr.update(interactive=False)
247
+ _, prediction_response = get_example_prediction(
248
+ model_id, dataset_id, dataset_config, dataset_split, inference_token
249
+ )
250
+ if not isinstance(prediction_response, HuggingFaceInferenceAPIResponse):
251
+ gr.warning("Your HF token is invalid. Please check your token.")
252
+ return gr.update(interactive=False)
253
  return gr.update(interactive=True)
254
 
255
  gr.on(
fetch_utils.py CHANGED
@@ -5,7 +5,7 @@ import datasets
5
 
6
  def check_dataset_and_get_config(dataset_id):
7
  try:
8
- configs = datasets.get_dataset_config_names(dataset_id)
9
  return configs
10
  except Exception:
11
  # Dataset may not exist
@@ -14,7 +14,7 @@ def check_dataset_and_get_config(dataset_id):
14
 
15
  def check_dataset_and_get_split(dataset_id, dataset_config):
16
  try:
17
- ds = datasets.load_dataset(dataset_id, dataset_config)
18
  except Exception as e:
19
  # Dataset may not exist
20
  logging.warning(
 
5
 
6
  def check_dataset_and_get_config(dataset_id):
7
  try:
8
+ configs = datasets.get_dataset_config_names(dataset_id, trust_remote_code=True)
9
  return configs
10
  except Exception:
11
  # Dataset may not exist
 
14
 
15
  def check_dataset_and_get_split(dataset_id, dataset_config):
16
  try:
17
+ ds = datasets.load_dataset(dataset_id, dataset_config, trust_remote_code=True)
18
  except Exception as e:
19
  # Dataset may not exist
20
  logging.warning(
text_classification.py CHANGED
@@ -254,7 +254,7 @@ def infer_output_label_column(
254
 
255
  def check_dataset_features_validity(d_id, config, split):
256
  # We assume dataset is ok here
257
- ds = datasets.load_dataset(d_id, config)[split]
258
  try:
259
  dataset_features = ds.features
260
  except AttributeError:
@@ -272,20 +272,19 @@ def select_the_first_string_column(ds):
272
  return None
273
 
274
 
275
- def get_example_prediction(model_id, dataset_id, dataset_config, dataset_split):
276
  # get a sample prediction from the model on the dataset
277
  prediction_input = None
278
  prediction_result = None
279
  try:
280
  # Use the first item to test prediction
281
- ds = datasets.load_dataset(dataset_id, dataset_config)[dataset_split]
282
  if "text" not in ds.features.keys():
283
  # Dataset does not have text column
284
  prediction_input = ds[0][select_the_first_string_column(ds)]
285
  else:
286
  prediction_input = ds[0]["text"]
287
-
288
- hf_token = os.environ.get(HF_WRITE_TOKEN, default="")
289
  payload = {"inputs": prediction_input, "options": {"use_cache": True}}
290
  results = hf_inference_api(model_id, hf_token, payload)
291
 
@@ -381,4 +380,16 @@ def text_classification_fix_column_mapping(column_mapping, ppl, d_id, config, sp
381
  def strip_model_id_from_url(model_id):
382
  if model_id.startswith("https://huggingface.co/"):
383
  return "/".join(model_id.split("/")[-2])
384
- return model_id
 
 
 
 
 
 
 
 
 
 
 
 
 
254
 
255
  def check_dataset_features_validity(d_id, config, split):
256
  # We assume dataset is ok here
257
+ ds = datasets.load_dataset(d_id, config, split=split, trust_remote_code=True)
258
  try:
259
  dataset_features = ds.features
260
  except AttributeError:
 
272
  return None
273
 
274
 
275
+ def get_example_prediction(model_id, dataset_id, dataset_config, dataset_split, hf_token):
276
  # get a sample prediction from the model on the dataset
277
  prediction_input = None
278
  prediction_result = None
279
  try:
280
  # Use the first item to test prediction
281
+ ds = datasets.load_dataset(dataset_id, dataset_config, split=dataset_split, trust_remote_code=True)
282
  if "text" not in ds.features.keys():
283
  # Dataset does not have text column
284
  prediction_input = ds[0][select_the_first_string_column(ds)]
285
  else:
286
  prediction_input = ds[0]["text"]
287
+
 
288
  payload = {"inputs": prediction_input, "options": {"use_cache": True}}
289
  results = hf_inference_api(model_id, hf_token, payload)
290
 
 
380
  def strip_model_id_from_url(model_id):
381
  if model_id.startswith("https://huggingface.co/"):
382
  return "/".join(model_id.split("/")[-2])
383
+ return model_id
384
+
385
def check_hf_token_validity(hf_token):
    """Report whether *hf_token* is accepted by the inference API.

    Returns ``False`` straight away for an empty string or a non-string
    value. Otherwise a fixed probe sentence is sent through
    ``hf_inference_api`` and the token counts as valid when the response
    does not contain ``"error"``.

    NOTE(review): the shape of the ``hf_inference_api`` response is not
    visible here; the membership test assumes it is a container - confirm.
    """
    if hf_token == "" or not isinstance(hf_token, str):
        return False

    # use inference api to check the token
    probe_payload = {"inputs": "This is a test", "options": {"use_cache": True}}
    api_response = hf_inference_api(
        "cardiffnlp/twitter-roberta-base-sentiment-latest", hf_token, probe_payload
    )
    return "error" not in api_response
text_classification_ui_helpers.py CHANGED
@@ -23,8 +23,12 @@ from wordings import (
23
  CONFIRM_MAPPING_DETAILS_FAIL_RAW,
24
  MAPPING_STYLED_ERROR_WARNING,
25
  NOT_TEXT_CLASSIFICATION_MODEL_RAW,
 
 
26
  get_styled_input,
 
27
  )
 
28
 
29
  MAX_LABELS = 40
30
  MAX_FEATURES = 20
@@ -41,7 +45,7 @@ def get_related_datasets_from_leaderboard(model_id):
41
  if len(datasets_unique) == 0:
42
  return gr.update(choices=[], value="")
43
 
44
- return gr.update(choices=datasets_unique, value=datasets_unique[0])
45
 
46
 
47
  logger = logging.getLogger(__file__)
@@ -50,18 +54,16 @@ logger = logging.getLogger(__file__)
50
  def check_dataset(dataset_id):
51
  logger.info(f"Loading {dataset_id}")
52
  try:
53
- configs = datasets.get_dataset_config_names(dataset_id)
54
  if len(configs) == 0:
55
  return (
56
  gr.update(),
57
  gr.update(),
58
  ""
59
  )
60
- splits = list(
61
- datasets.load_dataset(
62
- dataset_id, configs[0]
63
- ).keys()
64
- )
65
  return (
66
  gr.update(choices=configs, value=configs[0], visible=True),
67
  gr.update(choices=splits, value=splits[0], visible=True),
@@ -69,6 +71,8 @@ def check_dataset(dataset_id):
69
  )
70
  except Exception as e:
71
  logger.warn(f"Check your dataset {dataset_id}: {e}")
 
 
72
  return (
73
  gr.update(),
74
  gr.update(),
@@ -174,7 +178,7 @@ def precheck_model_ds_enable_example_btn(
174
  return (gr.update(), gr.update(), "")
175
 
176
  try:
177
- ds = datasets.load_dataset(dataset_id, dataset_config)
178
  df: pd.DataFrame = ds[dataset_split].to_pandas().head(5)
179
  ds_labels, ds_features = get_labels_and_features_from_dataset(ds[dataset_split])
180
 
@@ -185,7 +189,7 @@ def precheck_model_ds_enable_example_btn(
185
  return (gr.update(interactive=True), gr.update(value=df, visible=True), "")
186
  except Exception as e:
187
  # Config or split wrong
188
- gr.Warning(f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}")
189
  return (gr.update(interactive=False), gr.update(value=pd.DataFrame(), visible=False), "")
190
 
191
 
@@ -214,9 +218,11 @@ def align_columns_and_show_prediction(
214
  dropdown_placement = [
215
  gr.Dropdown(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)
216
  ]
 
 
217
 
218
  prediction_input, prediction_response = get_example_prediction(
219
- model_id, dataset_id, dataset_config, dataset_split
220
  )
221
 
222
  if prediction_input is None or prediction_response is None:
@@ -241,7 +247,7 @@ def align_columns_and_show_prediction(
241
 
242
  model_labels = list(prediction_response.keys())
243
 
244
- ds = datasets.load_dataset(dataset_id, dataset_config)[dataset_split]
245
  ds_labels, ds_features = get_labels_and_features_from_dataset(ds)
246
 
247
  # when dataset does not have labels or features
@@ -255,6 +261,16 @@ def align_columns_and_show_prediction(
255
  "",
256
  *dropdown_placement,
257
  )
 
 
 
 
 
 
 
 
 
 
258
 
259
  column_mappings = list_labels_and_features_from_dataset(
260
  ds_labels,
@@ -301,10 +317,10 @@ def check_column_mapping_keys_validity(all_mappings):
301
  def construct_label_and_feature_mapping(all_mappings, ds_labels, ds_features):
302
  label_mapping = {}
303
  if len(all_mappings["labels"].keys()) != len(ds_labels):
304
- gr.Warning("Label mapping corrupted: " + CONFIRM_MAPPING_DETAILS_FAIL_RAW)
305
 
306
  if len(all_mappings["features"].keys()) != len(ds_features):
307
- gr.Warning("Feature mapping corrupted: " + CONFIRM_MAPPING_DETAILS_FAIL_RAW)
308
 
309
  for i, label in zip(range(len(ds_labels)), ds_labels):
310
  # align the saved labels with dataset labels order
@@ -315,13 +331,12 @@ def construct_label_and_feature_mapping(all_mappings, ds_labels, ds_features):
315
  feature_mapping = all_mappings["features"]
316
  return label_mapping, feature_mapping
317
 
318
-
319
  def try_submit(m_id, d_id, config, split, inference, inference_token, uid):
320
  all_mappings = read_column_mapping(uid)
321
  check_column_mapping_keys_validity(all_mappings)
322
 
323
  # get ds labels and features again for alignment
324
- ds = datasets.load_dataset(d_id, config)[split]
325
  ds_labels, ds_features = get_labels_and_features_from_dataset(ds)
326
  label_mapping, feature_mapping = construct_label_and_feature_mapping(all_mappings, ds_labels, ds_features)
327
 
@@ -346,6 +361,6 @@ def try_submit(m_id, d_id, config, split, inference, inference_token, uid):
346
 
347
  return (
348
  gr.update(interactive=False), # Submit button
349
- gr.update(lines=5, visible=True, interactive=False),
350
  uuid.uuid4(), # Allocate a new uuid
351
  )
 
23
  CONFIRM_MAPPING_DETAILS_FAIL_RAW,
24
  MAPPING_STYLED_ERROR_WARNING,
25
  NOT_TEXT_CLASSIFICATION_MODEL_RAW,
26
+ UNMATCHED_MODEL_DATASET_STYLED_ERROR,
27
+ CHECK_LOG_SECTION_RAW,
28
  get_styled_input,
29
+ get_dataset_fetch_error_raw,
30
  )
31
+ import os
32
 
33
  MAX_LABELS = 40
34
  MAX_FEATURES = 20
 
45
  if len(datasets_unique) == 0:
46
  return gr.update(choices=[], value="")
47
 
48
+ return gr.update(choices=datasets_unique, value="")
49
 
50
 
51
  logger = logging.getLogger(__file__)
 
54
  def check_dataset(dataset_id):
55
  logger.info(f"Loading {dataset_id}")
56
  try:
57
+ configs = datasets.get_dataset_config_names(dataset_id, trust_remote_code=True)
58
  if len(configs) == 0:
59
  return (
60
  gr.update(),
61
  gr.update(),
62
  ""
63
  )
64
+ splits = datasets.get_dataset_split_names(
65
+ dataset_id, configs[0], trust_remote_code=True
66
+ )
 
 
67
  return (
68
  gr.update(choices=configs, value=configs[0], visible=True),
69
  gr.update(choices=splits, value=splits[0], visible=True),
 
71
  )
72
  except Exception as e:
73
  logger.warn(f"Check your dataset {dataset_id}: {e}")
74
+ if "forbidden" in str(e).lower(): # GSK-2770
75
+ gr.warning(get_dataset_fetch_error_raw(e))
76
  return (
77
  gr.update(),
78
  gr.update(),
 
178
  return (gr.update(), gr.update(), "")
179
 
180
  try:
181
+ ds = datasets.load_dataset(dataset_id, dataset_config, trust_remote_code=True)
182
  df: pd.DataFrame = ds[dataset_split].to_pandas().head(5)
183
  ds_labels, ds_features = get_labels_and_features_from_dataset(ds[dataset_split])
184
 
 
189
  return (gr.update(interactive=True), gr.update(value=df, visible=True), "")
190
  except Exception as e:
191
  # Config or split wrong
192
+ logger.warn(f"Check your dataset {dataset_id} and config {dataset_config} on split {dataset_split}: {e}")
193
  return (gr.update(interactive=False), gr.update(value=pd.DataFrame(), visible=False), "")
194
 
195
 
 
218
  dropdown_placement = [
219
  gr.Dropdown(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)
220
  ]
221
+
222
+ hf_token = os.environ.get("HF_WRITE_TOKEN", default="")
223
 
224
  prediction_input, prediction_response = get_example_prediction(
225
+ model_id, dataset_id, dataset_config, dataset_split, hf_token
226
  )
227
 
228
  if prediction_input is None or prediction_response is None:
 
247
 
248
  model_labels = list(prediction_response.keys())
249
 
250
+ ds = datasets.load_dataset(dataset_id, dataset_config, split=dataset_split, trust_remote_code=True)
251
  ds_labels, ds_features = get_labels_and_features_from_dataset(ds)
252
 
253
  # when dataset does not have labels or features
 
261
  "",
262
  *dropdown_placement,
263
  )
264
+
265
+ if len(ds_labels) != len(model_labels):
266
+ return (
267
+ gr.update(value=UNMATCHED_MODEL_DATASET_STYLED_ERROR, visible=True),
268
+ gr.update(visible=False),
269
+ gr.update(visible=False, open=False),
270
+ gr.update(interactive=False),
271
+ "",
272
+ *dropdown_placement,
273
+ )
274
 
275
  column_mappings = list_labels_and_features_from_dataset(
276
  ds_labels,
 
317
  def construct_label_and_feature_mapping(all_mappings, ds_labels, ds_features):
318
  label_mapping = {}
319
  if len(all_mappings["labels"].keys()) != len(ds_labels):
320
+ logger.warn("Label mapping corrupted: " + CONFIRM_MAPPING_DETAILS_FAIL_RAW)
321
 
322
  if len(all_mappings["features"].keys()) != len(ds_features):
323
+ logger.warn("Feature mapping corrupted: " + CONFIRM_MAPPING_DETAILS_FAIL_RAW)
324
 
325
  for i, label in zip(range(len(ds_labels)), ds_labels):
326
  # align the saved labels with dataset labels order
 
331
  feature_mapping = all_mappings["features"]
332
  return label_mapping, feature_mapping
333
 
 
334
  def try_submit(m_id, d_id, config, split, inference, inference_token, uid):
335
  all_mappings = read_column_mapping(uid)
336
  check_column_mapping_keys_validity(all_mappings)
337
 
338
  # get ds labels and features again for alignment
339
+ ds = datasets.load_dataset(d_id, config, split=split, trust_remote_code=True)
340
  ds_labels, ds_features = get_labels_and_features_from_dataset(ds)
341
  label_mapping, feature_mapping = construct_label_and_feature_mapping(all_mappings, ds_labels, ds_features)
342
 
 
361
 
362
  return (
363
  gr.update(interactive=False), # Submit button
364
+ gr.update(value=f"{CHECK_LOG_SECTION_RAW}Your job id is: {uid}. ", lines=5, visible=True, interactive=False),
365
  uuid.uuid4(), # Allocate a new uuid
366
  )
wordings.py CHANGED
@@ -2,7 +2,7 @@ INTRODUCTION_MD = """
2
  <h1 style="text-align: center;">
3
  🐢Giskard Evaluator
4
  </h1>
5
- Welcome to Giskard Evaluator Space! Get your report immediately by simply input your model id and dataset id below. Follow our leads and improve your model in no time.
6
  """
7
  CONFIRM_MAPPING_DETAILS_MD = """
8
  <h1 style="text-align: center;">
@@ -18,13 +18,17 @@ CONFIRM_MAPPING_DETAILS_FAIL_MD = """
18
  """
19
 
20
  CONFIRM_MAPPING_DETAILS_FAIL_RAW = """
21
- Sorry, we cannot align the input/output of your dataset with the model. Pleaser double check your model and dataset.
22
  """
23
 
24
  CHECK_CONFIG_OR_SPLIT_RAW = """
25
  Please check your dataset config or split.
26
  """
27
 
 
 
 
 
28
  PREDICTION_SAMPLE_MD = """
29
  <h1 style="text-align: center;">
30
  Model Prediction Sample
@@ -33,11 +37,17 @@ PREDICTION_SAMPLE_MD = """
33
  """
34
 
35
  MAPPING_STYLED_ERROR_WARNING = """
36
- <h3 style="text-align: center;color: coral; background-color: #fff0f3; border-radius: 8px; padding: 10px; ">
37
  Sorry, we cannot auto-align the labels/features of your dataset and model. Please double check.
38
  </h3>
39
  """
40
 
 
 
 
 
 
 
41
  NOT_TEXT_CLASSIFICATION_MODEL_RAW = """
42
  Your model does not fall under the category of text classification. This page is specifically designated for the evaluation of text classification models.
43
  """
@@ -61,7 +71,16 @@ USE_INFERENCE_API_TIP = """
61
  </b>
62
  """
63
 
 
 
 
 
 
 
 
 
 
64
  def get_styled_input(input):
65
  return f"""<h3 style="text-align: center;color: #4ca154; background-color: #e2fbe8; border-radius: 8px; padding: 10px; ">
66
- Sample input: {input}
67
  </h3>"""
 
2
  <h1 style="text-align: center;">
3
  🐢Giskard Evaluator
4
  </h1>
5
+ Welcome to Giskard Evaluator Space! Get your report immediately by simply input your model id and dataset id below. Follow our leads and improve your model.
6
  """
7
  CONFIRM_MAPPING_DETAILS_MD = """
8
  <h1 style="text-align: center;">
 
18
  """
19
 
20
  CONFIRM_MAPPING_DETAILS_FAIL_RAW = """
21
+ Sorry, we cannot auto-align the input/output of your dataset with the model.
22
  """
23
 
24
  CHECK_CONFIG_OR_SPLIT_RAW = """
25
  Please check your dataset config or split.
26
  """
27
 
28
# Confirmation text shown to the user after an evaluation job is submitted.
# The leading and trailing newlines are kept as in the original literal.
CHECK_LOG_SECTION_RAW = """
You have successfully submitted a Giskard evaluation. Further details are available in the Logs tab. Your report will be posted to your model's community discussion.
"""
31
+
32
  PREDICTION_SAMPLE_MD = """
33
  <h1 style="text-align: center;">
34
  Model Prediction Sample
 
37
  """
38
 
39
  MAPPING_STYLED_ERROR_WARNING = """
40
+ <h3 style="text-align: center;color: orange; background-color: #fff0f3; border-radius: 8px; padding: 10px; ">
41
  Sorry, we cannot auto-align the labels/features of your dataset and model. Please double check.
42
  </h3>
43
  """
44
 
45
+ UNMATCHED_MODEL_DATASET_STYLED_ERROR = """
46
+ <h3 style="text-align: center;color: #fa5f5f; background-color: #fbe2e2; border-radius: 8px; padding: 10px; ">
47
+ Your model and dataset have different numbers of labels. Please double check your model and dataset.
48
+ </h3>
49
+ """
50
+
51
  NOT_TEXT_CLASSIFICATION_MODEL_RAW = """
52
  Your model does not fall under the category of text classification. This page is specifically designated for the evaluation of text classification models.
53
  """
 
71
  </b>
72
  """
73
 
74
+ HF_TOKEN_INVALID_STYLED= """
75
+ <p style="text-align: left;color: red; ">
76
+ Your Hugging Face token is invalid. Please double check your token.
77
+ </p>
78
+ """
79
+
80
def get_dataset_fetch_error_raw(error):
    """Build the plain-text message for a dataset that cannot be fetched.

    *error* is rendered with ordinary string formatting and placed between
    the fixed lead-in and the closing hint.
    """
    reason = format(error)
    return (
        "Sorry you cannot use this dataset because "
        + reason
        + " Contact HF team to support this dataset."
    )
82
+
83
  def get_styled_input(input):
84
  return f"""<h3 style="text-align: center;color: #4ca154; background-color: #e2fbe8; border-radius: 8px; padding: 10px; ">
85
+ Your model and dataset have been validated! <br /> Sample input: {input}
86
  </h3>"""