lewtun (HF staff) committed
Commit 31b9ddb • 1 Parent(s): a267f6f

Integrate Omar's feedback

Files changed (4):
  1. README.md +1 -1
  2. app.py +26 -12
  3. requirements.txt +1 -1
  4. utils.py +4 -4
README.md CHANGED

```diff
@@ -4,7 +4,7 @@ emoji: 📊
 colorFrom: red
 colorTo: red
 sdk: streamlit
-sdk_version: 1.2.0
+sdk_version: 1.10.0
 app_file: app.py
 ---
 
```
app.py CHANGED

```diff
@@ -59,9 +59,12 @@ SUPPORTED_TASKS = list(TASK_TO_ID.keys())
 
 @st.cache
 def get_supported_metrics():
-    metrics = list_metrics()
+    metrics = [metric.id for metric in list_metrics()]
     supported_metrics = []
     for metric in tqdm(metrics):
+        # TODO: this currently requires all metric dependencies to be installed
+        # in the same environment. Refactor to avoid needing to actually load
+        # the metric.
         try:
             metric_func = load(metric)
         except Exception as e:
@@ -93,14 +96,15 @@ supported_metrics = get_supported_metrics()
 #######
 # APP #
 #######
-st.title("Evaluation as a Service")
+st.title("Evaluation on the Hub")
 st.markdown(
     """
-    Welcome to Hugging Face's Evaluation as a Service! This application allows
+    Welcome to Hugging Face's automatic model evaluator! This application allows
     you to evaluate 🤗 Transformers
     [models](https://huggingface.co/models?library=transformers&sort=downloads)
-    with a dataset on the Hub. Please select the dataset and configuration
-    below. The results of your evaluation will be displayed on the [public
+    across a wide variety of datasets on the Hub -- all for free! Please select
+    the dataset and configuration below. The results of your evaluation will be
+    displayed on the [public
     leaderboard](https://huggingface.co/spaces/autoevaluate/leaderboards).
     """
 )
@@ -112,7 +116,12 @@ if "dataset" in query_params:
     if len(query_params["dataset"]) > 0 and query_params["dataset"][0] in all_datasets:
         default_dataset = query_params["dataset"][0]
 
-selected_dataset = st.selectbox("Select a dataset", all_datasets, index=all_datasets.index(default_dataset))
+selected_dataset = st.selectbox(
+    "Select a dataset",
+    all_datasets,
+    index=all_datasets.index(default_dataset),
+    help="Datasets with metadata can be evaluated with 1-click. Check out the [documentation](https://huggingface.co/docs/hub/datasets-cards) to add evaluation metadata to a dataset.",
+)
 st.experimental_set_query_params(**{"dataset": [selected_dataset]})
 
 
@@ -262,9 +271,10 @@ with st.expander("Advanced configuration"):
         col_mapping[target_col] = "target"
 
     elif selected_task == "extractive_question_answering":
-        col_mapping = metadata[0]["col_mapping"]
-        # Hub YAML parser converts periods to hyphens, so we remap them here
-        col_mapping = format_col_mapping(col_mapping)
+        if metadata is not None:
+            col_mapping = metadata[0]["col_mapping"]
+            # Hub YAML parser converts periods to hyphens, so we remap them here
+            col_mapping = format_col_mapping(col_mapping)
     with col1:
         st.markdown("`context` column")
         st.text("")
@@ -327,14 +337,18 @@ with st.expander("Advanced configuration"):
         list(set(supported_metrics) - set(TASK_TO_DEFAULT_METRICS[selected_task])),
     )
     st.info(
-        """"Note: user-selected metrics will be run with their default arguments. \
+        """Note: user-selected metrics will be run with their default arguments. \
         Check out the [available metrics](https://huggingface.co/metrics) for more details."""
     )
 
 with st.form(key="form"):
 
     compatible_models = get_compatible_models(selected_task, selected_dataset)
-    selected_models = st.multiselect("Select the models you wish to evaluate", compatible_models)
+    selected_models = st.multiselect(
+        "Select the models you wish to evaluate",
+        compatible_models,
+        help="Don't see your model in this list? Add the dataset and task it was trained on to the [model card metadata](https://huggingface.co/docs/hub/models-cards#model-card-metadata).",
+    )
     print("Selected models:", selected_models)
 
     if len(selected_models) > 0:
@@ -347,7 +361,7 @@ with st.form(key="form"):
     )
     print("Selected models:", selected_models)
 
-    submit_button = st.form_submit_button("Make submission")
+    submit_button = st.form_submit_button("Evaluate models")
 
     if submit_button:
         if len(selected_models) > 0:
```
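For readers following along, the selectbox hunk above doubles as a recipe for URL-addressable widget state in Streamlit. Here is a minimal runnable sketch of that round-trip, with a toy dataset list standing in for the Hub listing the app actually fetches:

```python
import streamlit as st

# Toy stand-in for the full Hub dataset listing the app fetches.
all_datasets = ["emotion", "imdb", "squad"]

# Read ?dataset=... from the URL so shared links pre-select a dataset.
# Query param values arrive as lists of strings.
query_params = st.experimental_get_query_params()
default_dataset = all_datasets[0]
if "dataset" in query_params:
    if len(query_params["dataset"]) > 0 and query_params["dataset"][0] in all_datasets:
        default_dataset = query_params["dataset"][0]

selected_dataset = st.selectbox(
    "Select a dataset",
    all_datasets,
    index=all_datasets.index(default_dataset),
)

# Write the selection back so the current app state is shareable as a plain URL.
st.experimental_set_query_params(**{"dataset": [selected_dataset]})
```

Reading the params on load and writing them back after each selection is what lets the evaluator deep-link straight to a dataset.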
 
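The multiselect and submit-button hunks both live inside a `st.form`, which batches widget interactions so the script only reruns when the form is submitted. A self-contained sketch of that pattern, with placeholder model names rather than the app's real `get_compatible_models` output:

```python
import streamlit as st

# Placeholder list; the app builds this with get_compatible_models(task, dataset).
compatible_models = ["distilbert-base-uncased", "bert-base-cased"]

with st.form(key="form"):
    # The `help` argument renders a tooltip next to the widget label, which is
    # how the commit surfaces its model-card metadata hint.
    selected_models = st.multiselect(
        "Select the models you wish to evaluate",
        compatible_models,
        help="Add the dataset and task to your model card metadata to appear here.",
    )
    # Interacting with widgets above does not rerun the script; only this
    # submit button does.
    submit_button = st.form_submit_button("Evaluate models")

if submit_button:
    st.write(f"Submitting {len(selected_models)} models for evaluation ...")
```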
requirements.txt CHANGED

```diff
@@ -1,6 +1,6 @@
 huggingface-hub<0.8
 python-dotenv
-streamlit==1.2.0
+streamlit==1.10.0
 datasets<2.3
 evaluate<0.2
 # Dataset specific deps
```
utils.py CHANGED

```diff
@@ -1,7 +1,7 @@
 from typing import Dict, Union
 
 import requests
-from huggingface_hub import HfApi, ModelFilter
+from huggingface_hub import HfApi, ModelFilter, dataset_info
 
 AUTOTRAIN_TASK_TO_HUB_TASK = {
     "binary_classification": "text-classification",
@@ -55,9 +55,9 @@ def http_get(path: str, domain: str, token: str = None, params: dict = None) ->
 
 
 def get_metadata(dataset_name: str) -> Union[Dict, None]:
-    data = requests.get(f"https://huggingface.co/api/datasets/{dataset_name}").json()
-    if data["cardData"] is not None and "train-eval-index" in data["cardData"].keys():
-        return data["cardData"]["train-eval-index"]
+    data = dataset_info(dataset_name)
+    if data.cardData is not None and "train-eval-index" in data.cardData.keys():
+        return data.cardData["train-eval-index"]
     else:
         return None
 
```
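The utils.py change swaps a hand-rolled REST call for `huggingface_hub.dataset_info`, which queries the same `/api/datasets/{name}` endpoint but returns a typed `DatasetInfo` object whose `cardData` attribute holds the parsed YAML front matter of the dataset card. A sketch of the new call path as a standalone script ("emotion" is only an illustrative dataset name, not something the commit itself references):

```python
from typing import Dict, Union

from huggingface_hub import dataset_info


def get_metadata(dataset_name: str) -> Union[Dict, None]:
    # dataset_info wraps GET https://huggingface.co/api/datasets/{name};
    # cardData is the parsed metadata block of the dataset card.
    data = dataset_info(dataset_name)
    if data.cardData is not None and "train-eval-index" in data.cardData.keys():
        return data.cardData["train-eval-index"]
    else:
        return None


# Example usage: returns the train-eval-index block only if the dataset card
# actually declares one, otherwise None.
print(get_metadata("emotion"))
```

Beyond dropping the raw `requests` call, routing the lookup through `huggingface_hub` keeps API access consistent with the rest of the app, which is presumably the motivation for the change.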