tonywu71 committed on
Commit
9331159
1 Parent(s): 0858809

add-support-for-new-vidore-result-format (#2)

Browse files

- build: add ruff.toml (658164b5e13ea5896bc104f0a2b23482d76a34ee)
- style: apply linter (66d537f2fc87156f49bf7da027c2967ac949de1e)
- feat: add support for new vidore output format (1f00e6d583ed19b120cbbefc8a6892f373004f92)
- fix: replace `vidore_benchmark_hash` with `vidore_benchmark_version` (f0be09062a4b4bdc9e710a0c2f641e45cb315a29)
- refactor: simplify handling of the new vidore result format (bc3b1444cf4f210ff356cf60af8776c54307d08c)
- docs: add note in model submission markdown about repo name casing (6db62ef21c97fbf3221489548d9dba20357deb0e)

Files changed (3) hide show
  1. app.py +17 -10
  2. data/model_handler.py +23 -17
  3. ruff.toml +7 -0
app.py CHANGED
@@ -5,10 +5,10 @@ from data.model_handler import ModelHandler
5
 
6
  METRICS = ["ndcg_at_5", "recall_at_1"]
7
 
8
- def main():
9
  model_handler = ModelHandler()
10
  initial_metric = "ndcg_at_5"
11
-
12
  data = model_handler.get_vidore_data(initial_metric)
13
  data = add_rank_and_format(data)
14
 
@@ -48,7 +48,7 @@ def main():
48
  gr.Markdown(
49
  """
50
  Visual Document Retrieval Benchmark leaderboard. To submit results, refer to the corresponding tab.
51
-
52
  Refer to the [ColPali paper](https://arxiv.org/abs/2407.01449) for details on metrics, tasks and models.
53
  """
54
  )
@@ -125,9 +125,10 @@ def main():
125
 
126
  1. **Evaluate your model**:
127
  - Follow the evaluation script provided in the [ViDoRe GitHub repository](https://github.com/illuin-tech/vidore-benchmark/)
128
-
129
  2. **Format your submission file**:
130
- - The submission file should automatically be generated, and named `results.json` with the following structure:
 
131
  ```json
132
  {
133
  "dataset_name_1": {
@@ -142,13 +143,19 @@ def main():
142
  },
143
  }
144
  ```
145
- - The dataset names should be the same as the ViDoRe dataset names listed in the following collection: [ViDoRe Benchmark](https://huggingface.co/collections/vidore/vidore-benchmark-667173f98e70a1c0fa4db00d).
146
-
 
147
  3. **Submit your model**:
148
  - Create a public HuggingFace model repository with your model.
149
- - Add the tag `vidore` to your model in the metadata of the model card and place the `results.json` file at the root.
150
-
151
- And you're done! Your model will appear on the leaderboard when you click refresh! Once the space gets rebooted, it will appear on startup.
 
 
 
 
 
152
  """
153
  )
154
 
 
5
 
6
  METRICS = ["ndcg_at_5", "recall_at_1"]
7
 
8
+ def main():
9
  model_handler = ModelHandler()
10
  initial_metric = "ndcg_at_5"
11
+
12
  data = model_handler.get_vidore_data(initial_metric)
13
  data = add_rank_and_format(data)
14
 
 
48
  gr.Markdown(
49
  """
50
  Visual Document Retrieval Benchmark leaderboard. To submit results, refer to the corresponding tab.
51
+
52
  Refer to the [ColPali paper](https://arxiv.org/abs/2407.01449) for details on metrics, tasks and models.
53
  """
54
  )
 
125
 
126
  1. **Evaluate your model**:
127
  - Follow the evaluation script provided in the [ViDoRe GitHub repository](https://github.com/illuin-tech/vidore-benchmark/)
128
+
129
  2. **Format your submission file**:
130
+ - The submission file should automatically be generated, and named `results.json` with the
131
+ following structure:
132
  ```json
133
  {
134
  "dataset_name_1": {
 
143
  },
144
  }
145
  ```
146
+ - The dataset names should be the same as the ViDoRe dataset names listed in the following
147
+ collection: [ViDoRe Benchmark](https://huggingface.co/collections/vidore/vidore-benchmark-667173f98e70a1c0fa4db00d).
148
+
149
  3. **Submit your model**:
150
  - Create a public HuggingFace model repository with your model.
151
+ - Add the tag `vidore` to your model in the metadata of the model card and place the
152
+ `results.json` file at the root.
153
+
154
+ And you're done! Your model will appear on the leaderboard when you click refresh! Once the space
155
+ gets rebooted, it will appear on startup.
156
+
157
+ Note: For proper hyperlink redirection, please ensure that your model repository name is in
158
+ kebab-case, e.g. `my-model-name`.
159
  """
160
  )
161
 
data/model_handler.py CHANGED
@@ -1,12 +1,15 @@
1
  import json
2
  import os
3
- from typing import Dict
4
- from huggingface_hub import HfApi, hf_hub_download, metadata_load
5
  import pandas as pd
6
- from .dataset_handler import get_datasets_nickname, VIDORE_DATASETS_KEYWORDS
 
 
7
 
8
  BLOCKLIST = ["impactframes"]
9
 
 
10
  class ModelHandler:
11
  def __init__(self, model_infos_path="model_infos.json"):
12
  self.api = HfApi()
@@ -23,26 +26,28 @@ class ModelHandler:
23
  with open(self.model_infos_path, "w") as f:
24
  json.dump(self.model_infos, f)
25
 
 
 
 
26
  def get_vidore_data(self, metric="ndcg_at_5"):
27
  models = self.api.list_models(filter="vidore")
28
  repositories = [model.modelId for model in models] # type: ignore
29
 
30
  for repo_id in repositories:
31
- org_name = repo_id.split('/')[0]
32
  if org_name in BLOCKLIST:
33
  continue
34
-
35
- files = [f for f in self.api.list_repo_files(repo_id) if f.endswith('_metrics.json') or f == 'results.json']
36
 
37
-
 
38
  if len(files) == 0:
39
  continue
40
  else:
41
  for file in files:
42
- if file.endswith('results.json'):
43
- model_name = repo_id.replace('/', '_')
44
  else:
45
- model_name = file.split('_metrics.json')[0]
46
 
47
  if model_name not in self.model_infos:
48
  readme_path = hf_hub_download(repo_id, filename="README.md")
@@ -53,15 +58,16 @@ class ModelHandler:
53
  with open(result_path) as f:
54
  results = json.load(f)
55
 
56
- for dataset in results:
57
- results[dataset] = {key: value for key, value in results[dataset].items()}
 
58
 
59
  self.model_infos[model_name] = {"meta": meta, "results": results}
60
  except Exception as e:
61
  print(f"Error loading {model_name} - {e}")
62
  continue
63
 
64
- #self._save_model_infos()
65
 
66
  model_res = {}
67
  if len(self.model_infos) > 0:
@@ -69,7 +75,7 @@ class ModelHandler:
69
  res = self.model_infos[model]["results"]
70
  dataset_res = {}
71
  for dataset in res.keys():
72
- #for each keyword check if it is in the dataset name if not continue
73
  if not any(keyword in dataset for keyword in VIDORE_DATASETS_KEYWORDS):
74
  print(f"{dataset} not found in ViDoRe datasets. Skipping ...")
75
  continue
@@ -77,9 +83,9 @@ class ModelHandler:
77
  dataset_nickname = get_datasets_nickname(dataset)
78
  dataset_res[dataset_nickname] = res[dataset][metric]
79
  model_res[model] = dataset_res
80
-
81
  df = pd.DataFrame(model_res).T
82
-
83
  return df
84
  return pd.DataFrame()
85
 
@@ -104,7 +110,7 @@ class ModelHandler:
104
  df.insert(len(df.columns) - len(cols_to_rank), "Average", df[cols_to_rank].mean(axis=1, skipna=False))
105
  df.sort_values("Average", ascending=False, inplace=True)
106
  df.insert(0, "Rank", list(range(1, len(df) + 1)))
107
- #multiply values by 100 if they are floats and round to 1 decimal place
108
  for col in df.columns:
109
  if df[col].dtype == "float64":
110
  df[col] = df[col].apply(lambda x: round(x * 100, 1))
 
1
  import json
2
  import os
3
+ from typing import Any, Dict
4
+
5
  import pandas as pd
6
+ from huggingface_hub import HfApi, hf_hub_download, metadata_load
7
+
8
+ from .dataset_handler import VIDORE_DATASETS_KEYWORDS, get_datasets_nickname
9
 
10
  BLOCKLIST = ["impactframes"]
11
 
12
+
13
  class ModelHandler:
14
  def __init__(self, model_infos_path="model_infos.json"):
15
  self.api = HfApi()
 
26
  with open(self.model_infos_path, "w") as f:
27
  json.dump(self.model_infos, f)
28
 
29
+ def _are_results_in_new_vidore_format(self, results: Dict[str, Any]) -> bool:
30
+ return "metadata" in results and "metrics" in results
31
+
32
  def get_vidore_data(self, metric="ndcg_at_5"):
33
  models = self.api.list_models(filter="vidore")
34
  repositories = [model.modelId for model in models] # type: ignore
35
 
36
  for repo_id in repositories:
37
+ org_name = repo_id.split("/")[0]
38
  if org_name in BLOCKLIST:
39
  continue
 
 
40
 
41
+ files = [f for f in self.api.list_repo_files(repo_id) if f.endswith("_metrics.json") or f == "results.json"]
42
+
43
  if len(files) == 0:
44
  continue
45
  else:
46
  for file in files:
47
+ if file.endswith("results.json"):
48
+ model_name = repo_id.replace("/", "_")
49
  else:
50
+ model_name = file.split("_metrics.json")[0]
51
 
52
  if model_name not in self.model_infos:
53
  readme_path = hf_hub_download(repo_id, filename="README.md")
 
58
  with open(result_path) as f:
59
  results = json.load(f)
60
 
61
+ if self._are_results_in_new_vidore_format(results):
62
+ metadata = results["metadata"]
63
+ results = results["metrics"]
64
 
65
  self.model_infos[model_name] = {"meta": meta, "results": results}
66
  except Exception as e:
67
  print(f"Error loading {model_name} - {e}")
68
  continue
69
 
70
+ # self._save_model_infos()
71
 
72
  model_res = {}
73
  if len(self.model_infos) > 0:
 
75
  res = self.model_infos[model]["results"]
76
  dataset_res = {}
77
  for dataset in res.keys():
78
+ # for each keyword check if it is in the dataset name if not continue
79
  if not any(keyword in dataset for keyword in VIDORE_DATASETS_KEYWORDS):
80
  print(f"{dataset} not found in ViDoRe datasets. Skipping ...")
81
  continue
 
83
  dataset_nickname = get_datasets_nickname(dataset)
84
  dataset_res[dataset_nickname] = res[dataset][metric]
85
  model_res[model] = dataset_res
86
+
87
  df = pd.DataFrame(model_res).T
88
+
89
  return df
90
  return pd.DataFrame()
91
 
 
110
  df.insert(len(df.columns) - len(cols_to_rank), "Average", df[cols_to_rank].mean(axis=1, skipna=False))
111
  df.sort_values("Average", ascending=False, inplace=True)
112
  df.insert(0, "Rank", list(range(1, len(df) + 1)))
113
+ # multiply values by 100 if they are floats and round to 1 decimal place
114
  for col in df.columns:
115
  if df[col].dtype == "float64":
116
  df[col] = df[col].apply(lambda x: round(x * 100, 1))
ruff.toml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ line-length = 120
2
+
3
+ [lint]
4
+ select = ["E", "F", "W", "I", "N"]
5
+
6
+ [lint.per-file-ignores]
7
+ "__init__.py" = ["F401"]