Spaces:

vidore
/

vidore-leaderboard

Running

App Files Community

add-support-for-new-vidore-result-format

by tonywu71 - opened Nov 8, 2024

base: refs/heads/main

←

from: refs/pr/2

Discussion Files changed

+47

-27

Files changed (3) hide show

app.py +17 -10
data/model_handler.py +23 -17
ruff.toml +7 -0

app.py CHANGED Viewed

@@ -5,10 +5,10 @@ from data.model_handler import ModelHandler
 METRICS = ["ndcg_at_5", "recall_at_1"]
-def main():
     model_handler = ModelHandler()
     initial_metric = "ndcg_at_5"
     data = model_handler.get_vidore_data(initial_metric)
     data = add_rank_and_format(data)
@@ -48,7 +48,7 @@ def main():
                 gr.Markdown(
                     """
                 Visual Document Retrieval Benchmark leaderboard. To submit results, refer to the corresponding tab.
                 Refer to the [ColPali paper](https://arxiv.org/abs/2407.01449) for details on metrics, tasks and models.
                 """
                 )
@@ -125,9 +125,10 @@ def main():
                     1. **Evaluate your model**:
                        - Follow the evaluation script provided in the [ViDoRe GitHub repository](https://github.com/illuin-tech/vidore-benchmark/)
                     2. **Format your submission file**:
-                        - The submission file should automatically be generated, and named `results.json` with the following structure:
                         ```json
                         {
                             "dataset_name_1": {
@@ -142,13 +143,19 @@ def main():
                             },
                         }
                         ```
-                        - The dataset names should be the same as the ViDoRe dataset names listed in the following collection: [ViDoRe Benchmark](https://huggingface.co/collections/vidore/vidore-benchmark-667173f98e70a1c0fa4db00d).
                     3. **Submit your model**:
                         - Create a public HuggingFace model repository with your model.
-                        - Add the tag `vidore` to your model in the metadata of the model card and place the `results.json` file at the root.
-                    And you're done! Your model will appear on the leaderboard when you click refresh! Once the space gets rebooted, it will appear on startup.
                     """
                 )

 METRICS = ["ndcg_at_5", "recall_at_1"]
+def main():
     model_handler = ModelHandler()
     initial_metric = "ndcg_at_5"
     data = model_handler.get_vidore_data(initial_metric)
     data = add_rank_and_format(data)
                 gr.Markdown(
                     """
                 Visual Document Retrieval Benchmark leaderboard. To submit results, refer to the corresponding tab.
                 Refer to the [ColPali paper](https://arxiv.org/abs/2407.01449) for details on metrics, tasks and models.
                 """
                 )
                     1. **Evaluate your model**:
                        - Follow the evaluation script provided in the [ViDoRe GitHub repository](https://github.com/illuin-tech/vidore-benchmark/)
                     2. **Format your submission file**:
+                        - The submission file should automatically be generated, and named `results.json` with the
+                        following structure:
                         ```json
                         {
                             "dataset_name_1": {
                             },
                         }
                         ```
+                        - The dataset names should be the same as the ViDoRe dataset names listed in the following
+                        collection: [ViDoRe Benchmark](https://huggingface.co/collections/vidore/vidore-benchmark-667173f98e70a1c0fa4db00d).
                     3. **Submit your model**:
                         - Create a public HuggingFace model repository with your model.
+                        - Add the tag `vidore` to your model in the metadata of the model card and place the
+                        `results.json` file at the root.
+                    And you're done! Your model will appear on the leaderboard when you click refresh! Once the space
+                    gets rebooted, it will appear on startup.
+                    Note: For proper hyperlink redirection, please ensure that your model repository name is in
+                    kebab-case, e.g. `my-model-name`.
                     """
                 )

data/model_handler.py CHANGED Viewed

@@ -1,12 +1,15 @@
 import json
 import os
-from typing import Dict
-from huggingface_hub import HfApi, hf_hub_download, metadata_load
 import pandas as pd
-from .dataset_handler import get_datasets_nickname, VIDORE_DATASETS_KEYWORDS
 BLOCKLIST = ["impactframes"]
 class ModelHandler:
     def __init__(self, model_infos_path="model_infos.json"):
         self.api = HfApi()
@@ -23,26 +26,28 @@ class ModelHandler:
         with open(self.model_infos_path, "w") as f:
             json.dump(self.model_infos, f)
     def get_vidore_data(self, metric="ndcg_at_5"):
         models = self.api.list_models(filter="vidore")
         repositories = [model.modelId for model in models]  # type: ignore
         for repo_id in repositories:
-            org_name = repo_id.split('/')[0]
             if org_name in BLOCKLIST:
                 continue
-            files = [f for f in self.api.list_repo_files(repo_id) if f.endswith('_metrics.json') or f == 'results.json']
             if len(files) == 0:
                 continue
             else:
                 for file in files:
-                    if file.endswith('results.json'):
-                        model_name = repo_id.replace('/', '_')
                     else:
-                        model_name = file.split('_metrics.json')[0]
                     if model_name not in self.model_infos:
                         readme_path = hf_hub_download(repo_id, filename="README.md")
@@ -53,15 +58,16 @@ class ModelHandler:
                             with open(result_path) as f:
                                 results = json.load(f)
-                            for dataset in results:
-                                results[dataset] = {key: value for key, value in results[dataset].items()}
                             self.model_infos[model_name] = {"meta": meta, "results": results}
                         except Exception as e:
                             print(f"Error loading {model_name} - {e}")
                             continue
-        #self._save_model_infos()
         model_res = {}
         if len(self.model_infos) > 0:
@@ -69,7 +75,7 @@ class ModelHandler:
                 res = self.model_infos[model]["results"]
                 dataset_res = {}
                 for dataset in res.keys():
-                    #for each keyword check if it is in the dataset name if not continue
                     if not any(keyword in dataset for keyword in VIDORE_DATASETS_KEYWORDS):
                         print(f"{dataset} not found in ViDoRe datasets. Skipping ...")
                         continue
@@ -77,9 +83,9 @@ class ModelHandler:
                     dataset_nickname = get_datasets_nickname(dataset)
                     dataset_res[dataset_nickname] = res[dataset][metric]
                 model_res[model] = dataset_res
             df = pd.DataFrame(model_res).T
             return df
         return pd.DataFrame()
@@ -104,7 +110,7 @@ class ModelHandler:
             df.insert(len(df.columns) - len(cols_to_rank), "Average", df[cols_to_rank].mean(axis=1, skipna=False))
             df.sort_values("Average", ascending=False, inplace=True)
         df.insert(0, "Rank", list(range(1, len(df) + 1)))
-        #multiply values by 100 if they are floats and round to 1 decimal place
         for col in df.columns:
             if df[col].dtype == "float64":
                 df[col] = df[col].apply(lambda x: round(x * 100, 1))

 import json
 import os
+from typing import Any, Dict
 import pandas as pd
+from huggingface_hub import HfApi, hf_hub_download, metadata_load
+from .dataset_handler import VIDORE_DATASETS_KEYWORDS, get_datasets_nickname
 BLOCKLIST = ["impactframes"]
 class ModelHandler:
     def __init__(self, model_infos_path="model_infos.json"):
         self.api = HfApi()
         with open(self.model_infos_path, "w") as f:
             json.dump(self.model_infos, f)
+    def _are_results_in_new_vidore_format(self, results: Dict[str, Any]) -> bool:
+        return "metadata" in results and "metrics" in results
     def get_vidore_data(self, metric="ndcg_at_5"):
         models = self.api.list_models(filter="vidore")
         repositories = [model.modelId for model in models]  # type: ignore
         for repo_id in repositories:
+            org_name = repo_id.split("/")[0]
             if org_name in BLOCKLIST:
                 continue
+            files = [f for f in self.api.list_repo_files(repo_id) if f.endswith("_metrics.json") or f == "results.json"]
             if len(files) == 0:
                 continue
             else:
                 for file in files:
+                    if file.endswith("results.json"):
+                        model_name = repo_id.replace("/", "_")
                     else:
+                        model_name = file.split("_metrics.json")[0]
                     if model_name not in self.model_infos:
                         readme_path = hf_hub_download(repo_id, filename="README.md")
                             with open(result_path) as f:
                                 results = json.load(f)
+                            if self._are_results_in_new_vidore_format(results):
+                                metadata = results["metadata"]
+                                results = results["metrics"]
                             self.model_infos[model_name] = {"meta": meta, "results": results}
                         except Exception as e:
                             print(f"Error loading {model_name} - {e}")
                             continue
+        # self._save_model_infos()
         model_res = {}
         if len(self.model_infos) > 0:
                 res = self.model_infos[model]["results"]
                 dataset_res = {}
                 for dataset in res.keys():
+                    # for each keyword check if it is in the dataset name if not continue
                     if not any(keyword in dataset for keyword in VIDORE_DATASETS_KEYWORDS):
                         print(f"{dataset} not found in ViDoRe datasets. Skipping ...")
                         continue
                     dataset_nickname = get_datasets_nickname(dataset)
                     dataset_res[dataset_nickname] = res[dataset][metric]
                 model_res[model] = dataset_res
             df = pd.DataFrame(model_res).T
             return df
         return pd.DataFrame()
             df.insert(len(df.columns) - len(cols_to_rank), "Average", df[cols_to_rank].mean(axis=1, skipna=False))
             df.sort_values("Average", ascending=False, inplace=True)
         df.insert(0, "Rank", list(range(1, len(df) + 1)))
+        # multiply values by 100 if they are floats and round to 1 decimal place
         for col in df.columns:
             if df[col].dtype == "float64":
                 df[col] = df[col].apply(lambda x: round(x * 100, 1))

ruff.toml ADDED Viewed

@@ -0,0 +1,7 @@

+line-length = 120
+[lint]
+select = ["E", "F", "W", "I", "N"]
+[lint.per-file-ignores]
+"__init__.py" = ["F401"]