Slep committed on
Commit
0cc0a55
1 Parent(s): 51b7bd7

Initial commit.

Browse files
Files changed (5) hide show
  1. .gitignore +162 -0
  2. README.md +0 -2
  3. app.py +195 -0
  4. utils/__init__.py +0 -0
  5. utils/misc.py +16 -0
.gitignore ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dev/
2
+
3
+ # Byte-compiled / optimized / DLL files
4
+ __pycache__/
5
+ *.py[cod]
6
+ *$py.class
7
+
8
+ # C extensions
9
+ *.so
10
+
11
+ # Distribution / packaging
12
+ .Python
13
+ build/
14
+ develop-eggs/
15
+ dist/
16
+ downloads/
17
+ eggs/
18
+ .eggs/
19
+ lib/
20
+ lib64/
21
+ parts/
22
+ sdist/
23
+ var/
24
+ wheels/
25
+ share/python-wheels/
26
+ *.egg-info/
27
+ .installed.cfg
28
+ *.egg
29
+ MANIFEST
30
+
31
+ # PyInstaller
32
+ # Usually these files are written by a python script from a template
33
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
34
+ *.manifest
35
+ *.spec
36
+
37
+ # Installer logs
38
+ pip-log.txt
39
+ pip-delete-this-directory.txt
40
+
41
+ # Unit test / coverage reports
42
+ htmlcov/
43
+ .tox/
44
+ .nox/
45
+ .coverage
46
+ .coverage.*
47
+ .cache
48
+ nosetests.xml
49
+ coverage.xml
50
+ *.cover
51
+ *.py,cover
52
+ .hypothesis/
53
+ .pytest_cache/
54
+ cover/
55
+
56
+ # Translations
57
+ *.mo
58
+ *.pot
59
+
60
+ # Django stuff:
61
+ *.log
62
+ local_settings.py
63
+ db.sqlite3
64
+ db.sqlite3-journal
65
+
66
+ # Flask stuff:
67
+ instance/
68
+ .webassets-cache
69
+
70
+ # Scrapy stuff:
71
+ .scrapy
72
+
73
+ # Sphinx documentation
74
+ docs/_build/
75
+
76
+ # PyBuilder
77
+ .pybuilder/
78
+ target/
79
+
80
+ # Jupyter Notebook
81
+ .ipynb_checkpoints
82
+
83
+ # IPython
84
+ profile_default/
85
+ ipython_config.py
86
+
87
+ # pyenv
88
+ # For a library or package, you might want to ignore these files since the code is
89
+ # intended to run in multiple environments; otherwise, check them in:
90
+ # .python-version
91
+
92
+ # pipenv
93
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
94
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
95
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
96
+ # install all needed dependencies.
97
+ #Pipfile.lock
98
+
99
+ # poetry
100
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
101
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
102
+ # commonly ignored for libraries.
103
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
104
+ #poetry.lock
105
+
106
+ # pdm
107
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
108
+ #pdm.lock
109
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
110
+ # in version control.
111
+ # https://pdm.fming.dev/#use-with-ide
112
+ .pdm.toml
113
+
114
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
115
+ __pypackages__/
116
+
117
+ # Celery stuff
118
+ celerybeat-schedule
119
+ celerybeat.pid
120
+
121
+ # SageMath parsed files
122
+ *.sage.py
123
+
124
+ # Environments
125
+ .env
126
+ .venv
127
+ env/
128
+ venv/
129
+ ENV/
130
+ env.bak/
131
+ venv.bak/
132
+
133
+ # Spyder project settings
134
+ .spyderproject
135
+ .spyproject
136
+
137
+ # Rope project settings
138
+ .ropeproject
139
+
140
+ # mkdocs documentation
141
+ /site
142
+
143
+ # mypy
144
+ .mypy_cache/
145
+ .dmypy.json
146
+ dmypy.json
147
+
148
+ # Pyre type checker
149
+ .pyre/
150
+
151
+ # pytype static type analyzer
152
+ .pytype/
153
+
154
+ # Cython debug symbols
155
+ cython_debug/
156
+
157
+ # PyCharm
158
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
159
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
160
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
161
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
162
+ #.idea/
README.md CHANGED
@@ -12,5 +12,3 @@ tags:
12
  - leaderboard
13
  fullWidth: true
14
  ---
15
-
16
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
12
  - leaderboard
13
  fullWidth: true
14
  ---
 
 
app.py ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import gradio as gr
3
+ from collections import OrderedDict
4
+ import logging
5
+ import tempfile
6
+ import os
7
+ from huggingface_hub import (
8
+ HfApi,
9
+ hf_hub_download,
10
+ get_safetensors_metadata,
11
+ metadata_load,
12
+ )
13
+
14
+ from utils.misc import human_format, make_clickable_model
15
+
16
# Module-wide logging setup.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


# Model ids listed here are skipped when building the leaderboard.
EXCLUDED_MODELS = []  # For models that misbehave :)

# Values of K (recall@K) and distractor counts shown on the leaderboard.
K_EVALUATIONS = [1, 5, 10, 20, 50]
DIST_EVALUATIONS = [10_000, 100_000, 500_000, 1_000_000]

# Maps metric keys to displayed column names. Insertion order is the
# column order of the leaderboard table.
EXPECTED_KEY_TO_COLNAME = OrderedDict()

# Leading columns, listed here only to fix their position.
EXPECTED_KEY_TO_COLNAME["rank"] = "Rank"
EXPECTED_KEY_TO_COLNAME["model"] = "Model"
EXPECTED_KEY_TO_COLNAME["model_size"] = "Model Size (Million)"
EXPECTED_KEY_TO_COLNAME["embedding_dim"] = "Embedding Dimension"

# One column per (distractor count, K) pair, largest distractor count first.
EXPECTED_KEY_TO_COLNAME.update(
    (f"recall_at_{k}|{n_dist}", f"R@{k} +{human_format(n_dist)} Dist.")
    for n_dist in reversed(DIST_EVALUATIONS)
    for k in K_EVALUATIONS
)

# Trailing column.
EXPECTED_KEY_TO_COLNAME["n_dists"] = "Available Dists"
41
+
42
+
43
def get_safetensors_nparams(modelId):
    """Return the model's parameter count in millions, or ``None``.

    The count is the sum of ``parameter_count`` values reported by
    ``get_safetensors_metadata`` for ``modelId``, divided by 1e6 and rounded.
    This is best-effort: any failure is logged and ``None`` is returned so a
    missing size never prevents the model from being listed.
    """
    try:
        safetensors = get_safetensors_metadata(modelId)
        num_parameters = sum(safetensors.parameter_count.values())
        return round(num_parameters / 1e6)
    except Exception as e:
        # Deliberately broad: the size is optional, but say why it is missing.
        logger.info("Could not read safetensors metadata for %s : %s", modelId, e)
        return None
50
+
51
+
52
def parse_model(m):
    """Build one leaderboard row from a model's README metadata.

    Downloads the model's ``README.md``, loads its metadata with
    ``metadata_load`` and looks in the first ``model-index`` entry for the
    result whose dataset type is ``Slep/LAION-RVS-Fashion``.

    Args:
        m: A model object exposing a ``modelId`` attribute.

    Returns:
        dict mapping display column names (values of
        ``EXPECTED_KEY_TO_COLNAME``) to values: the clickable model link, the
        model size, and every reported metric whose ``type`` is a known key.

    Raises:
        ValueError: If the metadata has no ``model-index``, or if none of its
            results targets the ``Slep/LAION-RVS-Fashion`` dataset.
    """
    readme_path = hf_hub_download(m.modelId, filename="README.md")
    meta = metadata_load(readme_path)

    if "model-index" not in meta:
        raise ValueError("Missing `model-index` in metadata")

    for result in meta["model-index"][0]["results"]:
        if result["dataset"]["type"] == "Slep/LAION-RVS-Fashion":
            break  # Found the right dataset
    else:
        # Without this, the last (unrelated) result would be used silently,
        # or `result` would be unbound when the list is empty.
        raise ValueError("Missing `Slep/LAION-RVS-Fashion` results in `model-index`")

    # Get data from model-index / safetensors metadata
    d = {
        EXPECTED_KEY_TO_COLNAME["model"]: make_clickable_model(m.modelId),
        EXPECTED_KEY_TO_COLNAME["model_size"]: get_safetensors_nparams(m.modelId),
    }

    # Get data from exported results; unknown metric types are ignored.
    for metric in result["metrics"]:
        t = metric["type"]

        if t in EXPECTED_KEY_TO_COLNAME:
            d[EXPECTED_KEY_TO_COLNAME[t]] = metric["value"]

    return d
77
+
78
+
79
def get_data_from_hub():
    """Collect leaderboard rows for every Hub model tagged ``lrvsf-benchmark``.

    Models listed in ``EXCLUDED_MODELS`` are skipped. A model that fails to
    parse is logged and skipped so one bad model card cannot break the board.

    Returns:
        pandas.DataFrame with one row per parsed model and the columns of
        ``EXPECTED_KEY_TO_COLNAME`` (in that order).
    """
    api = HfApi()
    models = api.list_models(filter="lrvsf-benchmark")

    df_list = []
    for m in models:
        if m.modelId in EXCLUDED_MODELS:
            continue

        try:
            parsed = parse_model(m)
        except Exception as e:
            # Use the module logger (not the root logger) with lazy formatting.
            logger.warning("Failed to parse model %s : %s", m.modelId, e)
            continue

        if parsed:
            df_list.append(parsed)

    return pd.DataFrame(df_list, columns=EXPECTED_KEY_TO_COLNAME.values())
96
+
97
+
98
def filter_columns(df, k_filter, d_filter):
    """Select the leaderboard columns matching the chosen K / distractor values.

    Args:
        df: DataFrame whose columns are the display names of
            ``EXPECTED_KEY_TO_COLNAME``.
        k_filter: Collection of selected K values (ints).
        d_filter: Collection of selected distractor counts (ints).

    Returns:
        Tuple ``(filtered_df, datatypes)`` where ``datatypes`` holds one
        datatype string per selected column, in column order.
    """
    # Columns that are always shown, in fixed leading positions.
    fixed_keys = ("rank", "model", "model_size", "embedding_dim")
    selected_columns = [EXPECTED_KEY_TO_COLNAME[key] for key in fixed_keys]
    datatypes = ["number", "markdown", "number", "number"]

    for key, name in EXPECTED_KEY_TO_COLNAME.items():
        # Skip columns already selected and anything that is not a recall metric.
        if name in selected_columns or not key.startswith("recall_at_"):
            continue

        # Key layout: recall_at_<K>|<D>
        recall_part, n_dist = key.split("|")
        k_value = recall_part.rsplit("_", 1)[-1]

        if int(k_value) not in k_filter or int(n_dist) not in d_filter:
            continue

        selected_columns.append(name)
        datatypes.append("str")  # Because of the ± std

    # The distractor-availability column always comes last.
    selected_columns.append(EXPECTED_KEY_TO_COLNAME["n_dists"])
    datatypes.append("number")

    return df[selected_columns], datatypes
128
+
129
+
130
def add_rank(df):
    """Fill the ``Rank`` column from the ``R@1 +1M Dist.`` metric.

    The metric is expected to look like ``"<mean> ± <std>"``; only the part
    before ``±`` is used. The highest score gets rank 1 and ties share the
    smallest rank. Rows whose metric is missing or unparsable are ranked last
    instead of raising.

    Args:
        df: DataFrame with an ``R@1 +1M Dist.`` column. Modified in place.

    Returns:
        The same DataFrame, with an integer ``Rank`` column.
    """
    main_metrics = pd.to_numeric(
        df["R@1 +1M Dist."].astype(str).str.split("±").str[0].str.strip(),
        errors="coerce",
    )
    # `argsort` yields sorting positions, not ranks; `rank` gives the actual
    # standing of each row (descending: best recall first).
    df["Rank"] = main_metrics.rank(
        ascending=False, method="min", na_option="bottom"
    ).astype(int)
    return df
134
+
135
+
136
def save_current_leaderboard(df):
    """Write ``df`` to a new temporary CSV file and return its path.

    The file is named ``lrvsf_export_*.csv`` and is not deleted automatically
    (``delete=False``); the caller is responsible for removing it.

    Args:
        df: DataFrame to export (written without its index).

    Returns:
        str path of the CSV file.
    """
    # Only the unique name is needed: close the handle right away so it is not
    # leaked and the path can be re-opened for writing on every platform.
    with tempfile.NamedTemporaryFile(
        prefix="lrvsf_export_", suffix=".csv", delete=False
    ) as tmp:
        filename = tmp.name

    df.to_csv(filename, index=False)
    return filename
142
+
143
+
144
def load_lrvsf_models(k_filter, d_filter, csv_file):
    """Rebuild the leaderboard table and its CSV export.

    Args:
        k_filter: Selected K values, passed on to ``filter_columns``.
        d_filter: Selected distractor counts, passed on to ``filter_columns``.
        csv_file: Path of the previously exported CSV (falsy if none); it is
            removed before a new export is written.

    Returns:
        List ``[gr.DataFrame, gr.File]`` holding the refreshed table and the
        new CSV export.
    """
    # Remove previous tmpfile. A file that is already gone (or cannot be
    # removed) must not prevent the leaderboard from refreshing.
    if csv_file:
        try:
            os.remove(csv_file)
        except OSError as e:
            logger.warning("Could not remove previous export %s : %s", csv_file, e)

    df = get_data_from_hub()
    df = add_rank(df)  # Ranked before filtering: the ranking metric may be hidden.
    df, datatypes = filter_columns(df, k_filter, d_filter)
    filename = save_current_leaderboard(df)

    outputs = [
        gr.DataFrame(value=df, datatype=datatypes),
        gr.File(filename, label="CSV File"),
    ]

    return outputs
160
+
161
+
162
if __name__ == "__main__":
    # Build the Gradio UI: title, filter checkboxes, table, CSV export, refresh.
    with gr.Blocks() as demo:
        gr.Markdown(
            """
            # LAION - Referred Visual Search - Fashion : Leaderboard
            """
        )

        # Filters controlling which recall columns are displayed.
        with gr.Row():
            k_filter = gr.CheckboxGroup(
                choices=K_EVALUATIONS,
                value=K_EVALUATIONS,
                label="Recall at K",
            )
            distractor_choices = [(human_format(D), D) for D in DIST_EVALUATIONS]
            d_filter = gr.CheckboxGroup(
                choices=distractor_choices,
                value=DIST_EVALUATIONS,
                label="Number of Distractors",
            )

        df_table = gr.Dataframe(type="pandas", interactive=False)
        csv_file = gr.File(interactive=False)
        refresh = gr.Button("Refresh")

        # Actions: the same loader runs on "Refresh" clicks and on page load.
        for register in (refresh.click, demo.load):
            register(
                load_lrvsf_models,
                inputs=[k_filter, d_filter, csv_file],
                outputs=[df_table, csv_file],
            )

    demo.launch()
utils/__init__.py ADDED
File without changes
utils/misc.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def human_format(num):
    """Format a number compactly with a K/M/B/T suffix.

    The value is first rounded to 3 significant digits, then scaled down by
    1000 per suffix step; trailing zeros and a dangling decimal point are
    dropped (``10_000`` -> ``"10K"``, ``1234`` -> ``"1.23K"``).

    Values beyond the largest suffix keep the ``T`` suffix with a larger
    mantissa (``1e15`` -> ``"1000T"``) instead of raising ``IndexError``.
    """
    suffixes = ("", "K", "M", "B", "T")
    num = float(f"{num:.3g}")
    magnitude = 0
    # Stop at the last available suffix so the lookup below cannot overflow.
    while abs(num) >= 1000 and magnitude < len(suffixes) - 1:
        magnitude += 1
        num /= 1000.0
    digits = f"{num:f}".rstrip("0").rstrip(".")
    return f"{digits}{suffixes[magnitude]}"
10
+
11
+
12
def make_clickable_model(model_name, link=None):
    """Return an HTML anchor that links to a model and shows its short name.

    Args:
        model_name: Model id, e.g. ``"user/model"``. Only the part after the
            last ``/`` is displayed.
        link: Target URL. Defaults to ``https://huggingface.co/<model_name>``.

    Returns:
        str containing an ``<a>`` element that opens in a new tab.
    """
    # Local import: this module has no import section of its own.
    from html import escape

    if link is None:
        link = "https://huggingface.co/" + model_name
    # Remove user from model name
    short_name = model_name.split("/")[-1]
    # Both values come from outside this module; escape them so they cannot
    # break out of the attribute or inject markup.
    return (
        '<a target="_blank" style="text-decoration: underline" '
        f'href="{escape(link)}">{escape(short_name)}</a>'
    )