Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
feat-add-reranker-tab-0607 (#21)
Browse files
- feat: add the ranking only tab for qa (ca1267e31152cb65545c46a9c5d52d015bb8f1f9)
- feat: add the ranking only tab for long-doc (de0db296fa5eb71e078501722b44ef22b61f7e26)
- feat: fix the reranker list (6d20d9786aaf9786d84f0729c935438a0e9ec1fa)
- chore: clean up (118f4b013300f1bb97c990755dee3d7339cbb240)
app.py
CHANGED
@@ -11,10 +11,10 @@ from src.about import (
|
|
11 |
from src.benchmarks import DOMAIN_COLS_QA, LANG_COLS_QA, DOMAIN_COLS_LONG_DOC, LANG_COLS_LONG_DOC, METRIC_LIST, \
|
12 |
DEFAULT_METRIC_QA, DEFAULT_METRIC_LONG_DOC
|
13 |
from src.display.css_html_js import custom_css
|
14 |
-
from src.display.utils import COL_NAME_IS_ANONYMOUS, COL_NAME_REVISION, COL_NAME_TIMESTAMP, COL_NAME_RERANKING_MODEL
|
15 |
from src.envs import API, EVAL_RESULTS_PATH, REPO_ID, RESULTS_REPO, TOKEN
|
16 |
from src.read_evals import get_raw_eval_results, get_leaderboard_df
|
17 |
-
from src.utils import update_metric, upload_file, get_default_cols, submit_results, reset_rank
|
18 |
from src.display.gradio_formatting import get_version_dropdown, get_search_bar, get_reranking_dropdown, \
|
19 |
get_metric_dropdown, get_domain_dropdown, get_language_dropdown, get_anonymous_checkbox, get_revision_and_ts_checkbox, get_leaderboard_table, get_noreranking_dropdown
|
20 |
from src.display.gradio_listener import set_listeners
|
@@ -108,9 +108,8 @@ with demo:
|
|
108 |
show_anonymous = get_anonymous_checkbox()
|
109 |
with gr.Row():
|
110 |
show_revision_and_timestamp = get_revision_and_ts_checkbox()
|
111 |
-
|
112 |
with gr.Tabs(elem_classes="tab-buttons") as sub_tabs:
|
113 |
-
with gr.TabItem("
|
114 |
with gr.Row():
|
115 |
# search retrieval models
|
116 |
with gr.Column():
|
@@ -149,17 +148,19 @@ with demo:
|
|
149 |
leaderboard_table,
|
150 |
queue=True
|
151 |
)
|
152 |
-
with gr.TabItem("
|
153 |
-
with gr.
|
154 |
-
|
155 |
-
|
|
|
|
|
156 |
lb_df_retriever = leaderboard_df_qa[leaderboard_df_qa[COL_NAME_RERANKING_MODEL] == "NoReranker"]
|
157 |
lb_df_retriever = reset_rank(lb_df_retriever)
|
158 |
-
hidden_lb_db_retriever = original_df_qa[original_df_qa[COL_NAME_RERANKING_MODEL] == "NoReranker"]
|
159 |
-
hidden_lb_db_retriever = reset_rank(hidden_lb_db_retriever)
|
160 |
lb_table_retriever = get_leaderboard_table(lb_df_retriever, types_qa)
|
161 |
# Dummy leaderboard for handling the case when the user uses backspace key
|
162 |
-
|
|
|
|
|
163 |
|
164 |
set_listeners(
|
165 |
"qa",
|
@@ -188,7 +189,48 @@ with demo:
|
|
188 |
lb_table_retriever,
|
189 |
queue=True
|
190 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
191 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
192 |
with gr.TabItem("Long Doc", elem_id="long-doc-benchmark-tab-table", id=1):
|
193 |
with gr.Row():
|
194 |
with gr.Column(min_width=320):
|
@@ -211,7 +253,7 @@ with demo:
|
|
211 |
with gr.Row():
|
212 |
show_revision_and_timestamp = get_revision_and_ts_checkbox()
|
213 |
with gr.Tabs(elem_classes="tab-buttons") as sub_tabs:
|
214 |
-
with gr.TabItem("
|
215 |
with gr.Row():
|
216 |
with gr.Column():
|
217 |
search_bar = get_search_bar()
|
@@ -255,11 +297,12 @@ with demo:
|
|
255 |
lb_table,
|
256 |
queue=True
|
257 |
)
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
|
|
263 |
lb_df_retriever_long_doc = leaderboard_df_long_doc[
|
264 |
leaderboard_df_long_doc[COL_NAME_RERANKING_MODEL] == "NoReranker"
|
265 |
]
|
@@ -300,6 +343,49 @@ with demo:
|
|
300 |
lb_table_retriever_long_doc,
|
301 |
queue=True
|
302 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
303 |
|
304 |
with gr.TabItem("🚀Submit here!", elem_id="submit-tab-table", id=2):
|
305 |
with gr.Column():
|
|
|
11 |
from src.benchmarks import DOMAIN_COLS_QA, LANG_COLS_QA, DOMAIN_COLS_LONG_DOC, LANG_COLS_LONG_DOC, METRIC_LIST, \
|
12 |
DEFAULT_METRIC_QA, DEFAULT_METRIC_LONG_DOC
|
13 |
from src.display.css_html_js import custom_css
|
14 |
+
from src.display.utils import COL_NAME_IS_ANONYMOUS, COL_NAME_REVISION, COL_NAME_TIMESTAMP, COL_NAME_RERANKING_MODEL, COL_NAME_RETRIEVAL_MODEL
|
15 |
from src.envs import API, EVAL_RESULTS_PATH, REPO_ID, RESULTS_REPO, TOKEN
|
16 |
from src.read_evals import get_raw_eval_results, get_leaderboard_df
|
17 |
+
from src.utils import update_metric, upload_file, get_default_cols, submit_results, reset_rank, remove_html
|
18 |
from src.display.gradio_formatting import get_version_dropdown, get_search_bar, get_reranking_dropdown, \
|
19 |
get_metric_dropdown, get_domain_dropdown, get_language_dropdown, get_anonymous_checkbox, get_revision_and_ts_checkbox, get_leaderboard_table, get_noreranking_dropdown
|
20 |
from src.display.gradio_listener import set_listeners
|
|
|
108 |
show_anonymous = get_anonymous_checkbox()
|
109 |
with gr.Row():
|
110 |
show_revision_and_timestamp = get_revision_and_ts_checkbox()
|
|
|
111 |
with gr.Tabs(elem_classes="tab-buttons") as sub_tabs:
|
112 |
+
with gr.TabItem("Retrieval + Reranking", id=10):
|
113 |
with gr.Row():
|
114 |
# search retrieval models
|
115 |
with gr.Column():
|
|
|
148 |
leaderboard_table,
|
149 |
queue=True
|
150 |
)
|
151 |
+
with gr.TabItem("Retrieval Only", id=11):
|
152 |
+
with gr.Row():
|
153 |
+
with gr.Column(scale=1):
|
154 |
+
search_bar_retriever = get_search_bar()
|
155 |
+
with gr.Column(scale=1):
|
156 |
+
selected_noreranker = get_noreranking_dropdown()
|
157 |
lb_df_retriever = leaderboard_df_qa[leaderboard_df_qa[COL_NAME_RERANKING_MODEL] == "NoReranker"]
|
158 |
lb_df_retriever = reset_rank(lb_df_retriever)
|
|
|
|
|
159 |
lb_table_retriever = get_leaderboard_table(lb_df_retriever, types_qa)
|
160 |
# Dummy leaderboard for handling the case when the user uses backspace key
|
161 |
+
hidden_lb_df_retriever = original_df_qa[original_df_qa[COL_NAME_RERANKING_MODEL] == "NoReranker"]
|
162 |
+
hidden_lb_df_retriever = reset_rank(hidden_lb_df_retriever)
|
163 |
+
hidden_lb_table_retriever = get_leaderboard_table(hidden_lb_df_retriever, types_qa, visible=False)
|
164 |
|
165 |
set_listeners(
|
166 |
"qa",
|
|
|
189 |
lb_table_retriever,
|
190 |
queue=True
|
191 |
)
|
192 |
+
with gr.TabItem("Reranking Only", id=12):
|
193 |
+
lb_df_reranker = leaderboard_df_qa[leaderboard_df_qa[COL_NAME_RETRIEVAL_MODEL] == "BM25"]
|
194 |
+
lb_df_reranker = reset_rank(lb_df_reranker)
|
195 |
+
reranking_models_reranker = lb_df_reranker[COL_NAME_RERANKING_MODEL].apply(remove_html).unique().tolist()
|
196 |
+
with gr.Row():
|
197 |
+
with gr.Column(scale=1):
|
198 |
+
selected_rerankings_reranker = get_reranking_dropdown(reranking_models_reranker)
|
199 |
+
with gr.Column(scale=1):
|
200 |
+
search_bar_reranker = gr.Textbox(show_label=False, visible=False)
|
201 |
+
lb_table_reranker = get_leaderboard_table(lb_df_reranker, types_qa)
|
202 |
+
hidden_lb_df_reranker = original_df_qa[original_df_qa[COL_NAME_RETRIEVAL_MODEL] == "BM25"]
|
203 |
+
hidden_lb_df_reranker = reset_rank(hidden_lb_df_reranker)
|
204 |
+
hidden_lb_table_reranker = get_leaderboard_table(
|
205 |
+
hidden_lb_df_reranker, types_qa, visible=False
|
206 |
+
)
|
207 |
|
208 |
+
set_listeners(
|
209 |
+
"qa",
|
210 |
+
lb_table_reranker,
|
211 |
+
hidden_lb_table_reranker,
|
212 |
+
search_bar_reranker,
|
213 |
+
selected_domains,
|
214 |
+
selected_langs,
|
215 |
+
selected_rerankings_reranker,
|
216 |
+
show_anonymous,
|
217 |
+
show_revision_and_timestamp,
|
218 |
+
)
|
219 |
+
# set metric listener
|
220 |
+
selected_metric.change(
|
221 |
+
update_metric_qa,
|
222 |
+
[
|
223 |
+
selected_metric,
|
224 |
+
selected_domains,
|
225 |
+
selected_langs,
|
226 |
+
selected_rerankings_reranker,
|
227 |
+
search_bar_reranker,
|
228 |
+
show_anonymous,
|
229 |
+
show_revision_and_timestamp,
|
230 |
+
],
|
231 |
+
lb_table_reranker,
|
232 |
+
queue=True
|
233 |
+
)
|
234 |
with gr.TabItem("Long Doc", elem_id="long-doc-benchmark-tab-table", id=1):
|
235 |
with gr.Row():
|
236 |
with gr.Column(min_width=320):
|
|
|
253 |
with gr.Row():
|
254 |
show_revision_and_timestamp = get_revision_and_ts_checkbox()
|
255 |
with gr.Tabs(elem_classes="tab-buttons") as sub_tabs:
|
256 |
+
with gr.TabItem("Retrieval + Reranking", id=20):
|
257 |
with gr.Row():
|
258 |
with gr.Column():
|
259 |
search_bar = get_search_bar()
|
|
|
297 |
lb_table,
|
298 |
queue=True
|
299 |
)
|
300 |
+
with gr.TabItem("Retrieval Only", id=21):
|
301 |
+
with gr.Row():
|
302 |
+
with gr.Column(scale=1):
|
303 |
+
search_bar_retriever = get_search_bar()
|
304 |
+
with gr.Column(scale=1):
|
305 |
+
selected_noreranker = get_noreranking_dropdown()
|
306 |
lb_df_retriever_long_doc = leaderboard_df_long_doc[
|
307 |
leaderboard_df_long_doc[COL_NAME_RERANKING_MODEL] == "NoReranker"
|
308 |
]
|
|
|
343 |
lb_table_retriever_long_doc,
|
344 |
queue=True
|
345 |
)
|
346 |
+
with gr.TabItem("Reranking Only", id=22):
|
347 |
+
lb_df_reranker_ldoc = leaderboard_df_long_doc[
|
348 |
+
leaderboard_df_long_doc[COL_NAME_RETRIEVAL_MODEL] == "BM25"
|
349 |
+
]
|
350 |
+
lb_df_reranker_ldoc = reset_rank(lb_df_reranker_ldoc)
|
351 |
+
reranking_models_reranker_ldoc = lb_df_reranker_ldoc[COL_NAME_RERANKING_MODEL].apply(remove_html).unique().tolist()
|
352 |
+
with gr.Row():
|
353 |
+
with gr.Column(scale=1):
|
354 |
+
selected_rerankings_reranker_ldoc = get_reranking_dropdown(reranking_models_reranker_ldoc)
|
355 |
+
with gr.Column(scale=1):
|
356 |
+
search_bar_reranker_ldoc = gr.Textbox(show_label=False, visible=False)
|
357 |
+
lb_table_reranker_ldoc = get_leaderboard_table(lb_df_reranker_ldoc, types_long_doc)
|
358 |
+
hidden_lb_df_reranker_ldoc = original_df_long_doc[original_df_long_doc[COL_NAME_RETRIEVAL_MODEL] == "BM25"]
|
359 |
+
hidden_lb_df_reranker_ldoc = reset_rank(hidden_lb_df_reranker_ldoc)
|
360 |
+
hidden_lb_table_reranker_ldoc = get_leaderboard_table(
|
361 |
+
hidden_lb_df_reranker_ldoc, types_long_doc, visible=False
|
362 |
+
)
|
363 |
+
|
364 |
+
set_listeners(
|
365 |
+
"long-doc",
|
366 |
+
lb_table_reranker_ldoc,
|
367 |
+
hidden_lb_table_reranker_ldoc,
|
368 |
+
search_bar_reranker_ldoc,
|
369 |
+
selected_domains,
|
370 |
+
selected_langs,
|
371 |
+
selected_rerankings_reranker_ldoc,
|
372 |
+
show_anonymous,
|
373 |
+
show_revision_and_timestamp,
|
374 |
+
)
|
375 |
+
selected_metric.change(
|
376 |
+
update_metric_long_doc,
|
377 |
+
[
|
378 |
+
selected_metric,
|
379 |
+
selected_domains,
|
380 |
+
selected_langs,
|
381 |
+
selected_rerankings_reranker_ldoc,
|
382 |
+
search_bar_reranker_ldoc,
|
383 |
+
show_anonymous,
|
384 |
+
show_revision_and_timestamp,
|
385 |
+
],
|
386 |
+
lb_table_reranker_ldoc,
|
387 |
+
queue=True
|
388 |
+
)
|
389 |
|
390 |
with gr.TabItem("🚀Submit here!", elem_id="submit-tab-table", id=2):
|
391 |
with gr.Column():
|