Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
feat: implement the version selector for qa retrieval only
Browse files
app.py
CHANGED
@@ -14,19 +14,21 @@ from src.display.css_html_js import custom_css
|
|
14 |
from src.envs import (
|
15 |
API,
|
16 |
EVAL_RESULTS_PATH,
|
17 |
-
REPO_ID, DEFAULT_METRIC_QA, DEFAULT_METRIC_LONG_DOC, METRIC_LIST, LATEST_BENCHMARK_VERSION
|
18 |
)
|
19 |
from src.loaders import (
|
20 |
load_eval_results
|
21 |
)
|
22 |
from src.utils import (
|
23 |
update_metric,
|
24 |
-
set_listeners
|
|
|
25 |
)
|
26 |
from src.display.gradio_formatting import (
|
27 |
get_version_dropdown,
|
28 |
get_search_bar,
|
29 |
get_reranking_dropdown,
|
|
|
30 |
get_metric_dropdown,
|
31 |
get_domain_dropdown,
|
32 |
get_language_dropdown,
|
@@ -110,11 +112,9 @@ with demo:
|
|
110 |
# select domain
|
111 |
with gr.Row():
|
112 |
selected_domains = get_domain_dropdown(QABenchmarks[datastore.slug])
|
113 |
-
# selected_domains = get_domain_dropdown(QABenchmarks["2404"])
|
114 |
# select language
|
115 |
with gr.Row():
|
116 |
selected_langs = get_language_dropdown(QABenchmarks[datastore.slug])
|
117 |
-
# selected_langs = get_language_dropdown(QABenchmarks["2404"])
|
118 |
with gr.Column():
|
119 |
# select the metric
|
120 |
selected_metric = get_metric_dropdown(METRIC_LIST, DEFAULT_METRIC_QA)
|
@@ -132,21 +132,22 @@ with demo:
|
|
132 |
with gr.Column():
|
133 |
selected_rerankings = get_reranking_dropdown(datastore.reranking_models)
|
134 |
# shown_table
|
135 |
-
|
136 |
datastore.leaderboard_df_qa, datastore.types_qa)
|
137 |
# Dummy leaderboard for handling the case when the user uses backspace key
|
138 |
-
|
139 |
datastore.raw_df_qa, datastore.types_qa, visible=False)
|
140 |
|
141 |
selected_version.change(
|
142 |
update_datastore,
|
143 |
[selected_version,],
|
144 |
-
[selected_domains, selected_langs, selected_rerankings,
|
145 |
)
|
|
|
146 |
set_listeners(
|
147 |
"qa",
|
148 |
-
|
149 |
-
|
150 |
search_bar,
|
151 |
selected_version,
|
152 |
selected_domains,
|
@@ -168,30 +169,37 @@ with demo:
|
|
168 |
show_anonymous,
|
169 |
show_revision_and_timestamp,
|
170 |
],
|
171 |
-
|
172 |
queue=True
|
173 |
)
|
174 |
|
175 |
-
"""
|
176 |
with gr.TabItem("Retrieval Only", id=11):
|
177 |
with gr.Row():
|
178 |
with gr.Column(scale=1):
|
179 |
search_bar_retriever = get_search_bar()
|
180 |
with gr.Column(scale=1):
|
181 |
selected_noreranker = get_noreranking_dropdown()
|
182 |
-
|
|
|
183 |
lb_df_retriever = reset_rank(lb_df_retriever)
|
184 |
-
lb_table_retriever = get_leaderboard_table(lb_df_retriever,
|
185 |
# Dummy leaderboard for handling the case when the user uses backspace key
|
186 |
-
hidden_lb_df_retriever =
|
187 |
hidden_lb_df_retriever = reset_rank(hidden_lb_df_retriever)
|
188 |
-
hidden_lb_table_retriever = get_leaderboard_table(hidden_lb_df_retriever,
|
|
|
|
|
|
|
|
|
|
|
|
|
189 |
|
190 |
set_listeners(
|
191 |
"qa",
|
192 |
lb_table_retriever,
|
193 |
hidden_lb_table_retriever,
|
194 |
search_bar_retriever,
|
|
|
195 |
selected_domains,
|
196 |
selected_langs,
|
197 |
selected_noreranker,
|
@@ -210,11 +218,11 @@ with demo:
|
|
210 |
search_bar_retriever,
|
211 |
show_anonymous,
|
212 |
show_revision_and_timestamp,
|
213 |
-
selected_version,
|
214 |
],
|
215 |
lb_table_retriever,
|
216 |
queue=True
|
217 |
)
|
|
|
218 |
with gr.TabItem("Reranking Only", id=12):
|
219 |
lb_df_reranker = data["AIR-Bench_24.04"].leaderboard_df_qa[data["AIR-Bench_24.04"].leaderboard_df_qa[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK]
|
220 |
lb_df_reranker = reset_rank(lb_df_reranker)
|
|
|
14 |
from src.envs import (
|
15 |
API,
|
16 |
EVAL_RESULTS_PATH,
|
17 |
+
REPO_ID, DEFAULT_METRIC_QA, DEFAULT_METRIC_LONG_DOC, METRIC_LIST, LATEST_BENCHMARK_VERSION, COL_NAME_RERANKING_MODEL
|
18 |
)
|
19 |
from src.loaders import (
|
20 |
load_eval_results
|
21 |
)
|
22 |
from src.utils import (
|
23 |
update_metric,
|
24 |
+
set_listeners,
|
25 |
+
reset_rank
|
26 |
)
|
27 |
from src.display.gradio_formatting import (
|
28 |
get_version_dropdown,
|
29 |
get_search_bar,
|
30 |
get_reranking_dropdown,
|
31 |
+
get_noreranking_dropdown,
|
32 |
get_metric_dropdown,
|
33 |
get_domain_dropdown,
|
34 |
get_language_dropdown,
|
|
|
112 |
# select domain
|
113 |
with gr.Row():
|
114 |
selected_domains = get_domain_dropdown(QABenchmarks[datastore.slug])
|
|
|
115 |
# select language
|
116 |
with gr.Row():
|
117 |
selected_langs = get_language_dropdown(QABenchmarks[datastore.slug])
|
|
|
118 |
with gr.Column():
|
119 |
# select the metric
|
120 |
selected_metric = get_metric_dropdown(METRIC_LIST, DEFAULT_METRIC_QA)
|
|
|
132 |
with gr.Column():
|
133 |
selected_rerankings = get_reranking_dropdown(datastore.reranking_models)
|
134 |
# shown_table
|
135 |
+
lb_table = get_leaderboard_table(
|
136 |
datastore.leaderboard_df_qa, datastore.types_qa)
|
137 |
# Dummy leaderboard for handling the case when the user uses backspace key
|
138 |
+
hidden_lb_table = get_leaderboard_table(
|
139 |
datastore.raw_df_qa, datastore.types_qa, visible=False)
|
140 |
|
141 |
selected_version.change(
|
142 |
update_datastore,
|
143 |
[selected_version,],
|
144 |
+
[selected_domains, selected_langs, selected_rerankings, lb_table, hidden_lb_table]
|
145 |
)
|
146 |
+
|
147 |
set_listeners(
|
148 |
"qa",
|
149 |
+
lb_table,
|
150 |
+
hidden_lb_table,
|
151 |
search_bar,
|
152 |
selected_version,
|
153 |
selected_domains,
|
|
|
169 |
show_anonymous,
|
170 |
show_revision_and_timestamp,
|
171 |
],
|
172 |
+
lb_table,
|
173 |
queue=True
|
174 |
)
|
175 |
|
|
|
176 |
with gr.TabItem("Retrieval Only", id=11):
|
177 |
with gr.Row():
|
178 |
with gr.Column(scale=1):
|
179 |
search_bar_retriever = get_search_bar()
|
180 |
with gr.Column(scale=1):
|
181 |
selected_noreranker = get_noreranking_dropdown()
|
182 |
+
|
183 |
+
lb_df_retriever = datastore.leaderboard_df_qa[datastore.leaderboard_df_qa[COL_NAME_RERANKING_MODEL] == "NoReranker"]
|
184 |
lb_df_retriever = reset_rank(lb_df_retriever)
|
185 |
+
lb_table_retriever = get_leaderboard_table(lb_df_retriever, datastore.types_qa)
|
186 |
# Dummy leaderboard for handling the case when the user uses backspace key
|
187 |
+
hidden_lb_df_retriever = datastore.raw_df_qa[datastore.raw_df_qa[COL_NAME_RERANKING_MODEL] == "NoReranker"]
|
188 |
hidden_lb_df_retriever = reset_rank(hidden_lb_df_retriever)
|
189 |
+
hidden_lb_table_retriever = get_leaderboard_table(hidden_lb_df_retriever, datastore.types_qa, visible=False)
|
190 |
+
|
191 |
+
selected_version.change(
|
192 |
+
update_datastore,
|
193 |
+
[selected_version,],
|
194 |
+
[selected_domains, selected_langs, selected_rerankings, lb_table_retriever, hidden_lb_table_retriever]
|
195 |
+
)
|
196 |
|
197 |
set_listeners(
|
198 |
"qa",
|
199 |
lb_table_retriever,
|
200 |
hidden_lb_table_retriever,
|
201 |
search_bar_retriever,
|
202 |
+
selected_version,
|
203 |
selected_domains,
|
204 |
selected_langs,
|
205 |
selected_noreranker,
|
|
|
218 |
search_bar_retriever,
|
219 |
show_anonymous,
|
220 |
show_revision_and_timestamp,
|
|
|
221 |
],
|
222 |
lb_table_retriever,
|
223 |
queue=True
|
224 |
)
|
225 |
+
"""
|
226 |
with gr.TabItem("Reranking Only", id=12):
|
227 |
lb_df_reranker = data["AIR-Bench_24.04"].leaderboard_df_qa[data["AIR-Bench_24.04"].leaderboard_df_qa[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK]
|
228 |
lb_df_reranker = reset_rank(lb_df_reranker)
|