Added selector of p_value
app.py (CHANGED)
@@ -133,6 +133,8 @@ def process_submission(*inputs):
     submit_prompt = gr.update(visible=True)
     submission_btn_yes = gr.update(interactive=True, visible=True)

+    # TODO: checkbox use_corrected_p_value
+
     pre_submit_leaderboard_table = gr.update(
         value=leaderboard_server.get_leaderboard(pre_submit=pre_submit, category=leaderboard_server.TASKS_CATEGORY_OVERALL_DETAILS),
         visible=True,
@@ -254,25 +256,29 @@ def fetch_model_detail(submission_id):
         gr.update(value=metadata['link_to_model'], visible=True)
     )

-def fetch_model_tournament_results_table(submission_id, category):
+def fetch_model_tournament_results_table(submission_id, category, use_corrected_p_value):
+    kind_of_p_value = leaderboard_server.KINDS_OF_P_VALUE[int(use_corrected_p_value)]
+
     if submission_id == None or category == None:
         return gr.update(
             visible=False,
         )
     else:
         return gr.update(
-            value=leaderboard_server.get_model_tournament_table(submission_id, category),
+            value=leaderboard_server.get_model_tournament_table(submission_id, category, kind_of_p_value=kind_of_p_value),
             visible=True,
         )

-def fetch_model_tournament_results_table_csv(submission_id, category):
+def fetch_model_tournament_results_table_csv(submission_id, category, use_corrected_p_value):
+    kind_of_p_value = leaderboard_server.KINDS_OF_P_VALUE[int(use_corrected_p_value)]
+
     if submission_id == None or category == None:
         return gr.update(
             visible=False,
         )
     else:
         return gr.update(
-            value=leaderboard_server.get_model_tournament_table_csv(submission_id, category),
+            value=leaderboard_server.get_model_tournament_table_csv(submission_id, category, kind_of_p_value=kind_of_p_value),
             visible=True,
         )
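Both fetch helpers turn the checkbox state into the server's p-value key by indexing a two-element list, relying on `int(False)` being 0 and `int(True)` being 1. A minimal sketch of that mapping; only `KINDS_OF_P_VALUE` comes from the diff, the helper name is illustrative:

```python
# Sketch of the checkbox-to-key mapping used above; the list ordering is what
# makes int(False) -> "p_value" and int(True) -> "corrected_p_value".
KINDS_OF_P_VALUE = ["p_value", "corrected_p_value"]

def pick_kind_of_p_value(use_corrected_p_value: bool) -> str:
    # bool is a subclass of int, so False -> 0 and True -> 1
    return KINDS_OF_P_VALUE[int(use_corrected_p_value)]

assert pick_kind_of_p_value(False) == "p_value"
assert pick_kind_of_p_value(True) == "corrected_p_value"
```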
@@ -288,7 +294,7 @@ def create_task_abbreviation_legend_table(category):

     return task_abbreviation_legend_body

-def change_leaderboard_category(category, selected_submission_id):
+def change_leaderboard_category(category, use_corrected_p_value, selected_submission_id):
     if category == leaderboard_server.TASKS_CATEGORY_OVERALL:
         task_abbreviation_legend = gr.update(
             visible=False,
@@ -319,19 +325,21 @@ def change_leaderboard_category(category, selected_submission_id):
             visible=True,
         )

-    model_tournament_results_table = fetch_model_tournament_results_table(selected_submission_id, category)
-    model_tournament_results_table_csv = fetch_model_tournament_results_table_csv(selected_submission_id, category)
+    model_tournament_results_table = fetch_model_tournament_results_table(selected_submission_id, category, use_corrected_p_value)
+    model_tournament_results_table_csv = fetch_model_tournament_results_table_csv(selected_submission_id, category, use_corrected_p_value)
+
+    kind_of_p_value = leaderboard_server.KINDS_OF_P_VALUE[int(use_corrected_p_value)]

     leaderboard = gr.update(
-        value=leaderboard_server.get_leaderboard(category=category),
+        value=leaderboard_server.get_leaderboard(category=category, kind_of_p_value=kind_of_p_value),
         visible=True,
     )
     leaderboard_csv = gr.update(
-        value=leaderboard_server.get_leaderboard_csv(category=category),
+        value=leaderboard_server.get_leaderboard_csv(category=category, kind_of_p_value=kind_of_p_value),
         visible=True,
     )
     leaderboard_scatter_plot = gr.update(
-        value=leaderboard_server.get_leaderboard_scatter_plot(category=category),
+        value=leaderboard_server.get_leaderboard_scatter_plot(category=category, kind_of_p_value=kind_of_p_value),
         visible=True,
     )
@@ -552,6 +560,9 @@ const intervalId = setInterval(addTitleForEachRowOfLeaderboardTable, 1000);
 </script>
 """

+CHECKBOX_USE_CORRECTED_P_VALUE_INFO = "Switch to False Discovery Rate (FDR) guarantees"
+CHECKBOX_USE_CORRECTED_P_VALUE_LABEL = "FDR guarantees"
+
 def gradio_app():
     with gr.Blocks(theme=gr.themes.Soft(text_size=text_md), css=custom_css, head=custom_js) as main:
         check_significance_is_reachable_timer = gr.Timer(
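`change_leaderboard_category` is wired through Gradio `.change()` events further down, so it must return one value per component listed in `outputs`, in the same order, with `gr.update(...)` patching properties such as `value` and `visible` on the existing components. A small self-contained sketch of that contract, with illustrative component names rather than the app's:

```python
# Sketch only: a Gradio event handler returns one value per entry in `outputs`,
# in order; gr.update(...) changes a component without recreating it.
import gradio as gr

def change_category(category):
    return (
        gr.update(value=f"Showing: {category}", visible=True),  # first output
        gr.update(visible=category != "Overall"),                # second output
    )

with gr.Blocks() as demo:
    category = gr.Dropdown(choices=["Overall", "NLI", "QA"], value="Overall")
    title = gr.Textbox(label="Title")
    legend = gr.Markdown("Task abbreviations", visible=False)
    category.change(fn=change_category, inputs=category, outputs=[title, legend])
```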
@@ -654,6 +665,13 @@ def gradio_app():
                        interactive=True,
                    )

+                with gr.Row():
+                    leaderboard_use_corrected_p_value = gr.Checkbox(
+                        info=CHECKBOX_USE_CORRECTED_P_VALUE_INFO,
+                        label=CHECKBOX_USE_CORRECTED_P_VALUE_LABEL,
+                        interactive=True,
+                    )
+
                 with gr.Row():
                     leaderboard_table = gr.DataFrame(
                         leaderboard_server.get_leaderboard(category=leaderboard_server.TASKS_CATEGORY_OVERALL),
@@ -725,29 +743,35 @@ def gradio_app():
                        visible=False,
                    )

-                (removed lines 728-744 are not rendered in the diff view)
+                for _leaderboard_form_input in [
+                    leaderboard_category_of_tasks,
+                    leaderboard_use_corrected_p_value,
+                ]:
+                    _leaderboard_form_input.change(
+                        fn=change_leaderboard_category,
+                        inputs=[
+                            leaderboard_category_of_tasks,
+                            leaderboard_use_corrected_p_value,
+                            tournament_results_dropdown,
+                        ],
+                        outputs=[
+                            leaderboard_table,
+                            leaderboard_table_csv,
+                            leaderboard_table_legend,
+                            leaderboard_scatter_plot,
+                            tournament_results_title,
+                            tournament_results_dropdown,
+                            model_tournament_results_table,
+                            model_tournament_results_table_csv,
+                        ],
+                    )

                 tournament_results_dropdown.change(
                     fn=fetch_model_tournament_results_table,
                     inputs=[
                         tournament_results_dropdown,
                         leaderboard_category_of_tasks,
+                        leaderboard_use_corrected_p_value,
                     ],
                     outputs=model_tournament_results_table,
                 ).then(
@@ -755,6 +779,7 @@ def gradio_app():
                     inputs=[
                         tournament_results_dropdown,
                         leaderboard_category_of_tasks,
+                        leaderboard_use_corrected_p_value,
                     ],
                     outputs=model_tournament_results_table_csv,
                 )
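The new wiring registers the same handler on every form input that should refresh the leaderboard (the category dropdown and the FDR checkbox). A stripped-down sketch of this loop pattern, with illustrative names and a textbox standing in for the leaderboard DataFrame:

```python
# Sketch only: every input that should trigger a refresh gets the same
# .change() listener, so dropdown and checkbox stay in sync.
import gradio as gr

def refresh(category, use_corrected_p_value):
    kind = "corrected_p_value" if use_corrected_p_value else "p_value"
    return gr.update(value=f"{category} ({kind})")

with gr.Blocks() as demo:
    category = gr.Dropdown(choices=["Overall", "NLI"], value="Overall")
    use_corrected = gr.Checkbox(label="FDR guarantees")
    table_placeholder = gr.Textbox(label="Leaderboard")
    for form_input in (category, use_corrected):
        form_input.change(
            fn=refresh,
            inputs=[category, use_corrected],
            outputs=table_placeholder,
        )
```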
@@ -792,6 +817,14 @@ def gradio_app():
                        interactive=True,
                    )

+                with gr.Row():
+                    model_details_use_corrected_p_value = gr.Checkbox(
+                        info=CHECKBOX_USE_CORRECTED_P_VALUE_INFO,
+                        label=CHECKBOX_USE_CORRECTED_P_VALUE_LABEL,
+                        visible=False,
+                        interactive=True,
+                    )
+
                 with gr.Row():
                     model_details_model_tournament_results_table = gr.DataFrame(
                         value=None,
@@ -820,11 +853,16 @@ def gradio_app():
                    fn=lambda submission_id: gr.update(visible=True) if submission_id else gr.update(visible=False),
                    inputs=model_details_model_dropdown,
                    outputs=model_details_category_of_tasks
+                ).then(
+                    fn=lambda submission_id: gr.update(visible=True) if submission_id else gr.update(visible=False),
+                    inputs=model_details_model_dropdown,
+                    outputs=model_details_use_corrected_p_value
                ).then(
                    fn=fetch_model_tournament_results_table,
                    inputs=[
                        model_details_model_dropdown,
                        model_details_category_of_tasks,
+                        model_details_use_corrected_p_value,
                    ],
                    outputs=model_details_model_tournament_results_table
                ).then(
@@ -832,25 +870,32 @@ def gradio_app():
                    inputs=[
                        model_details_model_dropdown,
                        model_details_category_of_tasks,
+                        model_details_use_corrected_p_value,
                    ],
                    outputs=model_details_model_tournament_results_table_csv
                )

-                (removed lines 839-853 are not rendered in the diff view)
+                for _model_details_form_input in [
+                    model_details_category_of_tasks,
+                    model_details_use_corrected_p_value,
+                ]:
+                    _model_details_form_input.change(
+                        fn=fetch_model_tournament_results_table,
+                        inputs=[
+                            model_details_model_dropdown,
+                            model_details_category_of_tasks,
+                            model_details_use_corrected_p_value,
+                        ],
+                        outputs=model_details_model_tournament_results_table,
+                    ).then(
+                        fn=fetch_model_tournament_results_table_csv,
+                        inputs=[
+                            model_details_model_dropdown,
+                            model_details_category_of_tasks,
+                            model_details_use_corrected_p_value,
+                        ],
+                        outputs=model_details_model_tournament_results_table_csv,
+                    )

            with gr.TabItem('Submission'):
                with gr.Column():
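In the model-details tab the FDR checkbox only becomes visible once a model is selected, and the dependent tables are refreshed through chained `.then()` steps. A compact sketch of that chaining, using illustrative names and a textbox as a stand-in for the tournament table:

```python
# Sketch only: the first step toggles visibility from the dropdown value,
# a later step reuses the same inputs to refresh a dependent component.
import gradio as gr

def toggle(submission_id):
    return gr.update(visible=bool(submission_id))

def load_table(submission_id, use_corrected_p_value):
    kind = "corrected_p_value" if use_corrected_p_value else "p_value"
    return gr.update(value=f"table for {submission_id} ({kind})", visible=True)

with gr.Blocks() as demo:
    model = gr.Dropdown(choices=["model-a", "model-b"], label="Model")
    use_corrected = gr.Checkbox(label="FDR guarantees", visible=False)
    table = gr.Textbox(visible=False)
    model.change(
        fn=toggle, inputs=model, outputs=use_corrected,
    ).then(
        fn=load_table, inputs=[model, use_corrected], outputs=table,
    )
```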
server.py (CHANGED)
@@ -277,6 +277,8 @@ class LeaderboardServer:
         self.CATEGORY_TO_TASK_ABBREVIATION_TO_DETAILS = self._prepare_category_to_task_abbr_to_details()
         self.MAX_LENGTH_OF_MODEL_TITLE = 28
         self.DIR_DATAFRAMES_CSV = "./dataframes_csv"
+        self.DEFAULT_KIND_OF_P_VALUE = "p_value"
+        self.KINDS_OF_P_VALUE = ["p_value", "corrected_p_value"]

         self.var_lock = ReadWriteLock()
         self.submission_ids = set()
@@ -326,45 +328,45 @@

         categories = [self.TASKS_CATEGORY_OVERALL, self.TASKS_CATEGORY_OVERALL_DETAILS] + sorted(self.TASKS_CATEGORIES)

-        leaderboard_dataframes = {
-            category: self._get_leaderboard(category=category) if not self.tournament_results_corrupted else pd.DataFrame(columns=['Corrupted, please check integrity'])
+        leaderboard_dataframes = {kind_of_p_value: {
+            category: self._get_leaderboard(category=category, kind_of_p_value=kind_of_p_value) if not self.tournament_results_corrupted else pd.DataFrame(columns=['Corrupted, please check integrity'])
             for category in categories
-        }
+        } for kind_of_p_value in self.KINDS_OF_P_VALUE}

         with self.var_lock.ro:
             submission_ids = self.submission_ids

-        tournament_dataframes = {
+        tournament_dataframes = {kind_of_p_value: {
             submission_id: {
-                category: self._get_model_tournament_table(submission_id, category) if not self.tournament_results_corrupted else pd.DataFrame(columns=['Corrupted, please check integrity'])
+                category: self._get_model_tournament_table(submission_id, category, kind_of_p_value=kind_of_p_value) if not self.tournament_results_corrupted else pd.DataFrame(columns=['Corrupted, please check integrity'])
                 for category in categories
             }
             for submission_id in submission_ids
-        }
+        } for kind_of_p_value in self.KINDS_OF_P_VALUE}

         with self.var_lock.rw:
             self.leaderboard_dataframes = leaderboard_dataframes
             self.tournament_dataframes = tournament_dataframes

-        leaderboard_dataframes_csv = {
+        leaderboard_dataframes_csv = {kind_of_p_value: {
             category: self._dataframe_to_csv(
-                self._get_leaderboard(category=category, to_csv=True) if not self.tournament_results_corrupted else pd.DataFrame(columns=['Corrupted, please check integrity']),
-                f"Leaderboard - {category}.csv"
+                self._get_leaderboard(category=category, to_csv=True, kind_of_p_value=kind_of_p_value) if not self.tournament_results_corrupted else pd.DataFrame(columns=['Corrupted, please check integrity']),
+                f"Leaderboard - {category}{' - FDR guarantees' if kind_of_p_value != self.DEFAULT_KIND_OF_P_VALUE else ''}.csv"
             )
             for category in categories
-        }
+        } for kind_of_p_value in self.KINDS_OF_P_VALUE}

         with self.var_lock.ro:
-            tournament_dataframes_csv = {
+            tournament_dataframes_csv = {kind_of_p_value: {
                 submission_id: {
                     category: self._dataframe_to_csv(
-                        self._get_model_tournament_table(submission_id, category, to_csv=True) if not self.tournament_results_corrupted else pd.DataFrame(columns=['Corrupted, please check integrity']),
-                        f"Tournament table - {self.submission_id_to_data[submission_id]['submission_metadata']['model_name'][:self.MAX_LENGTH_OF_MODEL_TITLE].replace('/', '_')} - {category}.csv",
+                        self._get_model_tournament_table(submission_id, category, to_csv=True, kind_of_p_value=kind_of_p_value) if not self.tournament_results_corrupted else pd.DataFrame(columns=['Corrupted, please check integrity']),
+                        f"Tournament table - {self.submission_id_to_data[submission_id]['submission_metadata']['model_name'][:self.MAX_LENGTH_OF_MODEL_TITLE].replace('/', '_')} - {category}{' - FDR guarantees' if kind_of_p_value != self.DEFAULT_KIND_OF_P_VALUE else ''}.csv",
                     )
                     for category in categories
                 }
                 for submission_id in submission_ids
-            }
+            } for kind_of_p_value in self.KINDS_OF_P_VALUE}

         with self.var_lock.rw:
             self.leaderboard_dataframes_csv = leaderboard_dataframes_csv
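The update now precomputes one DataFrame per combination of p-value kind and category, so the request path is a pure dictionary lookup under the read lock. A toy sketch of the cache layout; strings stand in for the pandas DataFrames, and the builder function is illustrative:

```python
# Sketch only: every (kind_of_p_value, category) combination is built once,
# then handlers just index into the nested dict.
KINDS_OF_P_VALUE = ["p_value", "corrected_p_value"]
categories = ["Overall", "NLI", "QA"]

def build_leaderboard(category, kind_of_p_value):
    return f"leaderboard[{category}, {kind_of_p_value}]"  # stands in for a DataFrame

leaderboard_dataframes = {
    kind_of_p_value: {
        category: build_leaderboard(category, kind_of_p_value)
        for category in categories
    }
    for kind_of_p_value in KINDS_OF_P_VALUE
}

# lookup path used by get_leaderboard(...)
assert leaderboard_dataframes["corrected_p_value"]["NLI"] == "leaderboard[NLI, corrected_p_value]"
```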
@@ -554,30 +556,36 @@
                     df_css.loc[i, c] = ''
         return df_css

-    def get_model_tournament_table_csv(self, submission_id, category, pre_submit=None):
+    def get_model_tournament_table_csv(self, submission_id, category, pre_submit=None, kind_of_p_value=None):
+        kind_of_p_value = kind_of_p_value if kind_of_p_value else self.DEFAULT_KIND_OF_P_VALUE
+
         if pre_submit == None:
             with self.var_lock.ro:
-                return self.tournament_dataframes_csv[submission_id][category]
+                return self.tournament_dataframes_csv[kind_of_p_value][submission_id][category]
         else:
             return self._dataframe_to_csv(
-                self._get_model_tournament_table(submission_id, category, pre_submit=pre_submit, to_csv=True),
+                self._get_model_tournament_table(submission_id, category, pre_submit=pre_submit, to_csv=True, kind_of_p_value=kind_of_p_value),
                 f"Tournament table - pre-submit - {category}.csv",
             )

-    def get_model_tournament_table(self, submission_id, category, pre_submit=None):
+    def get_model_tournament_table(self, submission_id, category, pre_submit=None, kind_of_p_value=None):
+        kind_of_p_value = kind_of_p_value if kind_of_p_value else self.DEFAULT_KIND_OF_P_VALUE
+
         if pre_submit == None:
             with self.var_lock.ro:
-                return copy.copy(self.tournament_dataframes[submission_id][category])
+                return copy.copy(self.tournament_dataframes[kind_of_p_value][submission_id][category])
         else:
-            return self._get_model_tournament_table(submission_id, category, pre_submit=pre_submit)
+            return self._get_model_tournament_table(submission_id, category, pre_submit=pre_submit, kind_of_p_value=kind_of_p_value)

-    def _get_model_tournament_table(self, submission_id, category, pre_submit=None, to_csv=False):
+    def _get_model_tournament_table(self, submission_id, category, pre_submit=None, to_csv=False, kind_of_p_value=None):
+        kind_of_p_value = kind_of_p_value if kind_of_p_value else self.DEFAULT_KIND_OF_P_VALUE
+
         model_tournament_table = []

         with self.var_lock.ro:
             tournament_results = pre_submit.tournament_results if pre_submit else self.tournament_results

-        for competitor_id in tournament_results[submission_id].keys() - {submission_id}:
+        for competitor_id in tournament_results[submission_id].keys() - {submission_id}:  # without self
             if competitor_id not in self.submission_id_to_data:
                 if pre_submit and competitor_id == pre_submit.submission_id:
                     data = pre_submit.data
@@ -590,13 +598,14 @@
            for task in self.TASKS_METADATA:
                task_category = self.TASKS_METADATA[task]["category"]
                if category in (task_category, self.TASKS_CATEGORY_OVERALL, self.TASKS_CATEGORY_OVERALL_DETAILS):
+                    match_task_result_details = dict.fromkeys(["significant", "corrected_p_value", "p_value"])  # order has impact to sorting DataFrame
+                    match_task_result_details.update(copy.deepcopy(tournament_results[submission_id][competitor_id][task]))
+                    match_task_result_significant = self._is_task_pval_significant(match_task_result_details[kind_of_p_value])
+
                    if to_csv:
-                        match_results[task] =
+                        match_results[task] = match_task_result_significant
                    else:
-                        match_task_result_details
-                        match_task_result_details.update(copy.deepcopy(tournament_results[submission_id][competitor_id][task]))
-                        match_task_result_details["significant"] = str(match_task_result_details["significant"]).lower()  # originaly bool
-                        match_task_result_significant = match_task_result_details["significant"]
+                        match_task_result_details["significant"] = str(match_task_result_significant).lower()  # originaly bool
                        match_task_result_details = "\n".join(f"{k}: {v}" for k, v in match_task_result_details.items())
                        match_results[task] = f'<abbr title={xmlQuoteAttr(match_task_result_details)}>{match_task_result_significant}</abbr>'
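`_get_model_tournament_table` now re-derives the significance flag from whichever p-value kind was requested and renders the match details as a tooltip on the cell. A rough sketch of that cell rendering; `xmlQuoteAttr` is the project's own attribute-quoting helper, so this sketch substitutes `html.escape` from the standard library, and the alpha threshold mirrors `_is_task_pval_significant`:

```python
# Sketch only: one tournament cell shows the significance flag, with the full
# match details available as an <abbr> tooltip.
from html import escape

def render_cell(details: dict, kind_of_p_value: str, alpha: float = 0.05) -> str:
    # significance is recomputed from whichever p-value the user selected
    significant = bool(details[kind_of_p_value] < alpha)
    tooltip = "\n".join(
        f"{k}: {v}"
        for k, v in {**details, "significant": str(significant).lower()}.items()
    )
    return f'<abbr title="{escape(tooltip, quote=True)}">{significant}</abbr>'

cell = render_cell({"p_value": 0.003, "corrected_p_value": 0.012}, "corrected_p_value")
```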
@@ -654,7 +663,10 @@

         return True

-    def
+    def _is_task_pval_significant(self, task_pval, alpha=0.05):
+        return bool(task_pval < alpha)
+
+    def _correct_significance_in_tournament_results(self, tournament_results):
         tournament_results = copy.deepcopy(tournament_results)

         if not self._is_correct_significance_in_tournament_results(tournament_results):
@@ -665,7 +677,7 @@
                corrected_model_task_pvals = correct_pvals_for_fdr(model_task_pvals)
                for competitor_id, task_pval in zip(competitors, corrected_model_task_pvals):
                    tournament_results[submission_id][competitor_id][task]["corrected_p_value"] = task_pval
-                    tournament_results[submission_id][competitor_id][task]["significant"] =
+                    tournament_results[submission_id][competitor_id][task]["significant"] = self._is_task_pval_significant(task_pval)

         return tournament_results
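`correct_pvals_for_fdr` itself is not part of this diff, so its exact procedure is not visible here. FDR-adjusted p-values of this kind are commonly produced with the Benjamini-Hochberg procedure; the sketch below is only an assumption about the intent and uses statsmodels, while the significance check mirrors `_is_task_pval_significant`:

```python
# Sketch only: Benjamini-Hochberg FDR adjustment as a plausible stand-in for the
# project's correct_pvals_for_fdr (whose implementation is not shown in this diff).
from statsmodels.stats.multitest import multipletests

def correct_pvals_for_fdr_sketch(pvals, alpha=0.05):
    reject, pvals_corrected, _, _ = multipletests(pvals, alpha=alpha, method="fdr_bh")
    return list(pvals_corrected)

corrected = correct_pvals_for_fdr_sketch([0.001, 0.02, 0.04, 0.30])
significant = [p < 0.05 for p in corrected]  # same check as _is_task_pval_significant
```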
@@ -680,17 +692,19 @@
         dataframe.to_csv(filepath, index=False)
         return filepath

-    def get_leaderboard_scatter_plot(self, pre_submit=None, category=None):
+    def get_leaderboard_scatter_plot(self, pre_submit=None, category=None, kind_of_p_value=None):
         import numpy as np
         from analyze_winscore import get_ldb_records, create_scatter_plot_with_curve_with_variances_named

+        kind_of_p_value = kind_of_p_value if kind_of_p_value else self.DEFAULT_KIND_OF_P_VALUE
+
         #m = self.TASKS_METADATA
         #tournament = self.tournament_results
         name_map = self.submission_id_to_model_title

         category = category if category else self.TASKS_CATEGORY_OVERALL

-        csv_file_path = self.leaderboard_dataframes_csv[self.TASKS_CATEGORY_OVERALL]
+        csv_file_path = self.leaderboard_dataframes_csv[kind_of_p_value][self.TASKS_CATEGORY_OVERALL]
         ldb_records = get_ldb_records(name_map, csv_file_path)
         categories = self.TASKS_CATEGORIES
         model_names = list(ldb_records.keys())
@@ -725,29 +739,32 @@

         return fig

-    def get_leaderboard_csv(self, pre_submit=None, category=None):
+    def get_leaderboard_csv(self, pre_submit=None, category=None, kind_of_p_value=None):
         if pre_submit == None:
             category = category if category else self.TASKS_CATEGORY_OVERALL
+            kind_of_p_value = kind_of_p_value if kind_of_p_value else self.DEFAULT_KIND_OF_P_VALUE
             with self.var_lock.ro:
-                return self.leaderboard_dataframes_csv[category]
+                return self.leaderboard_dataframes_csv[kind_of_p_value][category]
         else:
             return self._dataframe_to_csv(
-                self._get_leaderboard(pre_submit=pre_submit, category=category, to_csv=True),
+                self._get_leaderboard(pre_submit=pre_submit, category=category, to_csv=True, kind_of_p_value=kind_of_p_value),
                 f"Leaderboard - pre-submit - {category}.csv",
             )

-    def get_leaderboard(self, pre_submit=None, category=None):
+    def get_leaderboard(self, pre_submit=None, category=None, kind_of_p_value=None):
         if pre_submit == None:
             category = category if category else self.TASKS_CATEGORY_OVERALL
+            kind_of_p_value = kind_of_p_value if kind_of_p_value else self.DEFAULT_KIND_OF_P_VALUE
             with self.var_lock.ro:
-                return copy.copy(self.leaderboard_dataframes[category])
+                return copy.copy(self.leaderboard_dataframes[kind_of_p_value][category])
         else:
-            return self._get_leaderboard(pre_submit=pre_submit, category=category)
+            return self._get_leaderboard(pre_submit=pre_submit, category=category, kind_of_p_value=kind_of_p_value)

-    def _get_leaderboard(self, pre_submit=None, category=None, to_csv=False):
+    def _get_leaderboard(self, pre_submit=None, category=None, to_csv=False, kind_of_p_value=None):
         with self.var_lock.ro:
             tournament_results = pre_submit.tournament_results if pre_submit else self.tournament_results
             category = category if category else self.TASKS_CATEGORY_OVERALL
+            kind_of_p_value = kind_of_p_value if kind_of_p_value else self.DEFAULT_KIND_OF_P_VALUE

         if len(tournament_results) == 0:
             return pd.DataFrame(columns=['No submissions yet'])
@@ -779,8 +796,13 @@
                num_of_wins = 0
                for competitor_id in tournament_results[submission_id].keys() - {submission_id}:  # without self
                    num_of_competitors += 1
-                    (removed line 782 is not rendered in the diff view)
+
+                    match_task_result_details = tournament_results[submission_id][competitor_id][task]
+                    match_task_result_significant = self._is_task_pval_significant(match_task_result_details[kind_of_p_value])
+
+                    if match_task_result_significant:
                        num_of_wins += 1
+
                task_score = num_of_wins / num_of_competitors * 100 if num_of_competitors > 0 else 100
                win_score.setdefault(task_category, []).append(task_score)
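The win score counts, per task, the share of pairwise comparisons that are significant under the selected p-value kind. A toy sketch of the same arithmetic with made-up numbers:

```python
# Sketch only: a model's task score is the percentage of competitors it beats
# significantly under the chosen kind of p-value.
def task_win_score(match_pvals, kind_of_p_value="p_value", alpha=0.05):
    num_of_competitors = 0
    num_of_wins = 0
    for details in match_pvals:  # one dict per competitor
        num_of_competitors += 1
        if details[kind_of_p_value] < alpha:
            num_of_wins += 1
    return num_of_wins / num_of_competitors * 100 if num_of_competitors > 0 else 100

score = task_win_score(
    [{"p_value": 0.01, "corrected_p_value": 0.03},
     {"p_value": 0.20, "corrected_p_value": 0.40}],
    kind_of_p_value="corrected_p_value",
)
assert score == 50.0
```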
@@ -1061,7 +1083,7 @@
            print(f"Locked `submit_lock` for {submission_id = }")
            print(info_msg)

-            self.update_leaderboard()
+            self.update_leaderboard()  # TODO: Add a comment explaining why this is here. Shouldn't it only happen when `do_submit == True`?

            if HF_FAKE_TOURNAMENT:
                tournament_results = self.fake_tournament(submission_id, file)