idolezal committed on
Commit
cc83df6
·
1 Parent(s): 046b7f2

Added selector of p_value

Browse files
Files changed (2) hide show
  1. app.py +87 -42
  2. server.py +62 -40
app.py CHANGED
@@ -133,6 +133,8 @@ def process_submission(*inputs):
133
  submit_prompt = gr.update(visible=True)
134
  submission_btn_yes = gr.update(interactive=True, visible=True)
135
 
 
 
136
  pre_submit_leaderboard_table = gr.update(
137
  value=leaderboard_server.get_leaderboard(pre_submit=pre_submit, category=leaderboard_server.TASKS_CATEGORY_OVERALL_DETAILS),
138
  visible=True,
@@ -254,25 +256,29 @@ def fetch_model_detail(submission_id):
254
  gr.update(value=metadata['link_to_model'], visible=True)
255
  )
256
 
257
- def fetch_model_tournament_results_table(submission_id, category):
 
 
258
  if submission_id == None or category == None:
259
  return gr.update(
260
  visible=False,
261
  )
262
  else:
263
  return gr.update(
264
- value=leaderboard_server.get_model_tournament_table(submission_id, category),
265
  visible=True,
266
  )
267
 
268
- def fetch_model_tournament_results_table_csv(submission_id, category):
 
 
269
  if submission_id == None or category == None:
270
  return gr.update(
271
  visible=False,
272
  )
273
  else:
274
  return gr.update(
275
- value=leaderboard_server.get_model_tournament_table_csv(submission_id, category),
276
  visible=True,
277
  )
278
 
@@ -288,7 +294,7 @@ def create_task_abbreviation_legend_table(category):
288
 
289
  return task_abbreviation_legend_body
290
 
291
- def change_leaderboard_category(category, selected_submission_id):
292
  if category == leaderboard_server.TASKS_CATEGORY_OVERALL:
293
  task_abbreviation_legend = gr.update(
294
  visible=False,
@@ -319,19 +325,21 @@ def change_leaderboard_category(category, selected_submission_id):
319
  visible=True,
320
  )
321
 
322
- model_tournament_results_table = fetch_model_tournament_results_table(selected_submission_id, category)
323
- model_tournament_results_table_csv = fetch_model_tournament_results_table_csv(selected_submission_id, category)
 
 
324
 
325
  leaderboard = gr.update(
326
- value=leaderboard_server.get_leaderboard(category=category),
327
  visible=True,
328
  )
329
  leaderboard_csv = gr.update(
330
- value=leaderboard_server.get_leaderboard_csv(category=category),
331
  visible=True,
332
  )
333
  leaderboard_scatter_plot = gr.update(
334
- value=leaderboard_server.get_leaderboard_scatter_plot(category=category),
335
  visible=True,
336
  )
337
 
@@ -552,6 +560,9 @@ const intervalId = setInterval(addTitleForEachRowOfLeaderboardTable, 1000);
552
  </script>
553
  """
554
 
 
 
 
555
  def gradio_app():
556
  with gr.Blocks(theme=gr.themes.Soft(text_size=text_md), css=custom_css, head=custom_js) as main:
557
  check_significance_is_reachable_timer = gr.Timer(
@@ -654,6 +665,13 @@ def gradio_app():
654
  interactive=True,
655
  )
656
 
 
 
 
 
 
 
 
657
  with gr.Row():
658
  leaderboard_table = gr.DataFrame(
659
  leaderboard_server.get_leaderboard(category=leaderboard_server.TASKS_CATEGORY_OVERALL),
@@ -725,29 +743,35 @@ def gradio_app():
725
  visible=False,
726
  )
727
 
728
- leaderboard_category_of_tasks.change(
729
- fn=change_leaderboard_category,
730
- inputs=[
731
- leaderboard_category_of_tasks,
732
- tournament_results_dropdown,
733
- ],
734
- outputs=[
735
- leaderboard_table,
736
- leaderboard_table_csv,
737
- leaderboard_table_legend,
738
- leaderboard_scatter_plot,
739
- tournament_results_title,
740
- tournament_results_dropdown,
741
- model_tournament_results_table,
742
- model_tournament_results_table_csv,
743
- ],
744
- )
 
 
 
 
 
745
 
746
  tournament_results_dropdown.change(
747
  fn=fetch_model_tournament_results_table,
748
  inputs=[
749
  tournament_results_dropdown,
750
  leaderboard_category_of_tasks,
 
751
  ],
752
  outputs=model_tournament_results_table,
753
  ).then(
@@ -755,6 +779,7 @@ def gradio_app():
755
  inputs=[
756
  tournament_results_dropdown,
757
  leaderboard_category_of_tasks,
 
758
  ],
759
  outputs=model_tournament_results_table_csv,
760
  )
@@ -792,6 +817,14 @@ def gradio_app():
792
  interactive=True,
793
  )
794
 
 
 
 
 
 
 
 
 
795
  with gr.Row():
796
  model_details_model_tournament_results_table = gr.DataFrame(
797
  value=None,
@@ -820,11 +853,16 @@ def gradio_app():
820
  fn=lambda submission_id: gr.update(visible=True) if submission_id else gr.update(visible=False),
821
  inputs=model_details_model_dropdown,
822
  outputs=model_details_category_of_tasks
 
 
 
 
823
  ).then(
824
  fn=fetch_model_tournament_results_table,
825
  inputs=[
826
  model_details_model_dropdown,
827
  model_details_category_of_tasks,
 
828
  ],
829
  outputs=model_details_model_tournament_results_table
830
  ).then(
@@ -832,25 +870,32 @@ def gradio_app():
832
  inputs=[
833
  model_details_model_dropdown,
834
  model_details_category_of_tasks,
 
835
  ],
836
  outputs=model_details_model_tournament_results_table_csv
837
  )
838
 
839
- model_details_category_of_tasks.change(
840
- fn=fetch_model_tournament_results_table,
841
- inputs=[
842
- model_details_model_dropdown,
843
- model_details_category_of_tasks,
844
- ],
845
- outputs=model_details_model_tournament_results_table,
846
- ).then(
847
- fn=fetch_model_tournament_results_table_csv,
848
- inputs=[
849
- model_details_model_dropdown,
850
- model_details_category_of_tasks,
851
- ],
852
- outputs=model_details_model_tournament_results_table_csv,
853
- )
 
 
 
 
 
 
854
 
855
  with gr.TabItem('Submission'):
856
  with gr.Column():
 
133
  submit_prompt = gr.update(visible=True)
134
  submission_btn_yes = gr.update(interactive=True, visible=True)
135
 
136
+ # TODO: checkbox use_corrected_p_value
137
+
138
  pre_submit_leaderboard_table = gr.update(
139
  value=leaderboard_server.get_leaderboard(pre_submit=pre_submit, category=leaderboard_server.TASKS_CATEGORY_OVERALL_DETAILS),
140
  visible=True,
 
256
  gr.update(value=metadata['link_to_model'], visible=True)
257
  )
258
 
259
+ def fetch_model_tournament_results_table(submission_id, category, use_corrected_p_value):
260
+ kind_of_p_value = leaderboard_server.KINDS_OF_P_VALUE[int(use_corrected_p_value)]
261
+
262
  if submission_id == None or category == None:
263
  return gr.update(
264
  visible=False,
265
  )
266
  else:
267
  return gr.update(
268
+ value=leaderboard_server.get_model_tournament_table(submission_id, category, kind_of_p_value=kind_of_p_value),
269
  visible=True,
270
  )
271
 
272
+ def fetch_model_tournament_results_table_csv(submission_id, category, use_corrected_p_value):
273
+ kind_of_p_value = leaderboard_server.KINDS_OF_P_VALUE[int(use_corrected_p_value)]
274
+
275
  if submission_id == None or category == None:
276
  return gr.update(
277
  visible=False,
278
  )
279
  else:
280
  return gr.update(
281
+ value=leaderboard_server.get_model_tournament_table_csv(submission_id, category, kind_of_p_value=kind_of_p_value),
282
  visible=True,
283
  )
284
 
 
294
 
295
  return task_abbreviation_legend_body
296
 
297
+ def change_leaderboard_category(category, use_corrected_p_value, selected_submission_id):
298
  if category == leaderboard_server.TASKS_CATEGORY_OVERALL:
299
  task_abbreviation_legend = gr.update(
300
  visible=False,
 
325
  visible=True,
326
  )
327
 
328
+ model_tournament_results_table = fetch_model_tournament_results_table(selected_submission_id, category, use_corrected_p_value)
329
+ model_tournament_results_table_csv = fetch_model_tournament_results_table_csv(selected_submission_id, category, use_corrected_p_value)
330
+
331
+ kind_of_p_value = leaderboard_server.KINDS_OF_P_VALUE[int(use_corrected_p_value)]
332
 
333
  leaderboard = gr.update(
334
+ value=leaderboard_server.get_leaderboard(category=category, kind_of_p_value=kind_of_p_value),
335
  visible=True,
336
  )
337
  leaderboard_csv = gr.update(
338
+ value=leaderboard_server.get_leaderboard_csv(category=category, kind_of_p_value=kind_of_p_value),
339
  visible=True,
340
  )
341
  leaderboard_scatter_plot = gr.update(
342
+ value=leaderboard_server.get_leaderboard_scatter_plot(category=category, kind_of_p_value=kind_of_p_value),
343
  visible=True,
344
  )
345
 
 
560
  </script>
561
  """
562
 
563
+ CHECKBOX_USE_CORRECTED_P_VALUE_INFO = "Switch to False Discovery Rate (FDR) guarantees"
564
+ CHECKBOX_USE_CORRECTED_P_VALUE_LABEL = "FDR guarantees"
565
+
566
  def gradio_app():
567
  with gr.Blocks(theme=gr.themes.Soft(text_size=text_md), css=custom_css, head=custom_js) as main:
568
  check_significance_is_reachable_timer = gr.Timer(
 
665
  interactive=True,
666
  )
667
 
668
+ with gr.Row():
669
+ leaderboard_use_corrected_p_value = gr.Checkbox(
670
+ info=CHECKBOX_USE_CORRECTED_P_VALUE_INFO,
671
+ label=CHECKBOX_USE_CORRECTED_P_VALUE_LABEL,
672
+ interactive=True,
673
+ )
674
+
675
  with gr.Row():
676
  leaderboard_table = gr.DataFrame(
677
  leaderboard_server.get_leaderboard(category=leaderboard_server.TASKS_CATEGORY_OVERALL),
 
743
  visible=False,
744
  )
745
 
746
+ for _leaderboard_form_input in [
747
+ leaderboard_category_of_tasks,
748
+ leaderboard_use_corrected_p_value,
749
+ ]:
750
+ _leaderboard_form_input.change(
751
+ fn=change_leaderboard_category,
752
+ inputs=[
753
+ leaderboard_category_of_tasks,
754
+ leaderboard_use_corrected_p_value,
755
+ tournament_results_dropdown,
756
+ ],
757
+ outputs=[
758
+ leaderboard_table,
759
+ leaderboard_table_csv,
760
+ leaderboard_table_legend,
761
+ leaderboard_scatter_plot,
762
+ tournament_results_title,
763
+ tournament_results_dropdown,
764
+ model_tournament_results_table,
765
+ model_tournament_results_table_csv,
766
+ ],
767
+ )
768
 
769
  tournament_results_dropdown.change(
770
  fn=fetch_model_tournament_results_table,
771
  inputs=[
772
  tournament_results_dropdown,
773
  leaderboard_category_of_tasks,
774
+ leaderboard_use_corrected_p_value,
775
  ],
776
  outputs=model_tournament_results_table,
777
  ).then(
 
779
  inputs=[
780
  tournament_results_dropdown,
781
  leaderboard_category_of_tasks,
782
+ leaderboard_use_corrected_p_value,
783
  ],
784
  outputs=model_tournament_results_table_csv,
785
  )
 
817
  interactive=True,
818
  )
819
 
820
+ with gr.Row():
821
+ model_details_use_corrected_p_value = gr.Checkbox(
822
+ info=CHECKBOX_USE_CORRECTED_P_VALUE_INFO,
823
+ label=CHECKBOX_USE_CORRECTED_P_VALUE_LABEL,
824
+ visible=False,
825
+ interactive=True,
826
+ )
827
+
828
  with gr.Row():
829
  model_details_model_tournament_results_table = gr.DataFrame(
830
  value=None,
 
853
  fn=lambda submission_id: gr.update(visible=True) if submission_id else gr.update(visible=False),
854
  inputs=model_details_model_dropdown,
855
  outputs=model_details_category_of_tasks
856
+ ).then(
857
+ fn=lambda submission_id: gr.update(visible=True) if submission_id else gr.update(visible=False),
858
+ inputs=model_details_model_dropdown,
859
+ outputs=model_details_use_corrected_p_value
860
  ).then(
861
  fn=fetch_model_tournament_results_table,
862
  inputs=[
863
  model_details_model_dropdown,
864
  model_details_category_of_tasks,
865
+ model_details_use_corrected_p_value,
866
  ],
867
  outputs=model_details_model_tournament_results_table
868
  ).then(
 
870
  inputs=[
871
  model_details_model_dropdown,
872
  model_details_category_of_tasks,
873
+ model_details_use_corrected_p_value,
874
  ],
875
  outputs=model_details_model_tournament_results_table_csv
876
  )
877
 
878
+ for _model_details_form_input in [
879
+ model_details_category_of_tasks,
880
+ model_details_use_corrected_p_value,
881
+ ]:
882
+ _model_details_form_input.change(
883
+ fn=fetch_model_tournament_results_table,
884
+ inputs=[
885
+ model_details_model_dropdown,
886
+ model_details_category_of_tasks,
887
+ model_details_use_corrected_p_value,
888
+ ],
889
+ outputs=model_details_model_tournament_results_table,
890
+ ).then(
891
+ fn=fetch_model_tournament_results_table_csv,
892
+ inputs=[
893
+ model_details_model_dropdown,
894
+ model_details_category_of_tasks,
895
+ model_details_use_corrected_p_value,
896
+ ],
897
+ outputs=model_details_model_tournament_results_table_csv,
898
+ )
899
 
900
  with gr.TabItem('Submission'):
901
  with gr.Column():
server.py CHANGED
@@ -277,6 +277,8 @@ class LeaderboardServer:
277
  self.CATEGORY_TO_TASK_ABBREVIATION_TO_DETAILS = self._prepare_category_to_task_abbr_to_details()
278
  self.MAX_LENGTH_OF_MODEL_TITLE = 28
279
  self.DIR_DATAFRAMES_CSV = "./dataframes_csv"
 
 
280
 
281
  self.var_lock = ReadWriteLock()
282
  self.submission_ids = set()
@@ -326,45 +328,45 @@ class LeaderboardServer:
326
 
327
  categories = [self.TASKS_CATEGORY_OVERALL, self.TASKS_CATEGORY_OVERALL_DETAILS] + sorted(self.TASKS_CATEGORIES)
328
 
329
- leaderboard_dataframes = {
330
- category: self._get_leaderboard(category=category) if not self.tournament_results_corrupted else pd.DataFrame(columns=['Corrupted, please check integrity'])
331
  for category in categories
332
- }
333
 
334
  with self.var_lock.ro:
335
  submission_ids = self.submission_ids
336
 
337
- tournament_dataframes = {
338
  submission_id: {
339
- category: self._get_model_tournament_table(submission_id, category) if not self.tournament_results_corrupted else pd.DataFrame(columns=['Corrupted, please check integrity'])
340
  for category in categories
341
  }
342
  for submission_id in submission_ids
343
- }
344
 
345
  with self.var_lock.rw:
346
  self.leaderboard_dataframes = leaderboard_dataframes
347
  self.tournament_dataframes = tournament_dataframes
348
 
349
- leaderboard_dataframes_csv = {
350
  category: self._dataframe_to_csv(
351
- self._get_leaderboard(category=category, to_csv=True) if not self.tournament_results_corrupted else pd.DataFrame(columns=['Corrupted, please check integrity']),
352
- f"Leaderboard - {category}.csv"
353
  )
354
  for category in categories
355
- }
356
 
357
  with self.var_lock.ro:
358
- tournament_dataframes_csv = {
359
  submission_id: {
360
  category: self._dataframe_to_csv(
361
- self._get_model_tournament_table(submission_id, category, to_csv=True) if not self.tournament_results_corrupted else pd.DataFrame(columns=['Corrupted, please check integrity']),
362
- f"Tournament table - {self.submission_id_to_data[submission_id]['submission_metadata']['model_name'][:self.MAX_LENGTH_OF_MODEL_TITLE].replace('/', '_')} - {category}.csv",
363
  )
364
  for category in categories
365
  }
366
  for submission_id in submission_ids
367
- }
368
 
369
  with self.var_lock.rw:
370
  self.leaderboard_dataframes_csv = leaderboard_dataframes_csv
@@ -554,30 +556,36 @@ class LeaderboardServer:
554
  df_css.loc[i, c] = ''
555
  return df_css
556
 
557
- def get_model_tournament_table_csv(self, submission_id, category, pre_submit=None):
 
 
558
  if pre_submit == None:
559
  with self.var_lock.ro:
560
- return self.tournament_dataframes_csv[submission_id][category]
561
  else:
562
  return self._dataframe_to_csv(
563
- self._get_model_tournament_table(submission_id, category, pre_submit=pre_submit, to_csv=True),
564
  f"Tournament table - pre-submit - {category}.csv",
565
  )
566
 
567
- def get_model_tournament_table(self, submission_id, category, pre_submit=None):
 
 
568
  if pre_submit == None:
569
  with self.var_lock.ro:
570
- return copy.copy(self.tournament_dataframes[submission_id][category])
571
  else:
572
- return self._get_model_tournament_table(submission_id, category, pre_submit=pre_submit)
573
 
574
- def _get_model_tournament_table(self, submission_id, category, pre_submit=None, to_csv=False):
 
 
575
  model_tournament_table = []
576
 
577
  with self.var_lock.ro:
578
  tournament_results = pre_submit.tournament_results if pre_submit else self.tournament_results
579
 
580
- for competitor_id in tournament_results[submission_id].keys() - {submission_id}: # without self
581
  if competitor_id not in self.submission_id_to_data:
582
  if pre_submit and competitor_id == pre_submit.submission_id:
583
  data = pre_submit.data
@@ -590,13 +598,14 @@ class LeaderboardServer:
590
  for task in self.TASKS_METADATA:
591
  task_category = self.TASKS_METADATA[task]["category"]
592
  if category in (task_category, self.TASKS_CATEGORY_OVERALL, self.TASKS_CATEGORY_OVERALL_DETAILS):
 
 
 
 
593
  if to_csv:
594
- match_results[task] = tournament_results[submission_id][competitor_id][task]["significant"]
595
  else:
596
- match_task_result_details = dict.fromkeys(["significant", "corrected_p_value", "p_value"]) # order has impact to sorting DataFrame
597
- match_task_result_details.update(copy.deepcopy(tournament_results[submission_id][competitor_id][task]))
598
- match_task_result_details["significant"] = str(match_task_result_details["significant"]).lower() # originaly bool
599
- match_task_result_significant = match_task_result_details["significant"]
600
  match_task_result_details = "\n".join(f"{k}: {v}" for k, v in match_task_result_details.items())
601
  match_results[task] = f'<abbr title={xmlQuoteAttr(match_task_result_details)}>{match_task_result_significant}</abbr>'
602
 
@@ -654,7 +663,10 @@ class LeaderboardServer:
654
 
655
  return True
656
 
657
- def _correct_significance_in_tournament_results(self, tournament_results, alpha=0.05):
 
 
 
658
  tournament_results = copy.deepcopy(tournament_results)
659
 
660
  if not self._is_correct_significance_in_tournament_results(tournament_results):
@@ -665,7 +677,7 @@ class LeaderboardServer:
665
  corrected_model_task_pvals = correct_pvals_for_fdr(model_task_pvals)
666
  for competitor_id, task_pval in zip(competitors, corrected_model_task_pvals):
667
  tournament_results[submission_id][competitor_id][task]["corrected_p_value"] = task_pval
668
- tournament_results[submission_id][competitor_id][task]["significant"] = bool(task_pval < alpha)
669
 
670
  return tournament_results
671
 
@@ -680,17 +692,19 @@ class LeaderboardServer:
680
  dataframe.to_csv(filepath, index=False)
681
  return filepath
682
 
683
- def get_leaderboard_scatter_plot(self, pre_submit=None, category=None):
684
  import numpy as np
685
  from analyze_winscore import get_ldb_records, create_scatter_plot_with_curve_with_variances_named
686
 
 
 
687
  #m = self.TASKS_METADATA
688
  #tournament = self.tournament_results
689
  name_map = self.submission_id_to_model_title
690
 
691
  category = category if category else self.TASKS_CATEGORY_OVERALL
692
 
693
- csv_file_path = self.leaderboard_dataframes_csv[self.TASKS_CATEGORY_OVERALL]
694
  ldb_records = get_ldb_records(name_map, csv_file_path)
695
  categories = self.TASKS_CATEGORIES
696
  model_names = list(ldb_records.keys())
@@ -725,29 +739,32 @@ class LeaderboardServer:
725
 
726
  return fig
727
 
728
- def get_leaderboard_csv(self, pre_submit=None, category=None):
729
  if pre_submit == None:
730
  category = category if category else self.TASKS_CATEGORY_OVERALL
 
731
  with self.var_lock.ro:
732
- return self.leaderboard_dataframes_csv[category]
733
  else:
734
  return self._dataframe_to_csv(
735
- self._get_leaderboard(pre_submit=pre_submit, category=category, to_csv=True),
736
  f"Leaderboard - pre-submit - {category}.csv",
737
  )
738
 
739
- def get_leaderboard(self, pre_submit=None, category=None):
740
  if pre_submit == None:
741
  category = category if category else self.TASKS_CATEGORY_OVERALL
 
742
  with self.var_lock.ro:
743
- return copy.copy(self.leaderboard_dataframes[category])
744
  else:
745
- return self._get_leaderboard(pre_submit=pre_submit, category=category)
746
 
747
- def _get_leaderboard(self, pre_submit=None, category=None, to_csv=False):
748
  with self.var_lock.ro:
749
  tournament_results = pre_submit.tournament_results if pre_submit else self.tournament_results
750
  category = category if category else self.TASKS_CATEGORY_OVERALL
 
751
 
752
  if len(tournament_results) == 0:
753
  return pd.DataFrame(columns=['No submissions yet'])
@@ -779,8 +796,13 @@ class LeaderboardServer:
779
  num_of_wins = 0
780
  for competitor_id in tournament_results[submission_id].keys() - {submission_id}: # without self
781
  num_of_competitors += 1
782
- if tournament_results[submission_id][competitor_id][task]["significant"]:
 
 
 
 
783
  num_of_wins += 1
 
784
  task_score = num_of_wins / num_of_competitors * 100 if num_of_competitors > 0 else 100
785
  win_score.setdefault(task_category, []).append(task_score)
786
 
@@ -1061,7 +1083,7 @@ class LeaderboardServer:
1061
  print(f"Locked `submit_lock` for {submission_id = }")
1062
  print(info_msg)
1063
 
1064
- self.update_leaderboard()
1065
 
1066
  if HF_FAKE_TOURNAMENT:
1067
  tournament_results = self.fake_tournament(submission_id, file)
 
277
  self.CATEGORY_TO_TASK_ABBREVIATION_TO_DETAILS = self._prepare_category_to_task_abbr_to_details()
278
  self.MAX_LENGTH_OF_MODEL_TITLE = 28
279
  self.DIR_DATAFRAMES_CSV = "./dataframes_csv"
280
+ self.DEFAULT_KIND_OF_P_VALUE = "p_value"
281
+ self.KINDS_OF_P_VALUE = ["p_value", "corrected_p_value"]
282
 
283
  self.var_lock = ReadWriteLock()
284
  self.submission_ids = set()
 
328
 
329
  categories = [self.TASKS_CATEGORY_OVERALL, self.TASKS_CATEGORY_OVERALL_DETAILS] + sorted(self.TASKS_CATEGORIES)
330
 
331
+ leaderboard_dataframes = {kind_of_p_value: {
332
+ category: self._get_leaderboard(category=category, kind_of_p_value=kind_of_p_value) if not self.tournament_results_corrupted else pd.DataFrame(columns=['Corrupted, please check integrity'])
333
  for category in categories
334
+ } for kind_of_p_value in self.KINDS_OF_P_VALUE}
335
 
336
  with self.var_lock.ro:
337
  submission_ids = self.submission_ids
338
 
339
+ tournament_dataframes = {kind_of_p_value: {
340
  submission_id: {
341
+ category: self._get_model_tournament_table(submission_id, category, kind_of_p_value=kind_of_p_value) if not self.tournament_results_corrupted else pd.DataFrame(columns=['Corrupted, please check integrity'])
342
  for category in categories
343
  }
344
  for submission_id in submission_ids
345
+ } for kind_of_p_value in self.KINDS_OF_P_VALUE}
346
 
347
  with self.var_lock.rw:
348
  self.leaderboard_dataframes = leaderboard_dataframes
349
  self.tournament_dataframes = tournament_dataframes
350
 
351
+ leaderboard_dataframes_csv = {kind_of_p_value: {
352
  category: self._dataframe_to_csv(
353
+ self._get_leaderboard(category=category, to_csv=True, kind_of_p_value=kind_of_p_value) if not self.tournament_results_corrupted else pd.DataFrame(columns=['Corrupted, please check integrity']),
354
+ f"Leaderboard - {category}{' - FDR guarantees' if kind_of_p_value != self.DEFAULT_KIND_OF_P_VALUE else ''}.csv"
355
  )
356
  for category in categories
357
+ } for kind_of_p_value in self.KINDS_OF_P_VALUE}
358
 
359
  with self.var_lock.ro:
360
+ tournament_dataframes_csv = {kind_of_p_value: {
361
  submission_id: {
362
  category: self._dataframe_to_csv(
363
+ self._get_model_tournament_table(submission_id, category, to_csv=True, kind_of_p_value=kind_of_p_value) if not self.tournament_results_corrupted else pd.DataFrame(columns=['Corrupted, please check integrity']),
364
+ f"Tournament table - {self.submission_id_to_data[submission_id]['submission_metadata']['model_name'][:self.MAX_LENGTH_OF_MODEL_TITLE].replace('/', '_')} - {category}{' - FDR guarantees' if kind_of_p_value != self.DEFAULT_KIND_OF_P_VALUE else ''}.csv",
365
  )
366
  for category in categories
367
  }
368
  for submission_id in submission_ids
369
+ } for kind_of_p_value in self.KINDS_OF_P_VALUE}
370
 
371
  with self.var_lock.rw:
372
  self.leaderboard_dataframes_csv = leaderboard_dataframes_csv
 
556
  df_css.loc[i, c] = ''
557
  return df_css
558
 
559
+ def get_model_tournament_table_csv(self, submission_id, category, pre_submit=None, kind_of_p_value=None):
560
+ kind_of_p_value = kind_of_p_value if kind_of_p_value else self.DEFAULT_KIND_OF_P_VALUE
561
+
562
  if pre_submit == None:
563
  with self.var_lock.ro:
564
+ return self.tournament_dataframes_csv[kind_of_p_value][submission_id][category]
565
  else:
566
  return self._dataframe_to_csv(
567
+ self._get_model_tournament_table(submission_id, category, pre_submit=pre_submit, to_csv=True, kind_of_p_value=kind_of_p_value),
568
  f"Tournament table - pre-submit - {category}.csv",
569
  )
570
 
571
+ def get_model_tournament_table(self, submission_id, category, pre_submit=None, kind_of_p_value=None):
572
+ kind_of_p_value = kind_of_p_value if kind_of_p_value else self.DEFAULT_KIND_OF_P_VALUE
573
+
574
  if pre_submit == None:
575
  with self.var_lock.ro:
576
+ return copy.copy(self.tournament_dataframes[kind_of_p_value][submission_id][category])
577
  else:
578
+ return self._get_model_tournament_table(submission_id, category, pre_submit=pre_submit, kind_of_p_value=kind_of_p_value)
579
 
580
+ def _get_model_tournament_table(self, submission_id, category, pre_submit=None, to_csv=False, kind_of_p_value=None):
581
+ kind_of_p_value = kind_of_p_value if kind_of_p_value else self.DEFAULT_KIND_OF_P_VALUE
582
+
583
  model_tournament_table = []
584
 
585
  with self.var_lock.ro:
586
  tournament_results = pre_submit.tournament_results if pre_submit else self.tournament_results
587
 
588
+ for competitor_id in tournament_results[submission_id].keys() - {submission_id}: # without self
589
  if competitor_id not in self.submission_id_to_data:
590
  if pre_submit and competitor_id == pre_submit.submission_id:
591
  data = pre_submit.data
 
598
  for task in self.TASKS_METADATA:
599
  task_category = self.TASKS_METADATA[task]["category"]
600
  if category in (task_category, self.TASKS_CATEGORY_OVERALL, self.TASKS_CATEGORY_OVERALL_DETAILS):
601
+ match_task_result_details = dict.fromkeys(["significant", "corrected_p_value", "p_value"]) # the key order affects DataFrame sorting
602
+ match_task_result_details.update(copy.deepcopy(tournament_results[submission_id][competitor_id][task]))
603
+ match_task_result_significant = self._is_task_pval_significant(match_task_result_details[kind_of_p_value])
604
+
605
  if to_csv:
606
+ match_results[task] = match_task_result_significant
607
  else:
608
+ match_task_result_details["significant"] = str(match_task_result_significant).lower() # originally bool
 
 
 
609
  match_task_result_details = "\n".join(f"{k}: {v}" for k, v in match_task_result_details.items())
610
  match_results[task] = f'<abbr title={xmlQuoteAttr(match_task_result_details)}>{match_task_result_significant}</abbr>'
611
 
 
663
 
664
  return True
665
 
666
+ def _is_task_pval_significant(self, task_pval, alpha=0.05):
667
+ return bool(task_pval < alpha)
668
+
669
+ def _correct_significance_in_tournament_results(self, tournament_results):
670
  tournament_results = copy.deepcopy(tournament_results)
671
 
672
  if not self._is_correct_significance_in_tournament_results(tournament_results):
 
677
  corrected_model_task_pvals = correct_pvals_for_fdr(model_task_pvals)
678
  for competitor_id, task_pval in zip(competitors, corrected_model_task_pvals):
679
  tournament_results[submission_id][competitor_id][task]["corrected_p_value"] = task_pval
680
+ tournament_results[submission_id][competitor_id][task]["significant"] = self._is_task_pval_significant(task_pval)
681
 
682
  return tournament_results
683
 
 
692
  dataframe.to_csv(filepath, index=False)
693
  return filepath
694
 
695
+ def get_leaderboard_scatter_plot(self, pre_submit=None, category=None, kind_of_p_value=None):
696
  import numpy as np
697
  from analyze_winscore import get_ldb_records, create_scatter_plot_with_curve_with_variances_named
698
 
699
+ kind_of_p_value = kind_of_p_value if kind_of_p_value else self.DEFAULT_KIND_OF_P_VALUE
700
+
701
  #m = self.TASKS_METADATA
702
  #tournament = self.tournament_results
703
  name_map = self.submission_id_to_model_title
704
 
705
  category = category if category else self.TASKS_CATEGORY_OVERALL
706
 
707
+ csv_file_path = self.leaderboard_dataframes_csv[kind_of_p_value][self.TASKS_CATEGORY_OVERALL]
708
  ldb_records = get_ldb_records(name_map, csv_file_path)
709
  categories = self.TASKS_CATEGORIES
710
  model_names = list(ldb_records.keys())
 
739
 
740
  return fig
741
 
742
+ def get_leaderboard_csv(self, pre_submit=None, category=None, kind_of_p_value=None):
743
  if pre_submit == None:
744
  category = category if category else self.TASKS_CATEGORY_OVERALL
745
+ kind_of_p_value = kind_of_p_value if kind_of_p_value else self.DEFAULT_KIND_OF_P_VALUE
746
  with self.var_lock.ro:
747
+ return self.leaderboard_dataframes_csv[kind_of_p_value][category]
748
  else:
749
  return self._dataframe_to_csv(
750
+ self._get_leaderboard(pre_submit=pre_submit, category=category, to_csv=True, kind_of_p_value=kind_of_p_value),
751
  f"Leaderboard - pre-submit - {category}.csv",
752
  )
753
 
754
+ def get_leaderboard(self, pre_submit=None, category=None, kind_of_p_value=None):
755
  if pre_submit == None:
756
  category = category if category else self.TASKS_CATEGORY_OVERALL
757
+ kind_of_p_value = kind_of_p_value if kind_of_p_value else self.DEFAULT_KIND_OF_P_VALUE
758
  with self.var_lock.ro:
759
+ return copy.copy(self.leaderboard_dataframes[kind_of_p_value][category])
760
  else:
761
+ return self._get_leaderboard(pre_submit=pre_submit, category=category, kind_of_p_value=kind_of_p_value)
762
 
763
+ def _get_leaderboard(self, pre_submit=None, category=None, to_csv=False, kind_of_p_value=None):
764
  with self.var_lock.ro:
765
  tournament_results = pre_submit.tournament_results if pre_submit else self.tournament_results
766
  category = category if category else self.TASKS_CATEGORY_OVERALL
767
+ kind_of_p_value = kind_of_p_value if kind_of_p_value else self.DEFAULT_KIND_OF_P_VALUE
768
 
769
  if len(tournament_results) == 0:
770
  return pd.DataFrame(columns=['No submissions yet'])
 
796
  num_of_wins = 0
797
  for competitor_id in tournament_results[submission_id].keys() - {submission_id}: # without self
798
  num_of_competitors += 1
799
+
800
+ match_task_result_details = tournament_results[submission_id][competitor_id][task]
801
+ match_task_result_significant = self._is_task_pval_significant(match_task_result_details[kind_of_p_value])
802
+
803
+ if match_task_result_significant:
804
  num_of_wins += 1
805
+
806
  task_score = num_of_wins / num_of_competitors * 100 if num_of_competitors > 0 else 100
807
  win_score.setdefault(task_category, []).append(task_score)
808
 
 
1083
  print(f"Locked `submit_lock` for {submission_id = }")
1084
  print(info_msg)
1085
 
1086
+ self.update_leaderboard() # TODO: Add a comment explaining why this is here. Shouldn't it run only when `do_submit == True`?
1087
 
1088
  if HF_FAKE_TOURNAMENT:
1089
  tournament_results = self.fake_tournament(submission_id, file)