kaikaidai committed on
Commit
ced5a34
·
verified ·
1 Parent(s): 8863707

Leaderboard, decimal places

Browse files
Files changed (1) hide show
  1. app.py +48 -29
app.py CHANGED
@@ -28,11 +28,9 @@ from common import (
28
  )
29
  from example_metrics import EXAMPLE_METRICS
30
 
31
- import hashlib
32
-
33
 
34
  # Model and ELO score data
35
- DEFAULT_ELO = 1500 # Starting ELO for new models
36
  K_FACTOR = 32 # Standard chess K-factor, adjust as needed
37
  elo_scores = defaultdict(lambda: DEFAULT_ELO)
38
  vote_counts = defaultdict(int)
@@ -210,7 +208,7 @@ def get_current_votes():
210
  return get_votes(db)
211
 
212
 
213
- def get_leaderboard():
214
  """Generate leaderboard data using fresh votes from MongoDB."""
215
  # Get fresh voting data
216
  voting_data = get_current_votes()
@@ -263,12 +261,16 @@ def get_leaderboard():
263
  leaderboard = []
264
  for model in model_data.keys():
265
  votes = matches[model]
 
 
 
 
266
  elo = ratings[model]
267
  ci = 1.96 * (400 / (votes + 1) ** 0.5) if votes > 0 else 0
268
  data = {
269
  "Model": model,
270
- "ELO Score": f"{elo:.2f}",
271
- "95% CI": f"±{ci:.2f}",
272
  "# Votes": votes,
273
  "Organization": model_data[model]["organization"],
274
  "License": model_data[model]["license"],
@@ -532,12 +534,52 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
532
  gr.Markdown(ACKNOWLEDGEMENTS)
533
 
534
  with gr.TabItem("Leaderboard"):
 
 
 
 
 
 
 
 
535
  stats_display = gr.Markdown()
536
  leaderboard_table = gr.Dataframe(
537
  headers=["Model", "ELO", "95% CI", "Matches", "Organization", "License"],
538
  datatype=["str", "number", "str", "number", "str", "str", "str"],
539
  )
540
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
541
  with gr.TabItem("Policy"):
542
  gr.Markdown(POLICY_CONTENT)
543
 
@@ -758,29 +800,6 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
758
  outputs=[send_btn, regenerate_button],
759
  )
760
 
761
- # Update the leaderboard
762
- def refresh_leaderboard():
763
- """Refresh the leaderboard data and stats."""
764
- leaderboard = get_leaderboard()
765
- data = [
766
- [
767
- entry["Model"],
768
- float(entry["ELO Score"]),
769
- entry["95% CI"],
770
- entry["# Votes"],
771
- entry["Organization"],
772
- entry["License"],
773
- ]
774
- for entry in leaderboard
775
- ]
776
- stats = get_leaderboard_stats()
777
- return [gr.update(value=data), gr.update(value=stats)]
778
-
779
- # Add the load event at the very end, just before demo.launch()
780
- demo.load(
781
- fn=refresh_leaderboard, inputs=None, outputs=[leaderboard_table, stats_display]
782
- )
783
-
784
  # Add click handlers for metric buttons
785
  outputs_list = [eval_prompt] + [var_input for _, var_input in variable_rows]
786
 
 
28
  )
29
  from example_metrics import EXAMPLE_METRICS
30
 
 
 
31
 
32
  # Model and ELO score data
33
+ DEFAULT_ELO = 1200 # Starting ELO for new models
34
  K_FACTOR = 32 # Standard chess K-factor, adjust as needed
35
  elo_scores = defaultdict(lambda: DEFAULT_ELO)
36
  vote_counts = defaultdict(int)
 
208
  return get_votes(db)
209
 
210
 
211
+ def get_leaderboard(show_preliminary=True):
212
  """Generate leaderboard data using fresh votes from MongoDB."""
213
  # Get fresh voting data
214
  voting_data = get_current_votes()
 
261
  leaderboard = []
262
  for model in model_data.keys():
263
  votes = matches[model]
264
+ # Skip models with < 500 votes if show_preliminary is False
265
+ if not show_preliminary and votes < 500:
266
+ continue
267
+
268
  elo = ratings[model]
269
  ci = 1.96 * (400 / (votes + 1) ** 0.5) if votes > 0 else 0
270
  data = {
271
  "Model": model,
272
+ "ELO Score": f"{int(elo)}",
273
+ "95% CI": f"±{int(ci)}",
274
  "# Votes": votes,
275
  "Organization": model_data[model]["organization"],
276
  "License": model_data[model]["license"],
 
534
  gr.Markdown(ACKNOWLEDGEMENTS)
535
 
536
  with gr.TabItem("Leaderboard"):
537
+ with gr.Row():
538
+ with gr.Column(scale=1):
539
+ show_preliminary = gr.Checkbox(
540
+ label="Reveal preliminary results",
541
+ value=True, # Checked by default
542
+ info="Show all models, including models with fewer human ratings (< 500 votes)",
543
+ interactive=True
544
+ )
545
  stats_display = gr.Markdown()
546
  leaderboard_table = gr.Dataframe(
547
  headers=["Model", "ELO", "95% CI", "Matches", "Organization", "License"],
548
  datatype=["str", "number", "str", "number", "str", "str", "str"],
549
  )
550
 
551
+ # Update refresh_leaderboard to use the checkbox value
552
+ def refresh_leaderboard(show_preliminary):
553
+ """Refresh the leaderboard data and stats."""
554
+ leaderboard = get_leaderboard(show_preliminary)
555
+ data = [
556
+ [
557
+ entry["Model"],
558
+ float(entry["ELO Score"]),
559
+ entry["95% CI"],
560
+ entry["# Votes"],
561
+ entry["Organization"],
562
+ entry["License"],
563
+ ]
564
+ for entry in leaderboard
565
+ ]
566
+ stats = get_leaderboard_stats()
567
+ return [gr.update(value=data), gr.update(value=stats)]
568
+
569
+ # Add change handler for checkbox
570
+ show_preliminary.change(
571
+ fn=refresh_leaderboard,
572
+ inputs=[show_preliminary],
573
+ outputs=[leaderboard_table, stats_display]
574
+ )
575
+
576
+ # Update the load event
577
+ demo.load(
578
+ fn=refresh_leaderboard,
579
+ inputs=[show_preliminary],
580
+ outputs=[leaderboard_table, stats_display]
581
+ )
582
+
583
  with gr.TabItem("Policy"):
584
  gr.Markdown(POLICY_CONTENT)
585
 
 
800
  outputs=[send_btn, regenerate_button],
801
  )
802
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
803
  # Add click handlers for metric buttons
804
  outputs_list = [eval_prompt] + [var_input for _, var_input in variable_rows]
805