idolezal committed on
Commit
a2b6394
·
1 Parent(s): 32a9aae

Try analyzing winscore with bokeh

Browse files
Files changed (3) hide show
  1. analyze_winscore.py +181 -0
  2. app.py +4 -8
  3. server.py +37 -0
analyze_winscore.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+
3
+ import csv
4
+ import random
5
+ import numpy as np
6
+ from bokeh.plotting import figure
7
+ from bokeh.models import LabelSet, LogScale
8
+ from bokeh.palettes import Turbo256 # A color palette with enough colors
9
+ from bokeh.models import ColumnDataSource
10
+
11
# Function to fit a polynomial curve and return the x and y values of the fitted curve
def fit_curve(x, y, degree=1, num_points=100):
    """Fit a polynomial of the given degree to (x, y) and sample the fit.

    Args:
        x: Sequence of x coordinates (must be non-empty).
        y: Sequence of y coordinates, same length as x.
        degree: Degree of the fitted polynomial (default: linear).
        num_points: Number of evenly spaced samples of the fitted curve
            returned over [min(x), max(x)] (default 100, the previous
            hard-coded value).

    Returns:
        Tuple (x_fit, y_fit) of numpy arrays sampling the fitted polynomial.
    """
    # Least-squares polynomial fit; requires at least degree + 1 points.
    coeffs = np.polyfit(x, y, degree)
    poly = np.poly1d(coeffs)
    x_fit = np.linspace(min(x), max(x), num_points)
    y_fit = poly(x_fit)
    return x_fit, y_fit
19
+
20
# Split points into inliers and outliers using Tukey's 1.5 * IQR fences.
def remove_outliers(x, y):
    """Separate (x, y) points into inliers and outliers via the IQR rule.

    A point is kept only when both its x and its y value lie within
    [Q1 - 1.5*IQR, Q3 + 1.5*IQR] of the respective coordinate's distribution.

    Returns:
        Tuple (x_inliers, y_inliers, x_outliers, y_outliers) of numpy arrays.
    """
    xs = np.array(x)
    ys = np.array(y)

    def inlier_mask(values):
        # Tukey's fences: within 1.5 * IQR of the quartiles.
        q1, q3 = np.percentile(values, [25, 75])
        margin = 1.5 * (q3 - q1)
        return (values >= q1 - margin) & (values <= q3 + margin)

    # A point must be an inlier on BOTH axes to survive.
    keep = inlier_mask(xs) & inlier_mask(ys)
    return xs[keep], ys[keep], xs[~keep], ys[~keep]
44
+
45
def get_ldb_records(name_map, csv_file_path):
    """Load leaderboard rows from a CSV file, keyed by model title.

    Args:
        name_map: Mapping of submission id -> model title; only titles present
            among its values are accepted (an unknown 'Model' value raises
            KeyError, which doubles as validation).
        csv_file_path: Path to a leaderboard CSV with at least a 'Model' column.

    Returns:
        Dict mapping model title -> CSV row dict. Rows whose model starts
        with "Qwen/Qwen2.5" are deliberately skipped.
    """
    # Identity map over the known titles; the lookup below validates that
    # every CSV row refers to a model the leaderboard knows about.
    model_mapping = {model_title: model_title for model_title in name_map.values()}

    ldb_records = {}
    # newline='' is required by the csv module so quoted embedded newlines
    # are parsed correctly; encoding is pinned to avoid platform defaults.
    with open(csv_file_path, mode='r', newline='', encoding='utf-8') as file:
        reader = csv.DictReader(file)
        for row in reader:
            if row['Model'].startswith("Qwen/Qwen2.5"):
                continue
            sanitized_name = model_mapping[row['Model']]
            ldb_records[sanitized_name] = row

    return ldb_records
58
+
59
def create_scatter_plot_with_curve_with_variances_named(category, variance_across_categories, x, y, sizes, model_names, ldb_records):
    """Build a Bokeh scatter plot of model performance vs. model size.

    Marker size encodes each model's score variance across task categories,
    marker shape encodes the leaderboard 'Type' column ('chat' -> circle,
    anything else -> triangle), and a dashed linear fit over the non-outlier
    points is overlaid. The x axis uses a log scale.

    Args:
        category: Label used for the y-axis (metric name).
        variance_across_categories: Dict model name -> variance of its scores.
        x: Per-model values plotted on the x axis (model sizes, per the axis label).
        y: Performance values, aligned with x.
        sizes: Values shown in the "Model Size" hover tooltip, aligned with x.
        model_names: Model names aligned with x/y.
        ldb_records: Dict model name -> leaderboard row; row['Type'] picks the marker.

    Returns:
        The configured bokeh.plotting.figure.
    """
    FONTSIZE = 10

    # Remove outliers (IQR rule on both axes)
    x_filtered, y_filtered, x_outliers, y_outliers = remove_outliers(x, y)

    # Scale the variance to a range suitable for marker sizes (e.g., between 5 and 30)
    min_marker_size = 5
    max_marker_size = 30

    def scale_variance_to_size(variance):
        # Scale variance to marker size (linear mapping)
        # NOTE(review): raises ZeroDivisionError if all variances are equal — confirm upstream.
        return min_marker_size + (variance - min(variance_across_categories.values())) * (max_marker_size - min_marker_size) / (max(variance_across_categories.values()) - min(variance_across_categories.values()))

    # Function to get the variance for a given model name
    def get_variance_for_model(model_name):
        print(model_name)
        return variance_across_categories.get(model_name, 0)  # Default to 0 if model not found

    # Get marker sizes and variances for the filtered data.
    # NOTE(review): np.in1d matches by value, so duplicate x values would
    # select extra/mismatched rows — confirm x values are unique.
    filtered_variances = [get_variance_for_model(mname) for mname in np.array(model_names)[np.in1d(x, x_filtered)]]
    marker_sizes_filtered = [scale_variance_to_size(var) for var in filtered_variances]

    # Get marker sizes and variances for the outlier data
    outlier_variances = [get_variance_for_model(mname) for mname in np.array(model_names)[np.in1d(x, x_outliers)]]
    marker_sizes_outliers = [scale_variance_to_size(var) for var in outlier_variances]

    # Assign symbols to the filtered data points by model type ('chat' -> circle)
    filtered_symbols = ['circle' if ldb_records[mname]['Type'] == 'chat' else 'triangle' for mname in np.array(model_names)[np.in1d(x, x_filtered)]]

    # Assign symbols to the outlier data points by model type ('chat' -> circle)
    outlier_symbols = ['circle' if ldb_records[mname]['Type'] == 'chat' else 'triangle' for mname in np.array(model_names)[np.in1d(x, x_outliers)]]

    # Define a color palette with enough colors by striding through Turbo256.
    # NOTE(review): stride is 0 if there are more than 256 models — confirm bound.
    stride = len(Turbo256) // len(model_names)
    color_palette = list(Turbo256[::stride])  # Adjust this palette size based on the number of data points
    random.shuffle(color_palette)

    # Create unique colors for filtered data
    filtered_colors = [color_palette[i % len(color_palette)] for i in range(len(x_filtered))]

    # Create unique colors for outliers (offset so they don't reuse filtered colors)
    outlier_colors = [color_palette[(i + len(x_filtered)) % len(color_palette)] for i in range(len(x_outliers))]

    # Create ColumnDataSource with filtered data
    source_filtered = ColumnDataSource(data={
        'x': x_filtered,
        'y': y_filtered,
        'sizes': np.array(sizes)[np.in1d(x, x_filtered)],  # Keep original model sizes
        'marker_sizes': marker_sizes_filtered,  # New field for marker sizes based on variance
        'model_names': np.array(model_names)[np.in1d(x, x_filtered)],
        'variance': filtered_variances,  # New field for variance
        'color': filtered_colors,
        'symbol': filtered_symbols
    })

    # Create ColumnDataSource with outlier data
    source_outliers = ColumnDataSource(data={
        'x': x_outliers,
        'y': y_outliers,
        'sizes': np.array(sizes)[np.in1d(x, x_outliers)],  # Keep original model sizes
        'marker_sizes': marker_sizes_outliers,  # New field for marker sizes based on variance
        'model_names': np.array(model_names)[np.in1d(x, x_outliers)],
        'variance': outlier_variances,  # New field for variance
        'color': outlier_colors,
        'symbol': outlier_symbols
    })

    # Create a figure for the category (hover tooltips only; default tools)
    p = figure(#width=900, height=800, #title=f"{category} vs Model Size vs Variance Across Categories",
               #tools="pan,wheel_zoom,box_zoom,reset,save",
               tooltips=[("Model", "@model_names"),
                         ("Model Size (B parameters)", "@sizes"),
                         ("Variance", "@variance"),  # Added variance to the tooltip
                         ("Performance", "@y")])

    # Plot filtered data with unique colors and scaled marker sizes
    p.scatter('x', 'y', size='marker_sizes', source=source_filtered, fill_alpha=0.6, color='color', marker='symbol')

    # Plot outliers with unique colors and scaled marker sizes
    p.scatter('x', 'y', size='marker_sizes', source=source_outliers, fill_alpha=0.6, color='color', marker='symbol')

    # Fit and plot a curve over the inliers only
    x_fit, y_fit = fit_curve(x_filtered, y_filtered, degree=1)  # You can adjust the degree of the polynomial

    p.line(x_fit, y_fit, line_color='gray', line_width=2, line_dash='dashed')

    # Add labels (with slight offset to avoid overlap)
    p.add_layout(LabelSet(x='x', y='y', text='model_names', source=source_filtered,
                          x_offset=5, y_offset=8, text_font_size=f"{FONTSIZE-4}pt", text_color='black'))

    p.add_layout(LabelSet(x='x', y='y', text='model_names', source=source_outliers,
                          x_offset=5, y_offset=8, text_font_size=f"{FONTSIZE-4}pt", text_color='black'))

    # Set axis labels
    p.xaxis.axis_label = 'Model Size (B parameters)'
    p.yaxis.axis_label = f'{category}'

    # Set axis label font sizes
    p.xaxis.axis_label_text_font_size = f"{FONTSIZE}pt"  # Set font size for x-axis label
    p.yaxis.axis_label_text_font_size = f"{FONTSIZE}pt"  # Set font size for y-axis label

    # Increase tick label font sizes
    p.xaxis.major_label_text_font_size = f"{FONTSIZE}pt"  # Increase x-axis tick label size
    p.yaxis.major_label_text_font_size = f"{FONTSIZE}pt"  # Increase y-axis tick label size

    #p.x_range.start = 1
    #p.x_range.end = 18

    #p.y_range.end = 60

    # Log-scale x axis with explicit ticks suited to model sizes in B params
    p.x_scale = LogScale()

    p.xaxis.ticker = [1,2,4,7,12,15]
    p.xaxis.axis_label_text_font_style = "normal"
    p.yaxis.axis_label_text_font_style = "normal"

    return p
180
+
181
+ # EOF
app.py CHANGED
@@ -6,8 +6,6 @@ import gradio as gr
6
  from gradio.themes.utils.sizes import text_md
7
  from gradio_modal import Modal
8
 
9
- from bokeh.plotting import figure
10
-
11
  from content import (
12
  HEADER_MARKDOWN,
13
  LEADERBOARD_TAB_TITLE_MARKDOWN,
@@ -628,12 +626,10 @@ def gradio_app():
628
  gr.Markdown(LEADERBOARD_TAB_TITLE_MARKDOWN)
629
 
630
  with gr.Row():
631
- x = list(range(11))
632
- y0 = x
633
- y1 = [10 - i for i in x]
634
- fig = figure(width=250, height=250, title='Plot1')
635
- fig.circle(x, y0, size=10, color="navy", alpha=0.5)
636
- p1 = gr.Plot(value=fig, label='Plot 1')
637
 
638
  with gr.Row():
639
  leaderboard_category_of_tasks = gr.Dropdown(
 
6
  from gradio.themes.utils.sizes import text_md
7
  from gradio_modal import Modal
8
 
 
 
9
  from content import (
10
  HEADER_MARKDOWN,
11
  LEADERBOARD_TAB_TITLE_MARKDOWN,
 
626
  gr.Markdown(LEADERBOARD_TAB_TITLE_MARKDOWN)
627
 
628
  with gr.Row():
629
+ gr.Plot(
630
+ value=leaderboard_server.get_bokeh_figure(),
631
+ label='Foo',
632
+ )
 
 
633
 
634
  with gr.Row():
635
  leaderboard_category_of_tasks = gr.Dropdown(
server.py CHANGED
@@ -622,6 +622,43 @@ class LeaderboardServer:
622
  dataframe.to_csv(filepath, index=False)
623
  return filepath
624
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
625
  def get_leaderboard_csv(self, pre_submit=None, category=None):
626
  if pre_submit == None:
627
  category = category if category else self.TASKS_CATEGORY_OVERALL
 
622
  dataframe.to_csv(filepath, index=False)
623
  return filepath
624
 
625
+ def get_bokeh_figure(self):
626
+ import numpy as np
627
+ from analyze_winscore import get_ldb_records, create_scatter_plot_with_curve_with_variances_named
628
+
629
+ #m = self.TASKS_METADATA
630
+ #tournament = self.tournament_results
631
+ name_map = self.submission_id_to_model_title
632
+
633
+ category = self.TASKS_CATEGORY_OVERALL
634
+ csv_file_path = self.leaderboard_dataframes_csv[category]
635
+ ldb_records = get_ldb_records(name_map, csv_file_path)
636
+ categories = self.TASKS_CATEGORIES
637
+ model_names = list(ldb_records.keys())
638
+ sizes = [float(ldb_records[model]['# θ (B)']) for model in model_names]
639
+ average_performance = [float(ldb_records[model]['Average ⬆️']) for model in model_names]
640
+
641
+ variances={}
642
+ for model, record in ldb_records.items():
643
+ r = [float(record[cat]) for cat in categories]
644
+ variances[model] = np.var(r)
645
+
646
+ print(variances)
647
+ print(min(variances.values()))
648
+ variance_across_categories = variances
649
+
650
+ fig = create_scatter_plot_with_curve_with_variances_named(
651
+ 'Overall Duel Win Score',
652
+ variance_across_categories,
653
+ sizes,
654
+ average_performance,
655
+ sizes,
656
+ model_names,
657
+ ldb_records,
658
+ )
659
+
660
+ return fig
661
+
662
  def get_leaderboard_csv(self, pre_submit=None, category=None):
663
  if pre_submit == None:
664
  category = category if category else self.TASKS_CATEGORY_OVERALL