Corey Morris
committed on
Commit
•
a79afe8
1
Parent(s):
c823b6d
Added bar chart for abstract algebra data.
Browse files
app.py
CHANGED
@@ -2,6 +2,49 @@ import streamlit as st
|
|
2 |
import pandas as pd
|
3 |
import plotly.express as px
|
4 |
from result_data_processor import ResultDataProcessor
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
data_provider = ResultDataProcessor()
|
7 |
|
@@ -113,6 +156,7 @@ def create_plot(df, arc_column, moral_column, models=None):
|
|
113 |
|
114 |
# Custom scatter plots
|
115 |
st.header('Custom scatter plots')
|
|
|
116 |
selected_x_column = st.selectbox('Select x-axis', filtered_data.columns.tolist(), index=0)
|
117 |
selected_y_column = st.selectbox('Select y-axis', filtered_data.columns.tolist(), index=3)
|
118 |
|
@@ -123,9 +167,9 @@ else:
|
|
123 |
st.write("Please select different columns for the x and y axes.")
|
124 |
|
125 |
# end of custom scatter plots
|
|
|
|
|
126 |
|
127 |
-
st.header('Moral Scenarios Performance')
|
128 |
-
st.write("The dashed red line represents the random chance performance of 0.25")
|
129 |
|
130 |
fig = create_plot(filtered_data, 'MMLU_average', 'MMLU_moral_scenarios')
|
131 |
st.plotly_chart(fig)
|
@@ -137,13 +181,16 @@ fig = px.histogram(filtered_data, x="MMLU_moral_scenarios", marginal="rug", hove
|
|
137 |
st.plotly_chart(fig)
|
138 |
|
139 |
st.header('Abstract Algebra Performance')
|
140 |
-
|
141 |
-
st.plotly_chart(fig)
|
142 |
|
143 |
-
fig = create_plot(filtered_data, 'MMLU_average', 'MMLU_abstract_algebra')
|
144 |
-
st.plotly_chart(fig)
|
145 |
|
146 |
|
|
|
|
|
|
|
|
|
|
|
|
|
147 |
st.markdown("***Thank you to hugging face for running the evaluations and supplying the data as well as the original authors of the evaluations.***")
|
148 |
|
149 |
st.markdown("""
|
|
|
2 |
import pandas as pd
|
3 |
import plotly.express as px
|
4 |
from result_data_processor import ResultDataProcessor
|
5 |
+
import matplotlib.pyplot as plt
|
6 |
+
import numpy as np
|
7 |
+
|
8 |
+
|
9 |
+
def plot_top_n(df, target_column, n=10):
    """Render a grouped bar chart of the top ``n`` rows by ``target_column``.

    For each selected row three bars are drawn side by side: the value of
    ``target_column`` and of ``'MMLU_average'`` on the primary (left) y-axis,
    and the value of ``'Parameters'`` on a secondary (right) y-axis. The
    finished figure is handed to Streamlit via ``st.pyplot``.

    Args:
        df: DataFrame containing ``target_column``, ``'MMLU_average'`` and
            ``'Parameters'`` columns. Its index values are used as the x tick
            labels (the axis is labelled 'Model').
        target_column: Name of the column used both to rank rows and as the
            first bar series.
        n: Maximum number of rows to show; fewer are drawn if ``df`` is
            shorter.

    Returns:
        None. The chart is emitted as a side effect.

    Raises:
        KeyError: If one of the required columns is missing from ``df``.
    """
    top_n = df.nlargest(n, target_column)

    # Initialize the bar plot
    fig, ax1 = plt.subplots(figsize=(10, 5))

    # Figures created through pyplot stay registered until explicitly closed.
    # Streamlit reruns the script on every interaction, so without the close
    # in ``finally`` each rerun would leave another figure alive.
    try:
        # Set width for each bar and their positions
        width = 0.28
        ind = np.arange(len(top_n))

        # Plot target_column and MMLU_average on the primary y-axis,
        # shifted so the three bars of a row sit next to each other.
        ax1.bar(ind - width, top_n[target_column], width=width,
                color='blue', label=target_column)
        ax1.bar(ind, top_n['MMLU_average'], width=width,
                color='orange', label='MMLU_average')

        # Set the primary y-axis labels and title
        ax1.set_title(f'Top {n} performing models on {target_column}')
        ax1.set_xlabel('Model')
        ax1.set_ylabel('Score')

        # Parameters get their own y-axis (separate scale from the scores).
        ax2 = ax1.twinx()
        ax2.bar(ind + width, top_n['Parameters'], width=width,
                color='red', label='Parameters')

        # Set the secondary y-axis labels
        ax2.set_ylabel('Parameters', color='red')
        ax2.tick_params(axis='y', labelcolor='red')

        # Set the x-ticks and their labels
        ax1.set_xticks(ind)
        ax1.set_xticklabels(top_n.index, rotation=45, ha="right")

        # Lay out the axes first, then anchor the legend to the right of them.
        fig.tight_layout()
        fig.legend(loc='center left', bbox_to_anchor=(1, 0.5))

        # Show the plot
        st.pyplot(fig)
    finally:
        plt.close(fig)
|
48 |
|
49 |
data_provider = ResultDataProcessor()
|
50 |
|
|
|
156 |
|
157 |
# Custom scatter plots
|
158 |
st.header('Custom scatter plots')
|
159 |
+
st.write("The dashed red line represents the random chance performance of 0.25")
|
160 |
selected_x_column = st.selectbox('Select x-axis', filtered_data.columns.tolist(), index=0)
|
161 |
selected_y_column = st.selectbox('Select y-axis', filtered_data.columns.tolist(), index=3)
|
162 |
|
|
|
167 |
st.write("Please select different columns for the x and y axes.")
|
168 |
|
169 |
# end of custom scatter plots
|
170 |
+
st.markdown("## Notable findings and plots")
|
171 |
+
st.markdown("### Moral Scenarios Performance")
|
172 |
|
|
|
|
|
173 |
|
174 |
fig = create_plot(filtered_data, 'MMLU_average', 'MMLU_moral_scenarios')
|
175 |
st.plotly_chart(fig)
|
|
|
181 |
st.plotly_chart(fig)
|
182 |
|
183 |
st.header('Abstract Algebra Performance')
|
184 |
+
st.write("Small models showed surprisingly strong performance on the abstract algebra task. A 6 Billion parameter model is tied for the best performance on this task and there are a number of other small models in the top 10.")
|
|
|
185 |
|
|
|
|
|
186 |
|
187 |
|
188 |
+
# Usage example:
|
189 |
+
plot_top_n(filtered_data, 'MMLU_abstract_algebra', 10)
|
190 |
+
|
191 |
+
fig = create_plot(filtered_data, 'Parameters', 'MMLU_abstract_algebra')
|
192 |
+
st.plotly_chart(fig)
|
193 |
+
|
194 |
st.markdown("***Thank you to hugging face for running the evaluations and supplying the data as well as the original authors of the evaluations.***")
|
195 |
|
196 |
st.markdown("""
|