Evan Frick
committed on
Commit
•
4001fbf
1
Parent(s):
01f0e4f
app.py
CHANGED
@@ -43,13 +43,13 @@ def main():
|
|
43 |
# Iterate over each model in the selected benchmark
|
44 |
for model, metrics in benchmark_data.items():
|
45 |
|
|
|
|
|
46 |
model = path_split(path_splitext(model)[0])[-1]
|
47 |
# Flatten the metrics dictionary if there are nested metrics
|
48 |
# For example, in "human_preference_v1", there are subcategories like "overall", "hard_prompt", etc.
|
49 |
# We'll aggregate these or allow the user to select subcategories as needed
|
50 |
if isinstance(metrics, dict):
|
51 |
-
# Check if metrics contain nested dictionaries
|
52 |
-
nested_keys = list(metrics.keys())
|
53 |
# If there are nested keys, we can allow the user to select a subcategory
|
54 |
# For simplicity, let's assume we want to display all nested metrics concatenated
|
55 |
flattened_metrics = {}
|
@@ -63,12 +63,14 @@ def main():
|
|
63 |
flattened_metrics[subkey] = submetrics
|
64 |
records.append({
|
65 |
"Model": model,
|
|
|
66 |
**flattened_metrics
|
67 |
})
|
68 |
else:
|
69 |
# If metrics are not nested, just add them directly
|
70 |
records.append({
|
71 |
"Model": model,
|
|
|
72 |
"Value": metrics
|
73 |
})
|
74 |
|
@@ -79,23 +81,27 @@ def main():
|
|
79 |
df = df.loc[:, ~df.apply(contains_list)]
|
80 |
|
81 |
if "human" not in selected_benchmark:
|
82 |
-
df = df[sorted(df.columns, key=
|
83 |
|
84 |
# Set 'Model' as the index
|
85 |
-
df.set_index("Model", inplace=True)
|
86 |
|
87 |
|
88 |
# Create two columns: one for spacing and one for the search bar
|
89 |
-
col1, col2, col3 = st.columns([1,
|
90 |
with col1:
|
91 |
-
|
92 |
-
# st.markdown("#### Filter Columns")
|
93 |
column_search = st.text_input("", placeholder="Search metrics...", key="search")
|
94 |
-
|
|
|
|
|
|
|
|
|
|
|
95 |
|
96 |
if column_search:
|
97 |
# Filter columns that contain the search term (case-insensitive)
|
98 |
-
filtered_columns = [col for col in df.columns if column_search.lower() in col.lower()]
|
99 |
if filtered_columns:
|
100 |
df_display = df[filtered_columns]
|
101 |
else:
|
@@ -105,8 +111,19 @@ def main():
|
|
105 |
# If no search term, display all columns
|
106 |
df_display = df
|
107 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
# Display the DataFrame
|
109 |
-
st.dataframe(df_display.sort_values(df_display.columns[
|
|
|
110 |
|
111 |
# Optional: Allow user to download the data as CSV
|
112 |
csv = df_display.to_csv()
|
|
|
43 |
# Iterate over each model in the selected benchmark
|
44 |
for model, metrics in benchmark_data.items():
|
45 |
|
46 |
+
model_type = "LLM Judge" if model.endswith(".jsonl") else "Reward Model"
|
47 |
+
|
48 |
model = path_split(path_splitext(model)[0])[-1]
|
49 |
# Flatten the metrics dictionary if there are nested metrics
|
50 |
# For example, in "human_preference_v1", there are subcategories like "overall", "hard_prompt", etc.
|
51 |
# We'll aggregate these or allow the user to select subcategories as needed
|
52 |
if isinstance(metrics, dict):
|
|
|
|
|
53 |
# If there are nested keys, we can allow the user to select a subcategory
|
54 |
# For simplicity, let's assume we want to display all nested metrics concatenated
|
55 |
flattened_metrics = {}
|
|
|
63 |
flattened_metrics[subkey] = submetrics
|
64 |
records.append({
|
65 |
"Model": model,
|
66 |
+
"Type": model_type,
|
67 |
**flattened_metrics
|
68 |
})
|
69 |
else:
|
70 |
# If metrics are not nested, just add them directly
|
71 |
records.append({
|
72 |
"Model": model,
|
73 |
+
"Type": model_type,
|
74 |
"Value": metrics
|
75 |
})
|
76 |
|
|
|
81 |
df = df.loc[:, ~df.apply(contains_list)]
|
82 |
|
83 |
if "human" not in selected_benchmark:
|
84 |
+
df = df[sorted(df.columns, key=lambda s: s.lower() if s != "Type" else "A")]
|
85 |
|
86 |
# Set 'Model' as the index
|
87 |
+
df.set_index(["Model"], inplace=True)
|
88 |
|
89 |
|
90 |
# Create two columns: one for spacing and one for the search bar
|
91 |
+
col1, col2, col3 = st.columns([1, 1, 2]) # Adjust the ratios as needed
|
92 |
with col1:
|
93 |
+
|
|
|
94 |
column_search = st.text_input("", placeholder="Search metrics...", key="search")
|
95 |
+
|
96 |
+
with col2:
|
97 |
+
|
98 |
+
model_search = st.text_input("", placeholder="Filter Models (separate criteria with ,) ...", key="search2")
|
99 |
+
|
100 |
+
model_search_crit = model_search.replace(", ", "|").replace(",", "|")
|
101 |
|
102 |
if column_search:
|
103 |
# Filter columns that contain the search term (case-insensitive)
|
104 |
+
filtered_columns = ["Type"] + [col for col in df.columns if column_search.lower() in col.lower()]
|
105 |
if filtered_columns:
|
106 |
df_display = df[filtered_columns]
|
107 |
else:
|
|
|
111 |
# If no search term, display all columns
|
112 |
df_display = df
|
113 |
|
114 |
+
if model_search:
|
115 |
+
|
116 |
+
df_display = df_display[df_display.index.str.contains(model_search_crit, case=False)]
|
117 |
+
|
118 |
+
if len(df_display) == 0:
|
119 |
+
st.warning("No models match your filter.")
|
120 |
+
df_display = pd.DataFrame() # Empty DataFrame
|
121 |
+
|
122 |
+
|
123 |
+
|
124 |
# Display the DataFrame
|
125 |
+
st.dataframe(df_display.sort_values(df_display.columns[1], ascending=False).style.background_gradient(cmap='summer_r', axis=0)
|
126 |
+
if len(df_display) else df_display, use_container_width=True, height=500)
|
127 |
|
128 |
# Optional: Allow user to download the data as CSV
|
129 |
csv = df_display.to_csv()
|