Evan Frick committed on
Commit
4001fbf
1 Parent(s): 01f0e4f
Files changed (1) hide show
  1. app.py +27 -10
app.py CHANGED
@@ -43,13 +43,13 @@ def main():
43
  # Iterate over each model in the selected benchmark
44
  for model, metrics in benchmark_data.items():
45
 
 
 
46
  model = path_split(path_splitext(model)[0])[-1]
47
  # Flatten the metrics dictionary if there are nested metrics
48
  # For example, in "human_preference_v1", there are subcategories like "overall", "hard_prompt", etc.
49
  # We'll aggregate these or allow the user to select subcategories as needed
50
  if isinstance(metrics, dict):
51
- # Check if metrics contain nested dictionaries
52
- nested_keys = list(metrics.keys())
53
  # If there are nested keys, we can allow the user to select a subcategory
54
  # For simplicity, let's assume we want to display all nested metrics concatenated
55
  flattened_metrics = {}
@@ -63,12 +63,14 @@ def main():
63
  flattened_metrics[subkey] = submetrics
64
  records.append({
65
  "Model": model,
 
66
  **flattened_metrics
67
  })
68
  else:
69
  # If metrics are not nested, just add them directly
70
  records.append({
71
  "Model": model,
 
72
  "Value": metrics
73
  })
74
 
@@ -79,23 +81,27 @@ def main():
79
  df = df.loc[:, ~df.apply(contains_list)]
80
 
81
  if "human" not in selected_benchmark:
82
- df = df[sorted(df.columns, key=str.lower)]
83
 
84
  # Set 'Model' as the index
85
- df.set_index("Model", inplace=True)
86
 
87
 
88
  # Create two columns: one for spacing and one for the search bar
89
- col1, col2, col3 = st.columns([1, 3, 1]) # Adjust the ratios as needed
90
  with col1:
91
- # **Column Search Functionality**
92
- # st.markdown("#### Filter Columns")
93
  column_search = st.text_input("", placeholder="Search metrics...", key="search")
94
- # column_search = st.text_input("Search for metrics (column names):", "")
 
 
 
 
 
95
 
96
  if column_search:
97
  # Filter columns that contain the search term (case-insensitive)
98
- filtered_columns = [col for col in df.columns if column_search.lower() in col.lower()]
99
  if filtered_columns:
100
  df_display = df[filtered_columns]
101
  else:
@@ -105,8 +111,19 @@ def main():
105
  # If no search term, display all columns
106
  df_display = df
107
 
 
 
 
 
 
 
 
 
 
 
108
  # Display the DataFrame
109
- st.dataframe(df_display.sort_values(df_display.columns[0], ascending=False) if len(df_display) else df_display, use_container_width=True)
 
110
 
111
  # Optional: Allow user to download the data as CSV
112
  csv = df_display.to_csv()
 
43
  # Iterate over each model in the selected benchmark
44
  for model, metrics in benchmark_data.items():
45
 
46
+ model_type = "LLM Judge" if model.endswith(".jsonl") else "Reward Model"
47
+
48
  model = path_split(path_splitext(model)[0])[-1]
49
  # Flatten the metrics dictionary if there are nested metrics
50
  # For example, in "human_preference_v1", there are subcategories like "overall", "hard_prompt", etc.
51
  # We'll aggregate these or allow the user to select subcategories as needed
52
  if isinstance(metrics, dict):
 
 
53
  # If there are nested keys, we can allow the user to select a subcategory
54
  # For simplicity, let's assume we want to display all nested metrics concatenated
55
  flattened_metrics = {}
 
63
  flattened_metrics[subkey] = submetrics
64
  records.append({
65
  "Model": model,
66
+ "Type": model_type,
67
  **flattened_metrics
68
  })
69
  else:
70
  # If metrics are not nested, just add them directly
71
  records.append({
72
  "Model": model,
73
+ "Type": model_type,
74
  "Value": metrics
75
  })
76
 
 
81
  df = df.loc[:, ~df.apply(contains_list)]
82
 
83
  if "human" not in selected_benchmark:
84
+ df = df[sorted(df.columns, key=lambda s: s.lower() if s != "Type" else "A")]
85
 
86
  # Set 'Model' as the index
87
+ df.set_index(["Model"], inplace=True)
88
 
89
 
90
  # Create two columns: one for spacing and one for the search bar
91
+ col1, col2, col3 = st.columns([1, 1, 2]) # Adjust the ratios as needed
92
  with col1:
93
+
 
94
  column_search = st.text_input("", placeholder="Search metrics...", key="search")
95
+
96
+ with col2:
97
+
98
+ model_search = st.text_input("", placeholder="Filter Models (separate criteria with ,) ...", key="search2")
99
+
100
+ model_search_crit = model_search.replace(", ", "|").replace(",", "|")
101
 
102
  if column_search:
103
  # Filter columns that contain the search term (case-insensitive)
104
+ filtered_columns = ["Type"] + [col for col in df.columns if column_search.lower() in col.lower()]
105
  if filtered_columns:
106
  df_display = df[filtered_columns]
107
  else:
 
111
  # If no search term, display all columns
112
  df_display = df
113
 
114
+ if model_search:
115
+
116
+ df_display = df_display[df_display.index.str.contains(model_search_crit, case=False)]
117
+
118
+ if len(df_display) == 0:
119
+ st.warning("No models match your filter.")
120
+ df_display = pd.DataFrame() # Empty DataFrame
121
+
122
+
123
+
124
  # Display the DataFrame
125
+ st.dataframe(df_display.sort_values(df_display.columns[1], ascending=False).style.background_gradient(cmap='summer_r', axis=0)
126
+ if len(df_display) else df_display, use_container_width=True, height=500)
127
 
128
  # Optional: Allow user to download the data as CSV
129
  csv = df_display.to_csv()