Ludwig Stumpp committed on
Commit
8658420
•
1 Parent(s): 9d7638e

Add filtering by values

Browse files
Files changed (1) hide show
  1. streamlit_app.py +74 -8
streamlit_app.py CHANGED
@@ -1,8 +1,10 @@
1
  import io
2
  import re
 
3
 
4
  import pandas as pd
5
  import streamlit as st
 
6
 
7
  GITHUB_URL = "https://github.com/LudwigStumpp/llm-leaderboard"
8
 
@@ -86,10 +88,11 @@ def remove_markdown_links(text: str) -> str:
86
  return text
87
 
88
 
89
- def filter_dataframe(df: pd.DataFrame, ignore_columns: list[str] | None = None) -> pd.DataFrame:
90
  """
91
- Adds a UI on top of a dataframe to let viewers filter columns
92
 
 
93
  Modified from https://blog.streamlit.io/auto-generate-a-dataframe-filtering-ui-in-streamlit-with-filter_dataframe/
94
 
95
  Args:
@@ -99,11 +102,6 @@ def filter_dataframe(df: pd.DataFrame, ignore_columns: list[str] | None = None)
99
  Returns:
100
  pd.DataFrame: Filtered dataframe
101
  """
102
- modify = st.checkbox("Add filters")
103
-
104
- if not modify:
105
- return df
106
-
107
  df = df.copy()
108
 
109
  if ignore_columns is None:
@@ -123,6 +121,67 @@ def filter_dataframe(df: pd.DataFrame, ignore_columns: list[str] | None = None)
123
  return df
124
 
125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  def setup_basic():
127
  title = "🏆 LLM-Leaderboard"
128
 
@@ -146,7 +205,14 @@ def setup_leaderboard(readme: str):
146
  df_leaderboard["Commercial Use?"] = df_leaderboard["Commercial Use?"].map({"yes": 1, "no": 0}).astype(bool)
147
 
148
  st.markdown("## Leaderboard")
149
- st.dataframe(filter_dataframe(df_leaderboard, ignore_columns=["Commercial Use?", "Publisher"]))
 
 
 
 
 
 
 
150
 
151
 
152
  def setup_benchmarks(readme: str):
 
1
  import io
2
  import re
3
+ from collections.abc import Iterable
4
 
5
  import pandas as pd
6
  import streamlit as st
7
+ from pandas.api.types import is_datetime64_any_dtype, is_numeric_dtype
8
 
9
  GITHUB_URL = "https://github.com/LudwigStumpp/llm-leaderboard"
10
 
 
88
  return text
89
 
90
 
91
+ def filter_dataframe_by_row_and_columns(df: pd.DataFrame, ignore_columns: list[str] | None = None) -> pd.DataFrame:
92
  """
93
+ Filter dataframe by the rows and columns to display.
94
 
95
+ This does not select based on the values in the dataframe, but rather on the index and columns.
96
  Modified from https://blog.streamlit.io/auto-generate-a-dataframe-filtering-ui-in-streamlit-with-filter_dataframe/
97
 
98
  Args:
 
102
  Returns:
103
  pd.DataFrame: Filtered dataframe
104
  """
 
 
 
 
 
105
  df = df.copy()
106
 
107
  if ignore_columns is None:
 
121
  return df
122
 
123
 
124
+ def filter_dataframe_by_column_values(df: pd.DataFrame) -> pd.DataFrame:
125
+ """
126
+ Filter dataframe by the values in the dataframe.
127
+
128
+ Modified from https://blog.streamlit.io/auto-generate-a-dataframe-filtering-ui-in-streamlit-with-filter_dataframe/
129
+
130
+ Args:
131
+ df (pd.DataFrame): Original dataframe
132
+
133
+ Returns:
134
+ pd.DataFrame: Filtered dataframe
135
+ """
136
+ df = df.copy()
137
+
138
+ modification_container = st.container()
139
+
140
+ with modification_container:
141
+ to_filter_columns = st.multiselect("Filter results on:", df.columns)
142
+ left, right = st.columns((1, 20))
143
+
144
+ for column in to_filter_columns:
145
+ if is_numeric_dtype(df[column]):
146
+ _min = float(df[column].min())
147
+ _max = float(df[column].max())
148
+
149
+ if (_min != _max) and pd.notna(_min) and pd.notna(_max):
150
+ step = 0.01
151
+ user_num_input = right.slider(
152
+ f"Values for {column}:",
153
+ min_value=round(_min - step, 2),
154
+ max_value=round(_max + step, 2),
155
+ value=(_min, _max),
156
+ step=step,
157
+ )
158
+ df = df[df[column].between(*user_num_input)]
159
+
160
+ elif is_datetime64_any_dtype(df[column]):
161
+ user_date_input = right.date_input(
162
+ f"Values for {column}:",
163
+ value=(
164
+ df[column].min(),
165
+ df[column].max(),
166
+ ),
167
+ )
168
+ if isinstance(user_date_input, Iterable) and len(user_date_input) == 2:
169
+ user_date_input_datetime = tuple(map(pd.to_datetime, user_date_input))
170
+ start_date, end_date = user_date_input_datetime
171
+ df = df.loc[df[column].between(start_date, end_date)]
172
+
173
+ else:
174
+ selected_values = right.multiselect(
175
+ f"Values for {column}:",
176
+ df[column].unique(),
177
+ )
178
+
179
+ if selected_values:
180
+ df = df[df[column].isin(selected_values)]
181
+
182
+ return df
183
+
184
+
185
  def setup_basic():
186
  title = "🏆 LLM-Leaderboard"
187
 
 
205
  df_leaderboard["Commercial Use?"] = df_leaderboard["Commercial Use?"].map({"yes": 1, "no": 0}).astype(bool)
206
 
207
  st.markdown("## Leaderboard")
208
+ modify = st.checkbox("Add filters")
209
+ if modify:
210
+ df_leaderboard = filter_dataframe_by_row_and_columns(
211
+ df_leaderboard, ignore_columns=["Commercial Use?", "Publisher"]
212
+ )
213
+ df_leaderboard = filter_dataframe_by_column_values(df_leaderboard)
214
+
215
+ st.dataframe(df_leaderboard)
216
 
217
 
218
  def setup_benchmarks(readme: str):