Evan Frick committed on
Commit
631e505
1 Parent(s): 1c6662a
Files changed (1) hide show
  1. app.py +121 -0
app.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import pickle
4
+ from os.path import split as path_split, splitext as path_splitext
5
+
6
# Page-wide settings for the explorer.
st.set_page_config(
    page_title="PPE Metrics Explorer",
    initial_sidebar_state="expanded",
    layout="wide",  # let the metrics table span the entire screen width
)

# Heading rendered at the top of the app.
st.title("PPE Metrics Explorer")
14
+
15
+ @st.cache_data
16
def load_data(file_path):
    """Load and return the object stored in a pickle file.

    Args:
        file_path: Path to the pickle file to read.

    Returns:
        The unpickled Python object.

    Raises:
        OSError: If the file cannot be opened.
        pickle.UnpicklingError: If the file content is not a valid pickle.
    """
    # Pickle is a binary format: the file must be opened in 'rb' mode,
    # otherwise pickle.load() fails on the text stream.
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(file_path, 'rb') as file:
        return pickle.load(file)
23
+
24
def contains_list(column):
    """Report whether any cell of *column* holds a ``list`` instance."""

    def _is_list(cell):
        return isinstance(cell, list)

    list_mask = column.apply(_is_list)
    return list_mask.any()
26
+
27
def _flatten_metrics(metrics):
    """Flatten one level of nesting in a model's metrics mapping.

    Values that are themselves dicts are expanded into compound
    ``"<subkey> - <metric_name>"`` keys; all other values are kept under
    their original key.
    """
    flattened = {}
    for subkey, submetrics in metrics.items():
        if isinstance(submetrics, dict):
            for metric_name, value in submetrics.items():
                flattened[f"{subkey} - {metric_name}"] = value
        else:
            flattened[subkey] = submetrics
    return flattened


def main():
    """Render the metrics explorer page.

    Loads ``results.pkl``, lets the user pick a benchmark, builds a table
    with one row per model and one column per (flattened) metric, offers a
    case-insensitive column search, and exposes the displayed columns as a
    CSV download.
    """
    # Load the pickle data
    data = load_data('results.pkl')

    # Dropdown for selecting one of the benchmarks (top-level keys)
    benchmarks = list(data.keys())
    selected_benchmark = st.selectbox("Select a Benchmark", benchmarks)
    benchmark_data = data[selected_benchmark]

    # Build one record per model in the selected benchmark
    records = []
    for model, metrics in benchmark_data.items():
        # Keep only the final path component, without its extension
        model = path_split(path_splitext(model)[0])[-1]
        if isinstance(metrics, dict):
            records.append({"Model": model, **_flatten_metrics(metrics)})
        else:
            # A bare (non-dict) result is shown in a single "Value" column
            records.append({"Model": model, "Value": metrics})

    if not records:
        # Without any records there is no "Model" column to index on.
        st.warning("No results available for this benchmark.")
        return

    df = pd.DataFrame(records)

    # Drop columns that contain lists
    df = df.loc[:, ~df.apply(contains_list)]

    # Benchmarks whose name contains "human" keep their original column
    # order; all others are sorted alphabetically, ignoring case.
    if "human" not in selected_benchmark:
        df = df[sorted(df.columns, key=str.lower)]

    # Set 'Model' as the index
    df.set_index("Model", inplace=True)

    # Three columns: the first holds the search bar, the other two only
    # act as spacing. Adjust the ratios as needed.
    search_col, _, _ = st.columns([1, 3, 1])
    with search_col:
        # A non-empty label that is visually collapsed avoids Streamlit's
        # empty-label accessibility warning while keeping the same layout.
        column_search = st.text_input(
            "Search metrics",
            placeholder="Search metrics...",
            key="search",
            label_visibility="collapsed",
        )

    if column_search:
        # Filter columns that contain the search term (case-insensitive)
        needle = column_search.lower()
        filtered_columns = [col for col in df.columns if needle in col.lower()]
        if filtered_columns:
            df_display = df[filtered_columns]
        else:
            st.warning("No columns match your search.")
            df_display = pd.DataFrame()  # Empty DataFrame
    else:
        # If no search term, display all columns
        df_display = df

    # Show the table sorted by its first column, highest value first.
    # `.empty` also covers the "rows but no columns" case, where indexing
    # `columns[0]` would raise IndexError.
    if df_display.empty:
        df_shown = df_display
    else:
        df_shown = df_display.sort_values(df_display.columns[0], ascending=False)
    st.dataframe(df_shown, use_container_width=True)

    # Allow the user to download the displayed columns as CSV
    csv = df_display.to_csv()
    st.download_button(
        label="Download data as CSV",
        data=csv,
        file_name=f"{selected_benchmark}_metrics.csv",
        mime='text/csv',
    )


if __name__ == "__main__":
    main()