Sasidhar committed on
Commit
d89f303
1 Parent(s): e8ef7ba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +256 -12
app.py CHANGED
@@ -1,17 +1,261 @@
1
  import streamlit as st
 
 
 
 
 
 
2
  from streamlit_ace import st_ace
 
3
 
4
- code= "def f1(x): return str(x * 3)"
5
- exec(code)
6
- st.write(f1(3))
7
 
8
- content = st_ace(language="python")
9
 
10
- st.write(len(content.splitlines()))
11
- exec(content)
12
- code= "def f1(x): return str(x * 3)"
13
-
14
- exec(code)
15
- st.text(content)
16
- st.write(f1(3))
17
- st.write(test_fun(3))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ import os
3
+ from streamlit_option_menu import option_menu
4
+ import pandas as pd
5
+ import plotly.express as px
6
+ from plotly.subplots import make_subplots
7
+ import plotly.graph_objects as go
8
  from streamlit_ace import st_ace
9
+ from streamlit_pandas_profiling import st_profile_report
10
 
 
 
 
11
 
 
12
 
13
def set_data_files_session_object(file_name, file_path):
    """Record the on-disk path of a file in ``st.session_state['data_files']``.

    The session entry is a dict mapping file name to file path. It is created
    the first time this function runs; an existing entry for ``file_name`` is
    overwritten.
    """
    if 'data_files' in st.session_state:
        known_files = st.session_state['data_files']
    else:
        known_files = {}

    known_files[file_name] = file_path
    st.session_state['data_files'] = known_files
22
+
23
def set_filtered_data_session_object(df, file_name):
    """Store ``df`` as the filtered data for ``file_name`` in the session.

    ``st.session_state['filtered_data']`` is a dict keyed by file name. It is
    created on first use, and any frame stored earlier for the same name is
    replaced.
    """
    has_entry = 'filtered_data' in st.session_state
    filtered_by_file = st.session_state['filtered_data'] if has_entry else {}
    filtered_by_file[file_name] = df
    st.session_state['filtered_data'] = filtered_by_file
32
+
33
def set_dataframe_session_object(file_name, file_path):
    """Load a saved file into a DataFrame and cache it in the session.

    The frame is stored under ``st.session_state['data_frames'][file_name]``.
    The dict is created on first use and an earlier frame for the same name
    is replaced.

    Args:
        file_name: Key under which the loaded frame is cached.
        file_path: Location of the file on disk; its extension selects the
            pandas reader.
    """
    extension = os.path.splitext(file_path)[1].lower()
    if extension in ('.xls', '.xlsx'):
        # Excel workbooks cannot be parsed by read_csv; use the Excel reader.
        frame = pd.read_excel(file_path)
    else:
        # CSV stays the default reader, as before. Pickle files are
        # deliberately NOT unpickled here: loading a pickle that came from
        # an upload can execute arbitrary code.
        frame = pd.read_csv(file_path)

    if 'data_frames' in st.session_state:
        frames_by_file = st.session_state['data_frames']
    else:
        frames_by_file = {}
    frames_by_file[file_name] = frame
    st.session_state['data_frames'] = frames_by_file
42
+
43
def save_file(file_object):
    """Write an uploaded file under ``./uploaded_files`` and register it.

    After the bytes are written, the file is recorded in the session through
    ``set_data_files_session_object`` and ``set_dataframe_session_object``.

    Args:
        file_object: Upload object exposing ``name`` and ``getbuffer()``.
    """
    # The name is supplied by the client. Keep only its last path component
    # so a crafted name such as "../../x" cannot escape the upload directory.
    safe_name = os.path.basename(file_object.name.replace("\\", "/"))

    upload_dir = os.path.join(os.getcwd(), "uploaded_files")
    # Create the directory here too, so the function does not depend on the
    # caller having prepared it.
    os.makedirs(upload_dir, exist_ok=True)

    file_path = os.path.join(upload_dir, safe_name)
    with open(file_path, "wb") as f:
        f.write(file_object.getbuffer())

    set_data_files_session_object(safe_name, file_path)
    set_dataframe_session_object(safe_name, file_path)
50
+
51
+
52
+
53
+
54
def create_upload_file_component():
    """Render the file uploader and save every file the user provides.

    Nothing happens until at least one file is uploaded. The
    ``uploaded_files`` directory is then created under the current working
    directory (if missing) and each upload is passed to ``save_file``.
    """
    accepted_types = ['csv', 'xls', 'xlsx', 'pkl', 'pdf']
    uploads = st.file_uploader(
        "Upload one file at a time.",
        type=accepted_types,
        accept_multiple_files=True,
    )

    if not uploads:
        return

    target_dir = os.path.join(os.getcwd(), "uploaded_files")
    os.makedirs(target_dir, mode=0o777, exist_ok=True)
    for upload in uploads:
        save_file(upload)
63
+
64
def create_component_to_add_target_func(selected_files, dfs, i):
    """Render the work-in-progress UI for adding a target column.

    Shows a text input for the target variable name and writes the result of
    a hard-coded demo function. ``selected_files`` and ``dfs`` are not used
    yet; ``i`` only makes the widget key unique.
    """
    # The entered name is not consumed yet; the call is kept for the widget.
    st.text_input("Name of the target variable", key="target_var" + str(i))

    code = "def f1(x): return str(x * 3)"
    # exec() inside a function cannot create new local names, so calling a
    # bare ``f1`` afterwards raises NameError. Execute into an explicit
    # namespace and fetch the function from it instead.
    namespace = {}
    exec(code, namespace)
    st.write(namespace["f1"](3))
79
+
80
+
81
def _parse_filter_value(series, raw_text):
    """Convert text typed by the user into a value comparable with ``series``.

    Numeric columns need a number; ``None`` is returned when the text cannot
    be parsed as one. For other columns the text is returned unchanged.
    """
    if not pd.api.types.is_numeric_dtype(series):
        return raw_text
    try:
        return float(raw_text)
    except ValueError:
        return None


def _render_filter_controls(df, i):
    """Draw the filter widgets for file ``i`` and return the filtered frame.

    Returns ``None`` when the user has not yet supplied a usable filter.
    """
    action = "data_filter"
    col_to_filter = st.selectbox("Select the field to Filter on ", df.columns.values,
                                 key=action + "_col_filter_" + str(i))
    filter_operation = st.selectbox("Operation ",
                                    ['Greater Than', 'Equals', 'Less Than', "In", "In Between"],
                                    key=action + "_col_filter_op_" + str(i))
    if col_to_filter is None or not filter_operation:
        return None

    value_key = action + "_col_filter_val_" + str(i)
    series = df[col_to_filter]

    if filter_operation == 'In':
        chosen = st.multiselect("Select Values to Filter on ", series.unique(), key=value_key)
        return df[series.isin(chosen)] if chosen else None

    if filter_operation == 'In Between':
        # A range needs two ends: offer the sorted distinct values and start
        # with the full span selected, then keep rows inside the chosen span.
        options = list(series.dropna().sort_values().unique())
        if len(options) < 2:
            return None
        low, high = st.select_slider("Select range", options=options,
                                     value=(options[0], options[-1]), key=value_key)
        return df[series.between(low, high)]

    # 'Equals', 'Greater Than' and 'Less Than' all take one typed value.
    raw_text = st.text_input("Enter a numeric value", key=value_key)
    if not raw_text:
        return None
    threshold = _parse_filter_value(series, raw_text)
    if threshold is None:
        st.warning("Please enter a valid number to filter this column.")
        return None

    if filter_operation == 'Equals':
        return df[series == threshold]
    if filter_operation == 'Greater Than':
        return df[series > threshold]
    return df[series < threshold]


def _render_univariate_charts(df_for_analysis, i):
    """Let the user pick columns and draw one chart per selected column."""
    # The key keeps this widget distinct when several files are shown at once.
    cols_for_analysis = st.multiselect("Select Columns for Univariate Analysis",
                                       options=df_for_analysis.columns.values,
                                       key="univariate_cols_" + str(i))
    for col in cols_for_analysis:
        series = df_for_analysis[col]
        dtype_name = str(series.dtype)
        distinct_count = series.nunique()

        if dtype_name in ['int64', 'float64'] and distinct_count > 10:
            # Many distinct numeric values: plot each value against the index.
            fig = px.scatter(x=df_for_analysis.index, y=series, labels=dict(x="Index", y=col))
            st.plotly_chart(fig, use_container_width=True)

        elif dtype_name in ['object', 'category'] or distinct_count <= 10:
            # Few distinct values: counts as bars, share as a line on a
            # secondary axis, limited to the 20 most frequent values.
            value_dist_df = series.value_counts(normalize=True)[:20].reset_index()
            value_dist_df.columns = [col, '% Distribution']

            value_dist_df_counts = series.value_counts()[:20].reset_index()
            value_dist_df_counts.columns = [col, 'Count']
            value_dist_df = value_dist_df.merge(value_dist_df_counts, on=col)

            trace1 = go.Bar(x=value_dist_df[col], y=value_dist_df['Count'], name='Count',
                            marker=dict(color='rgb(34,163,192)'))
            trace2 = go.Scatter(x=value_dist_df[col], y=value_dist_df['% Distribution'],
                                name='% Distribution', yaxis='y2')

            fig = make_subplots(specs=[[{"secondary_y": True}]])
            fig.add_trace(trace1)
            fig.add_trace(trace2, secondary_y=True)
            fig['layout'].update(height=600, width=800, title=f"{col} data distribution",
                                 xaxis=dict(tickangle=-90))

            st.plotly_chart(fig, use_container_width=True)


def create_component_for_analysis_for_single_df(selected_files, dfs, i):
    """Render the analysis panel for the ``i``-th selected file.

    Args:
        selected_files: Names of the files chosen for analysis.
        dfs: Mapping of file name to its DataFrame.
        i: Position of the file in ``selected_files``; also used to build
            unique widget keys.
    """
    file_name = selected_files[i]
    st.subheader(file_name)
    df = dfs[file_name]

    filter_data = st.checkbox("Analyse on Filtered Data", key="filter_data_check" + str(i))

    filtered_df = None
    if filter_data:
        filtered_df = _render_filter_controls(df, i)
        if filtered_df is not None:
            set_filtered_data_session_object(filtered_df, file_name)

    analysis_actions = st.multiselect("What analysis do you wish to do?",
                                      ['Summary of Data', 'Sample Data', 'Get Profile', 'Univariate Analysis',
                                       'Bivariate Analysis', 'Add a Target Column'],
                                      key='analysis_action_' + str(i))
    if not analysis_actions:
        return

    # Use the unfiltered frame until a usable filter exists, instead of
    # reading a session entry that may be missing or left over from earlier.
    df_for_analysis = filtered_df if filtered_df is not None else df

    # NOTE: 'Bivariate Analysis' is offered in the list but has no handler.
    for action in analysis_actions:
        if action == 'Sample Data':
            # sample() raises when asked for more rows than exist.
            st.write(df_for_analysis.sample(min(10, len(df_for_analysis))))
        elif action == 'Get Profile':
            pr = df_for_analysis.profile_report()
            st_profile_report(pr)
        elif action == 'Summary of Data':
            st.write(df_for_analysis.describe())
        elif action == 'Univariate Analysis':
            _render_univariate_charts(df_for_analysis, i)
        elif action == "Add a Target Column":
            code = "def f1(x): return str(x * 3)"
            # exec() inside a function cannot bind a local ``f1``; run the
            # snippet in its own namespace and look the function up there.
            namespace = {}
            exec(code, namespace)
            st.write(namespace["f1"](3))
186
+
187
+
188
def create_component_for_data_analysis():
    """Render the analysis page: choose files, then one panel per file.

    When no file has been registered in the session a hint is written
    instead. Each selected file gets its own column, and its DataFrame is
    taken from the session cache or read from disk when not cached yet.
    """
    if 'data_files' not in st.session_state:
        st.write("Upload a file to start analysis")
        return

    selected_files = st.multiselect("Select the File(S) to analyze", st.session_state['data_files'].keys())
    if not selected_files:
        return

    panels = st.columns(len(selected_files))

    frame_cache = st.session_state['data_frames']
    dfs = {}
    for name in selected_files:
        if name not in frame_cache:
            # Not cached yet: load it from the path recorded at upload time.
            frame_cache[name] = pd.read_csv(st.session_state['data_files'][name])
        dfs[name] = frame_cache[name]

    for position, panel in enumerate(panels):
        with panel:
            create_component_for_analysis_for_single_df(selected_files, dfs, position)
211
+
212
+
213
+ # def build_interface_for_model_analysis():
214
# Page entry point: title, sidebar navigation, then the selected page.
st.title("Model Results Analyzer")
with st.sidebar:
    selected_menu = option_menu(None, ["Home", "Upload Data", "Add Features", "Analyze Data"],
                                icons=['house', 'cloud-upload', "list-task", 'gear'],
                                menu_icon="cast", default_index=0, orientation="vertical",
                                styles={
                                    "container": {"padding": "0!important", "background-color": "#fafafa"},
                                    "icon": {"color": "orange", "font-size": "15px"},
                                    "nav-link": {"font-size": "15px", "text-align": "left", "margin": "0px",
                                                 "--hover-color": "#eee"},
                                    "nav-link-selected": {"background-color": "green"},
                                })

if selected_menu == "Home":
    st.markdown('**This is to analyse models performance.**')

elif selected_menu == "Upload Data":
    create_upload_file_component()

    if 'data_files' in st.session_state:
        # List the names of every file registered so far.
        st.write(pd.DataFrame(data={"File Name": list(st.session_state['data_files'])}))

elif selected_menu == "Analyze Data":
    create_component_for_data_analysis()

elif selected_menu == "Add Features":
    if 'data_files' in st.session_state:
        selected_file = st.selectbox("Select the File(S) to analyze", st.session_state['data_files'].keys())

        if selected_file:
            df = st.session_state['data_frames'][selected_file]
            st.header("Enter the function definiton to create a new feature")
            feature_name = st.text_input("Enter the New Feature Name")
            st.warning("please retain the function signature as 'add_feature(row)'")

            content = st_ace(language="python", value="def add_feature(row):")

            # Only run once the user has typed something beyond the template.
            if content and content.strip() != 'def add_feature(row):':
                if not feature_name:
                    st.error("Enter a name for the new feature before running the code.")
                else:
                    # SECURITY: this executes code typed into the page with
                    # the full privileges of the app process. Only expose
                    # this page to trusted users.
                    user_namespace = dict(globals())
                    try:
                        exec(content, user_namespace)
                        add_feature = user_namespace.get('add_feature')
                        if not callable(add_feature):
                            raise NameError("the code must define a function named 'add_feature'")
                        new_values = df.apply(add_feature, axis=1)
                    except Exception as err:
                        # User code can fail in any way (including while it is
                        # still being typed); report it instead of crashing.
                        st.exception(err)
                    else:
                        df[feature_name] = new_values
                        st.session_state['data_frames'][selected_file] = df

            st.write(df.columns.values)
261
+