ProtonDataLabs committed on
Commit
638eb56
1 Parent(s): ee661cd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +316 -48
app.py CHANGED
@@ -1,70 +1,338 @@
1
  import streamlit as st
2
  import pandas as pd
 
3
  import matplotlib.pyplot as plt
 
 
4
  import plotly.express as px
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
 
 
 
6
 
 
 
7
 
8
- df = pd.read_csv(r'FY2021_merged_file.csv', dtype={"Fiscal Week": "string",
9
- "Fiscal Year": "category",
10
- "Chain Code": "category",
11
- "Store": "category",
12
- "Address": "string",
13
- "Postal Code": "float",
14
- "City": "category",
15
- "State": "category",
16
- "Container Code": "category",
17
- "Sales Item Category": "category",
18
- "units sold":"float",
19
- "SalePrice":"float",
20
- "sales $":"float"})
21
 
22
- df["Postal Code"] = df["Postal Code"].convert_dtypes()
23
- df["units sold"] = df["units sold"].convert_dtypes()
24
 
25
- # Extract fiscal year and week from the 'Fiscal Week' column for sorting
26
- df['Fiscal Year'] = df['Fiscal Week'].apply(lambda x: int(x.split(' ')[1])) # Extract year as an integer
27
- df['Week Number'] = df['Fiscal Week'].apply(lambda x: int(x.split('Week ')[1])) # Extract week as an integer
 
28
 
29
- # Sort the DataFrame by fiscal year and week number
30
- df = df.sort_values(by=['Fiscal Year', 'Week Number'])
 
31
 
32
- # Reformat 'Fiscal Week' for display (e.g., 'FY21W51')
33
- df['Fiscal Week Short'] = df.apply(lambda x: f"FY{x['Fiscal Year']%100}W{x['Week Number']}", axis=1)
 
34
 
35
- # Ensure the short fiscal week column is treated as a categorical variable and sorted by the order of appearance
36
- df['Fiscal Week Short'] = pd.Categorical(df['Fiscal Week Short'], categories=df['Fiscal Week Short'].unique(), ordered=True)
 
37
 
38
 
39
- # df['Fiscal Week'] = df['Fiscal Week'].apply(lambda x: x.replace('FY 20', 'FY').replace('Week ', 'W'))
 
 
 
 
 
40
 
41
- # Sort by 'Fiscal Week'
42
- # df = df.sort_values(by='Fiscal Week')
43
 
44
- st.title('Sales Data Dashboard')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
 
 
 
46
 
47
- state = st.selectbox('Select State', df['State'].unique())
48
- feature = st.selectbox('Select Feature for Grouping', ['Chain Code', 'Sales Item Category', 'Fiscal Week'])
49
 
50
- # Filter the dataframe based on selections
51
- filtered_df = df[df['State'] == state]
 
 
 
52
 
53
- # Plot based on user's selection
54
- if feature == 'Sales Item Category':
55
- st.subheader(f'Sales Data for {state} - Grouped by Sales Item Category')
56
- group_data = filtered_df.groupby(['Fiscal Week Short', 'Sales Item Category'])['units sold'].sum().reset_index()
57
- fig = px.bar(group_data, x='Fiscal Week Short', y='units sold', color='Sales Item Category',
58
- title=f'Units Sold over Fiscal Week in {state} by Sales Item Category',
59
- labels={'Units Sold': 'Units Sold'})
60
 
61
- elif feature == 'Chain Code':
62
- st.subheader(f'Sales Data for {state} - Grouped by Chain Code')
63
- group_data = filtered_df.groupby(['Fiscal Week Short', 'Chain Code'])['units sold'].sum().reset_index()
64
- fig = px.bar(group_data, x='Fiscal Week Short', y='units sold', color='Chain Code',
65
- title=f'Units Sold over Fiscal Week in {state} by Chain Code',
66
- labels={'Units Sold': 'Units Sold'})
67
 
68
- print(df.head(5))
69
- # Display the interactive plot
70
- st.plotly_chart(fig)
 
1
  import streamlit as st
2
  import pandas as pd
3
+ import numpy as np
4
  import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+ import matplotlib.dates as mdates
7
  import plotly.express as px
8
+ import re
9
+ from datetime import datetime, timedelta
10
+ import warnings
11
+ # Load the data
12
+ df = pd.read_csv(r"fy21-24.csv",
13
+ dtype={"FyWeek": "string",
14
+ "Fy": "category",
15
+ "Chaincode": "category",
16
+ "Store": "category",
17
+ "Address": "string",
18
+ "Zipcode": "float",
19
+ "City": "category",
20
+ "State": "category",
21
+ "Containercode": "category",
22
+ "Itemtype": "category",
23
+ "SalesVolume":"float",
24
+ "UnitPrice":"float",
25
+ "Sales":"float"})
26
 
27
+ # Convert columns
28
+ df["Zipcode"] = df["Zipcode"].convert_dtypes()
29
+ df["SalesVolume"] = df["SalesVolume"].convert_dtypes()
30
 
31
+ # Title for the app
32
+ st.title('Sales Data Dashboard')
33
 
34
+ # Initialize session state for storing which card was clicked and item type
35
+ if 'active_card' not in st.session_state:
36
+ st.session_state['active_card'] = None
37
+ if 'selected_item_type' not in st.session_state:
38
+ st.session_state['selected_item_type'] = 'CORE' # Set default to 'CORE'
39
+ # Initialize session state for storing the selected state and feature
40
+ if 'selected_state' not in st.session_state:
41
+ st.session_state['selected_state'] = df['State'].unique()[0] # Default to the first state
42
+ if 'selected_feature' not in st.session_state:
43
+ st.session_state['selected_feature'] = 'Chaincode' # Default to 'Chain Code'
 
 
 
44
 
45
+ # Four columns for the card buttons
46
+ col1, col2, col3, col4 = st.columns(4)
47
 
48
+ # Define buttons for plot categories, update session state when clicked
49
+ with col1:
50
+ if st.button("Sales Volume Trend for Item Category"):
51
+ st.session_state['active_card'] = 'card1'
52
 
53
+ with col2:
54
+ if st.button("Sales Volume & Unit Price Correlation for Item Category and Container Code"):
55
+ st.session_state['active_card'] = 'card2'
56
 
57
+ with col3:
58
+ if st.button("Price vs Sales Trend by Year"):
59
+ st.session_state['active_card'] = 'card3'
60
 
61
+ with col4:
62
+ if st.button("Total Sales Volume by Price Band"):
63
+ st.session_state['active_card'] = 'card4'
64
 
65
 
66
+ ########################################### CARD #1 ####################################################
67
if st.session_state['active_card'] == 'card1':
    # Card 1: sales volume per fiscal week for one state, optionally split by
    # chain code or item type.

    # Derive numeric year and week from labels such as 'FY 2021 Week 51' so the
    # rows can be ordered chronologically instead of lexically.
    df['Fiscal Year'] = df['FyWeek'].str.split(' ').str[1].astype(int)
    df['Week Number'] = df['FyWeek'].str.split('Week ').str[1].astype(int)
    df = df.sort_values(by=['Fiscal Year', 'Week Number'])

    # Short display label (e.g. 'FY21W51'), built with vectorised string
    # operations rather than a row-wise apply.
    short_label = (
        'FY' + (df['Fiscal Year'] % 100).astype(str)
        + 'W' + df['Week Number'].astype(str)
    )
    # Ordered categorical: categories follow order of appearance, which is
    # chronological because the frame was just sorted.
    df['Fiscal Week Short'] = pd.Categorical(
        short_label, categories=short_label.unique(), ordered=True
    )
    week_order = list(df['Fiscal Week Short'].cat.categories)

    # Dropdown for selecting the state (persisted in session_state)
    state_options = list(df['State'].unique())
    st.session_state['selected_state'] = st.selectbox(
        'Select State',
        state_options,
        index=state_options.index(st.session_state['selected_state']),
    )

    # Dropdown for selecting the grouping feature (persisted in session_state)
    feature_options = ['Chaincode', 'Itemtype', 'FyWeek']
    st.session_state['selected_feature'] = st.selectbox(
        'Select Feature for Grouping',
        feature_options,
        index=feature_options.index(st.session_state['selected_feature']),
    )

    state = st.session_state['selected_state']
    feature = st.session_state['selected_feature']
    feature_labels = {
        'Itemtype': 'Item Type',
        'Chaincode': 'Chain Code',
        'FyWeek': 'Fiscal Week',
    }

    # Filter the dataframe based on the selected state
    filtered_df = df[df['State'] == state]

    # 'FyWeek' means "no extra split": one bar per week. Otherwise the bars
    # are coloured by the chosen feature.
    split_by = None if feature == 'FyWeek' else feature
    group_keys = ['Fiscal Week Short'] + ([split_by] if split_by else [])
    title = f'Sales Volume over Fiscal Week in {state}'
    if split_by:
        title += f' by {feature_labels[feature]}'

    st.subheader(f'Sales Data for {state} - Grouped by {feature_labels[feature]}')
    # Group on the ordered week column so the x-axis is chronological;
    # observed=True avoids emitting empty rows for category combinations that
    # do not occur in the selected state.
    group_data = (
        filtered_df.groupby(group_keys, observed=True)['SalesVolume']
        .sum()
        .reset_index()
    )
    fig = px.bar(
        group_data,
        x='Fiscal Week Short',
        y='SalesVolume',
        color=split_by,
        title=title,
        labels={'SalesVolume': 'Sales Volume', 'Fiscal Week Short': 'Fiscal Week'},
        # Pin the axis order explicitly: with several colour traces plotly
        # would otherwise order weeks by first appearance across traces.
        category_orders={'Fiscal Week Short': week_order},
    )

    # Display the interactive plot
    st.plotly_chart(fig)
114
+ ##########################################################################################################
115
+
116
+ ########################################### CARD #2 ####################################################
117
+ # Card 2: Sales Volume & Unit Price Correlation plot for Item Category and Container Code
118
+ if st.session_state['active_card'] == 'card2':
119
+ # Dropdown to select item type (using session_state)
120
+ st.session_state['selected_item_type'] = st.selectbox('Select Item Type',
121
+ df['Itemtype'].unique(),
122
+ index=list(df['Itemtype'].unique()).index(st.session_state['selected_item_type']))
123
+
124
+ st.subheader("Sales Volume & Unit Price Correlation for Container Codes")
125
+
126
+ # Group the dataframe and prepare for plotting
127
+ df = df.groupby(['FyWeek', 'Fy', 'Chaincode', 'Store', 'Address', 'Zipcode', 'City', 'State', 'Containercode', 'Itemtype'], observed=True).agg({
128
+ 'SalesVolume': 'sum',
129
+ 'UnitPrice': 'mean',
130
+ 'Sales': 'sum'
131
+ }).reset_index()
132
+
133
+ # Function to extract date from fiscal week
134
def dt_from_fy_week(fyweek):
    """Return the start date of a fiscal-week label such as 'FY 2021 Week 5'.

    The first number in the label is read as the fiscal year and the second
    as the week number. Week 1 of fiscal year N starts on 1 August of year
    N - 1, and each following week starts seven days later.

    Raises:
        ValueError: if the label does not contain exactly two numbers.
    """
    numbers = re.findall(r'\d+', fyweek)
    if len(numbers) != 2:
        # Fail with a message that names the offending label instead of an
        # opaque tuple-unpacking error.
        raise ValueError(
            f"Cannot parse fiscal week label {fyweek!r}: "
            "expected a fiscal year and a week number"
        )
    fy, w = (int(n) for n in numbers)
    week1_start = datetime.strptime(f"{fy - 1}-08-01", "%Y-%m-%d")
    return (week1_start + timedelta(weeks=w - 1)).date()
138
+
139
+ # Add columns for date and promo to data
140
+ df['Dt'] = df['FyWeek'].apply(dt_from_fy_week)
141
+ df['Dt'] = pd.to_datetime(df['Dt'], errors='coerce')
142
+ df['Promo'] = np.where(df['Dt'].dt.month.astype(str).isin(['3', '4', '5', '6']), 'Promo', 'NoPromo')
143
+ df["Promo"] = df["Promo"].astype("category")
144
+
145
+ # Split FyWeek into fiscal year and week number
146
+ df['Week'] = df['FyWeek'].str.split().str[-1].astype(int)
147
+ df['Year'] = df['FyWeek'].str.split().str[1].astype(int)
148
+
149
+ # Filter the dataframe based on the selected item type
150
+ filtered_df = df[df['Itemtype'] == st.session_state['selected_item_type']]
151
+
152
+ # Find the top 3 container codes based on total SalesVolume
153
+ top_3_containers = filtered_df.groupby('Containercode', observed=True)['SalesVolume'].sum().nlargest(3).index
154
+
155
+ # Filter the data for only the top 3 container codes
156
+ top_container_data = filtered_df[filtered_df['Containercode'].isin(top_3_containers)]
157
+
158
+ # Group by Year, Week, Dt, and Containercode and aggregate SalesVolume and UnitPrice
159
+ agg_df = top_container_data.groupby(['Containercode', 'Year', 'Week', 'Dt'], observed=True).agg({
160
+ 'SalesVolume': 'sum',
161
+ 'UnitPrice': 'mean'
162
+ }).reset_index()
163
+
164
+ # Loop through the top 3 container codes and create separate plots
165
+ for container in top_3_containers:
166
+ container_data = agg_df[agg_df['Containercode'] == container]
167
+
168
+ # Create a new figure for each container code
169
+ fig, (axd, axp) = plt.subplots(2, 1, figsize=(10, 6))
170
+
171
+ # Plot SalesVolume
172
+ sns.lineplot(data=container_data, x='Dt', y='SalesVolume', ax=axd)
173
+ axd.set_title(f"SalesVolume - {container}")
174
+ axd.grid(True, linestyle='--', color='gray', alpha=0.7)
175
+
176
+ # Plot mean line for SalesVolume
177
+ axd.axhline(container_data['SalesVolume'].mean(), ls="--", color="r")
178
+ axd.xaxis.set_major_locator(mdates.MonthLocator(interval=2))
179
+ axd.set_xticklabels([])
180
+
181
+ # Plot UnitPrice
182
+ sns.lineplot(data=container_data, x='Dt', y='UnitPrice', ax=axp, color='green', errorbar='sd')
183
+ axp.set_title(f"UnitPrice - {container}")
184
+ axp.grid(True, linestyle='--', color='gray', alpha=0.7)
185
+
186
+ # Plot mean line for UnitPrice
187
+ axp.axhline(container_data['UnitPrice'].mean(), ls="--", color="r")
188
+ axp.xaxis.set_major_locator(mdates.MonthLocator(interval=2))
189
+ axp.tick_params(axis='x', rotation=90)
190
+
191
+ # Adjust layout for each figure
192
+ plt.tight_layout()
193
+
194
+ # Display the plot in Streamlit
195
+ st.pyplot(fig)
196
+
197
+ ###############################################################################################
198
+
199
+ ########################################### CARD #3 ####################################################
200
+ # Check which card was selected using session state
201
+ if st.session_state['active_card'] == 'card3':
202
+ # Dropdown for selecting the Item Type
203
+ st.session_state['selected_item_type'] = st.selectbox('Select Item Type', df['Itemtype'].unique(),
204
+ index=list(df['Itemtype'].unique()).index(st.session_state['selected_item_type']))
205
+ df = df.groupby(['FyWeek','Fy','Chaincode','Store','Address','Zipcode','City','State','Containercode','Itemtype'],observed=True).agg({
206
+ 'SalesVolume': 'sum',
207
+ 'UnitPrice':'mean',
208
+ 'Sales': 'sum'}).reset_index()
209
+ # add promo and date columns to data
210
def dt_from_fy_week(fyweek):
    """Convert a fiscal-week label (e.g. 'FY 2023 Week 12') to its start date.

    The label must contain exactly two numbers: the fiscal year, then the
    week number. Fiscal year N is anchored at 1 August of calendar year
    N - 1; week W starts (W - 1) * 7 days after that anchor.

    Raises:
        ValueError: if the label does not contain exactly two numbers.
    """
    parts = re.findall(r'\d+', fyweek)
    if len(parts) != 2:
        # Name the bad label explicitly rather than surfacing a bare
        # "not enough values to unpack" error.
        raise ValueError(
            f"Cannot parse fiscal week label {fyweek!r}: "
            "expected a fiscal year and a week number"
        )
    fiscal_year = int(parts[0])
    week_number = int(parts[1])
    week1_start = datetime.strptime(f"{fiscal_year - 1}-08-01", "%Y-%m-%d")
    return (week1_start + timedelta(weeks=week_number - 1)).date()
217
+
218
+ df['Dt'] = df['FyWeek'].apply(dt_from_fy_week)
219
+ df['Dt'] = pd.to_datetime(df['Dt'], errors='coerce')
220
+ df['Promo'] = np.where(df['Dt'].dt.month.astype(str).isin(['3','4','5','6']) , 'Promo', 'NoPromo')
221
+ df["Promo"] = df["Promo"].astype("category")
222
+ # Split FyWeek into fiscal year and week number
223
+ df['Week'] = df['FyWeek'].str.split().str[-1].astype(int)
224
+ # df_21['Year'] = df_21['Fy'].str.extract(r'(\d+)').astype(int)
225
+ df['Year'] = df['FyWeek'].str.split().str[1].astype(int)
226
+
227
+ # Define the fiscal years
228
+ years = ["FY 2021", "FY 2022", "FY 2023", "FY 2024"]
229
+
230
+ # Set up a 2x2 grid of subplots for the four years
231
+ fig, axs = plt.subplots(2, 2, figsize=(12, 8))
232
+
233
+ # Loop through each year and create a plot in the grid
234
+ for i, fy in enumerate(years):
235
+ ax = axs.flat[i]
236
+
237
+ # Plot Promo data
238
+ sns.regplot(data=df[(df["Itemtype"] == st.session_state['selected_item_type']) & (df["Fy"] == fy) & (df["Promo"] == "Promo")],
239
+ x="UnitPrice", y="SalesVolume", lowess=True, ci=None, marker='.', line_kws=dict(color="r"), ax=ax, label="Promo")
240
+
241
+ # Plot NoPromo data
242
+ sns.regplot(data=df[(df["Itemtype"] == st.session_state['selected_item_type']) & (df["Fy"] == fy) & (df["Promo"] == "NoPromo")],
243
+ x="UnitPrice", y="SalesVolume", lowess=True, ci=None, marker='x', line_kws=dict(color="g"), ax=ax, label="NoPromo")
244
+
245
+ # Set the title of each subplot
246
+ ax.set_title(f"{st.session_state['selected_item_type']} - {fy}")
247
+ ax.legend(loc="best")
248
+
249
+ # Set the overall title for the figure
250
+ fig.suptitle(f"Price vs SalesVolume for {st.session_state['selected_item_type']} across years")
251
+
252
+ # Adjust layout to prevent overlap
253
+ fig.tight_layout(rect=[0, 0, 1, 0.95])
254
+
255
+ # Display the plot
256
+ st.pyplot(fig)
257
+
258
+ ###############################################################################################
259
+
260
+ ########################################### CARD #4 ####################################################
261
if st.session_state['active_card'] == 'card4':
    # Card 4: total sales volume per container code, split into three unit-price
    # bands, drawn as one subplot per fiscal year.

    # Define the fiscal years
    years = ['FY 2021', 'FY 2022', 'FY 2023', 'FY 2024']
    band_order = ['Lower Band', 'Median Band', 'Higher Band']

    # Collapse duplicate rows: one row per week/store/container/item type.
    df = df.groupby(
        ['FyWeek', 'Fy', 'Chaincode', 'Store', 'Address', 'Zipcode', 'City',
         'State', 'Containercode', 'Itemtype'],
        observed=True,
    ).agg({
        'SalesVolume': 'sum',
        'UnitPrice': 'mean',
        'Sales': 'sum',
    }).reset_index()

    # Dropdown for selecting the Item Type (persisted in session_state).
    # Fall back to the first option if the stored value is not in the data.
    item_types = list(df['Itemtype'].unique())
    stored_item_type = st.session_state['selected_item_type']
    default_index = item_types.index(stored_item_type) if stored_item_type in item_types else 0
    st.session_state['selected_item_type'] = st.selectbox(
        'Select Item Type', item_types, index=default_index
    )
    selected_item_type = st.session_state['selected_item_type']

    # Set up a 2x2 grid of subplots for the four years
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    axes = axes.flatten()  # To access axes easily in a loop

    # Loop through each year and plot the data
    for ax, year in zip(axes, years):
        # Set the title and axis labels up front so that empty years are
        # still labelled.
        ax.set_title(f"Total Sales Volume by Container Code and Price Band for {year}")
        ax.set_xlabel('Container Code')
        ax.set_ylabel('Total Sales Volume')

        # Filter to the selected item type and year. Take an explicit copy so
        # the column assignments below do not write into a slice of df.
        cage_data = df[(df['Itemtype'] == selected_item_type) & (df['Fy'] == year)].copy()
        # Rows without a price cannot be placed in any band.
        cage_data = cage_data.dropna(subset=['UnitPrice'])
        if cage_data.empty:
            ax.text(0.5, 0.5, 'No data', ha='center', va='center', transform=ax.transAxes)
            continue

        # Drop categories that do not occur in this subset so the x-axis only
        # shows container codes that are actually present.
        cage_data['Itemtype'] = cage_data['Itemtype'].cat.remove_unused_categories()
        cage_data['Containercode'] = cage_data['Containercode'].cat.remove_unused_categories()

        # Price bands: at or below the 25th percentile, above the 75th
        # percentile, and everything in between. Every row lands in exactly
        # one band, so the band totals add up to the year's total volume.
        lower_band = cage_data['UnitPrice'].quantile(0.25)
        higher_band = cage_data['UnitPrice'].quantile(0.75)
        cage_data['PriceBand'] = np.select(
            [cage_data['UnitPrice'] <= lower_band, cage_data['UnitPrice'] > higher_band],
            ['Lower Band', 'Higher Band'],
            default='Median Band',
        )

        # Aggregate SalesVolume and average UnitPrice per container code and band
        combined_data = cage_data.groupby(
            ['Containercode', 'PriceBand'], observed=True
        ).agg(
            total_sales_volume=('SalesVolume', 'sum'),
            avg_unit_price=('UnitPrice', 'mean'),
        ).reset_index()

        # Plot Total Sales Volume for each price band in the current subplot;
        # a fixed hue order keeps band colours consistent across subplots.
        sns.barplot(x='Containercode', y='total_sales_volume', hue='PriceBand',
                    hue_order=band_order, data=combined_data, ax=ax)
        ax.set_xlabel('Container Code')
        ax.set_ylabel('Total Sales Volume')
        ax.tick_params(axis='x', rotation=45)

    # Adjust the layout so titles and labels don't overlap
    fig.tight_layout()

    # Display the plot in Streamlit
    st.pyplot(fig)
 
 
 
 
337
 
338
+ ###############################################################################################