Spaces:
Sleeping
Sleeping
ProtonDataLabs
committed on
Commit
•
136ada0
1
Parent(s):
71f9be1
Update app.py
Browse files
app.py
CHANGED
@@ -5,46 +5,92 @@ import matplotlib.pyplot as plt
|
|
5 |
import seaborn as sns
|
6 |
import matplotlib.dates as mdates
|
7 |
import plotly.express as px
|
|
|
8 |
import re
|
9 |
from datetime import datetime, timedelta
|
10 |
import warnings
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
#
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
if 'active_card' not in st.session_state:
|
36 |
st.session_state['active_card'] = None
|
37 |
if 'selected_item_type' not in st.session_state:
|
38 |
st.session_state['selected_item_type'] = 'CORE' # Set default to 'CORE'
|
39 |
-
|
40 |
-
if 'selected_state' not in st.session_state:
|
41 |
-
st.session_state['selected_state'] = df['State'].unique()[0] # Default to the first state
|
42 |
if 'selected_feature' not in st.session_state:
|
43 |
st.session_state['selected_feature'] = 'Chaincode' # Default to 'Chain Code'
|
44 |
|
45 |
-
#
|
46 |
-
col1, col2
|
47 |
-
|
48 |
# Define buttons for plot categories, update session state when clicked
|
49 |
with col1:
|
50 |
if st.button("Sales Volume Trend for Item Category"):
|
@@ -54,67 +100,48 @@ with col2:
|
|
54 |
if st.button("Sales Volume & Unit Price Correlation for Item Category and Container Code"):
|
55 |
st.session_state['active_card'] = 'card2'
|
56 |
|
57 |
-
|
58 |
-
|
59 |
-
|
|
|
|
|
60 |
|
61 |
-
with col4:
|
62 |
-
if st.button("Total Sales Volume by Price Band"):
|
63 |
-
st.session_state['active_card'] = 'card4'
|
64 |
|
|
|
|
|
|
|
65 |
|
66 |
-
|
67 |
if st.session_state['active_card'] == 'card1':
|
68 |
-
#
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
df['Fiscal Week Short'] = df.apply(lambda x: f"FY{x['Fiscal Year']%100}W{x['Week Number']}", axis=1)
|
75 |
-
|
76 |
-
# Ensure the short fiscal week column is treated as a categorical variable and sorted by the order of appearance
|
77 |
-
df['Fiscal Week Short'] = pd.Categorical(df['Fiscal Week Short'], categories=df['Fiscal Week Short'].unique(), ordered=True)
|
78 |
-
# Dropdown for selecting the state (using session_state)
|
79 |
-
st.session_state['selected_state'] = st.selectbox('Select State', df['State'].unique(),
|
80 |
-
index=list(df['State'].unique()).index(st.session_state['selected_state']))
|
81 |
-
|
82 |
-
# Dropdown for selecting the feature for grouping (using session_state)
|
83 |
-
st.session_state['selected_feature'] = st.selectbox('Select Feature for Grouping',
|
84 |
-
['Chaincode', 'Itemtype', 'FyWeek'],
|
85 |
-
index=['Chaincode', 'Itemtype', 'FyWeek'].index(st.session_state['selected_feature']))
|
86 |
-
|
87 |
# Filter the dataframe based on selected state
|
88 |
-
filtered_df = df[df['State'] ==
|
89 |
-
|
90 |
-
#
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
st.subheader(f'Sales Data for {st.session_state["selected_state"]} - Grouped by Chain Code')
|
100 |
-
group_data = filtered_df.groupby(['FyWeek', 'Chaincode'])['SalesVolume'].sum().reset_index()
|
101 |
-
fig = px.bar(group_data, x='FyWeek', y='SalesVolume', color='Chaincode',
|
102 |
-
title=f'Sales Volume over Fiscal Week in {st.session_state["selected_state"]} by Chain Code',
|
103 |
-
labels={'SalesVolume': 'Sales Volume'})
|
104 |
-
|
105 |
-
elif st.session_state['selected_feature'] == 'FyWeek':
|
106 |
-
st.subheader(f'Sales Data for {st.session_state["selected_state"]} - Grouped by Fiscal Week')
|
107 |
-
group_data = filtered_df.groupby(['FyWeek'])['SalesVolume'].sum().reset_index()
|
108 |
-
fig = px.bar(group_data, x='FyWeek', y='SalesVolume',
|
109 |
-
title=f'Sales Volume over Fiscal Week in {st.session_state["selected_state"]}',
|
110 |
-
labels={'SalesVolume': 'Sales Volume'})
|
111 |
-
|
112 |
-
# Display the interactive plot
|
113 |
st.plotly_chart(fig)
|
|
|
114 |
##########################################################################################################
|
115 |
|
|
|
116 |
########################################### CARD #2 ####################################################
|
117 |
-
|
|
|
|
|
|
|
|
|
118 |
if st.session_state['active_card'] == 'card2':
|
119 |
# Dropdown to select item type (using session_state)
|
120 |
st.session_state['selected_item_type'] = st.selectbox(
|
@@ -124,33 +151,18 @@ if st.session_state['active_card'] == 'card2':
|
|
124 |
# Dropdown to select the grouping category (container code, chain code, or state)
|
125 |
group_by_option = st.selectbox('Group by', ['Containercode', 'Chaincode', 'State'])
|
126 |
|
127 |
-
|
128 |
-
|
129 |
-
# Group the dataframe and prepare for plotting
|
130 |
-
df = df.groupby(['FyWeek', 'Fy', 'Chaincode', 'Store', 'Address', 'Zipcode', 'City', 'State', 'Containercode', 'Itemtype'], observed=True).agg({
|
131 |
-
'SalesVolume': 'sum',
|
132 |
-
'UnitPrice': 'mean',
|
133 |
-
'Sales': 'sum'
|
134 |
-
}).reset_index()
|
135 |
|
136 |
-
|
137 |
-
def dt_from_fy_week(fyweek):
|
138 |
-
fy, w = re.findall(r'\d+', fyweek)
|
139 |
-
week1_start = datetime.strptime("{}-08-01".format(int(fy) - 1), "%Y-%m-%d")
|
140 |
-
return (week1_start + timedelta(weeks=int(w) - 1)).date()
|
141 |
|
142 |
-
#
|
143 |
-
df['Dt'] = df['FyWeek'].apply(dt_from_fy_week)
|
144 |
df['Dt'] = pd.to_datetime(df['Dt'], errors='coerce')
|
145 |
df['Promo'] = np.where(df['Dt'].dt.month.astype(str).isin(['3', '4', '5', '6']), 'Promo', 'NoPromo')
|
146 |
df["Promo"] = df["Promo"].astype("category")
|
147 |
-
|
148 |
-
# Split FyWeek into fiscal year and week number
|
149 |
-
df['Week'] = df['FyWeek'].str.split().str[-1].astype(int)
|
150 |
-
df['Year'] = df['FyWeek'].str.split().str[1].astype(int)
|
151 |
|
152 |
-
# Filter the dataframe based on the selected item type
|
153 |
-
filtered_df = df[df['Itemtype'] == st.session_state['selected_item_type']]
|
154 |
|
155 |
# Find the top 3 values based on total SalesVolume in the selected grouping category
|
156 |
top_3_values = filtered_df.groupby(group_by_option, observed=True)['SalesVolume'].sum().nlargest(3).index
|
@@ -158,183 +170,78 @@ if st.session_state['active_card'] == 'card2':
|
|
158 |
# Filter the data for only the top 3 values
|
159 |
top_group_data = filtered_df[filtered_df[group_by_option].isin(top_3_values)]
|
160 |
|
161 |
-
#
|
162 |
agg_df = top_group_data.groupby([group_by_option, 'Year', 'Week', 'Dt'], observed=True).agg({
|
163 |
'SalesVolume': 'sum',
|
164 |
'UnitPrice': 'mean'
|
165 |
}).reset_index()
|
166 |
|
167 |
-
#
|
|
|
|
|
|
|
168 |
for value in top_3_values:
|
169 |
value_data = agg_df[agg_df[group_by_option] == value]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
170 |
|
171 |
-
# Create a new figure for each group
|
172 |
-
fig, (axd, axp) = plt.subplots(2, 1, figsize=(10, 6))
|
173 |
|
174 |
-
# Plot SalesVolume
|
175 |
-
sns.lineplot(data=value_data, x='Dt', y='SalesVolume', ax=axd)
|
176 |
-
axd.set_title(f"SalesVolume - {value} ({group_by_option})")
|
177 |
-
axd.grid(True, linestyle='--', color='gray', alpha=0.7)
|
178 |
|
179 |
-
|
180 |
-
axd.axhline(value_data['SalesVolume'].mean(), ls="--", color="r")
|
181 |
-
axd.xaxis.set_major_locator(mdates.MonthLocator(interval=2))
|
182 |
-
axd.set_xticklabels([])
|
183 |
-
|
184 |
-
# Plot UnitPrice
|
185 |
-
sns.lineplot(data=value_data, x='Dt', y='UnitPrice', ax=axp, color='green', errorbar='sd')
|
186 |
-
axp.set_title(f"UnitPrice - {value} ({group_by_option})")
|
187 |
-
axp.grid(True, linestyle='--', color='gray', alpha=0.7)
|
188 |
-
|
189 |
-
# Plot mean line for UnitPrice
|
190 |
-
axp.axhline(value_data['UnitPrice'].mean(), ls="--", color="r")
|
191 |
-
axp.xaxis.set_major_locator(mdates.MonthLocator(interval=2))
|
192 |
-
axp.tick_params(axis='x', rotation=90)
|
193 |
-
|
194 |
-
# Adjust layout for each figure
|
195 |
-
plt.tight_layout()
|
196 |
-
|
197 |
-
# Display the plot in Streamlit
|
198 |
-
st.pyplot(fig)
|
199 |
-
###############################################################################################
|
200 |
-
|
201 |
-
########################################### CARD #3 ####################################################
|
202 |
-
# Check which card was selected using session state
|
203 |
-
if st.session_state['active_card'] == 'card3':
|
204 |
-
# Dropdown for selecting the Item Type
|
205 |
-
st.session_state['selected_item_type'] = st.selectbox('Select Item Type', df['Itemtype'].unique(),
|
206 |
-
index=list(df['Itemtype'].unique()).index(st.session_state['selected_item_type']))
|
207 |
-
df = df.groupby(['FyWeek','Fy','Chaincode','Store','Address','Zipcode','City','State','Containercode','Itemtype'],observed=True).agg({
|
208 |
-
'SalesVolume': 'sum',
|
209 |
-
'UnitPrice':'mean',
|
210 |
-
'Sales': 'sum'}).reset_index()
|
211 |
-
# add promo and date columns to data
|
212 |
-
def dt_from_fy_week(fyweek):
|
213 |
-
|
214 |
-
fy, w = re.findall(r'\d+', fyweek)
|
215 |
-
|
216 |
-
week1_start = datetime.strptime("{}-08-01".format(int(fy)-1), "%Y-%m-%d")
|
217 |
-
|
218 |
-
return (week1_start + timedelta(weeks=int(w)-1)).date()
|
219 |
-
|
220 |
-
df['Dt'] = df['FyWeek'].apply(dt_from_fy_week)
|
221 |
-
df['Dt'] = pd.to_datetime(df['Dt'], errors='coerce')
|
222 |
-
df['Promo'] = np.where(df['Dt'].dt.month.astype(str).isin(['3','4','5','6']) , 'Promo', 'NoPromo')
|
223 |
-
df["Promo"] = df["Promo"].astype("category")
|
224 |
-
# Split FyWeek into fiscal year and week number
|
225 |
-
df['Week'] = df['FyWeek'].str.split().str[-1].astype(int)
|
226 |
-
# df_21['Year'] = df_21['Fy'].str.extract(r'(\d+)').astype(int)
|
227 |
-
df['Year'] = df['FyWeek'].str.split().str[1].astype(int)
|
228 |
-
|
229 |
-
# Define the fiscal years
|
230 |
-
years = ["FY 2021", "FY 2022", "FY 2023", "FY 2024"]
|
231 |
-
|
232 |
-
# Set up a 2x2 grid of subplots for the four years
|
233 |
-
fig, axs = plt.subplots(2, 2, figsize=(12, 8))
|
234 |
-
|
235 |
-
# Loop through each year and create a plot in the grid
|
236 |
-
for i, fy in enumerate(years):
|
237 |
-
ax = axs.flat[i]
|
238 |
-
|
239 |
-
# Plot Promo data
|
240 |
-
sns.regplot(data=df[(df["Itemtype"] == st.session_state['selected_item_type']) & (df["Fy"] == fy) & (df["Promo"] == "Promo")],
|
241 |
-
x="UnitPrice", y="SalesVolume", lowess=True, ci=None, marker='.', line_kws=dict(color="r"), ax=ax, label="Promo")
|
242 |
-
|
243 |
-
# Plot NoPromo data
|
244 |
-
sns.regplot(data=df[(df["Itemtype"] == st.session_state['selected_item_type']) & (df["Fy"] == fy) & (df["Promo"] == "NoPromo")],
|
245 |
-
x="UnitPrice", y="SalesVolume", lowess=True, ci=None, marker='x', line_kws=dict(color="g"), ax=ax, label="NoPromo")
|
246 |
-
|
247 |
-
# Set the title of each subplot
|
248 |
-
ax.set_title(f"{st.session_state['selected_item_type']} - {fy}")
|
249 |
-
ax.legend(loc="best")
|
250 |
-
|
251 |
-
# Set the overall title for the figure
|
252 |
-
fig.suptitle(f"Price vs SalesVolume for {st.session_state['selected_item_type']} across years")
|
253 |
-
|
254 |
-
# Adjust layout to prevent overlap
|
255 |
-
fig.tight_layout(rect=[0, 0, 1, 0.95])
|
256 |
-
|
257 |
-
# Display the plot
|
258 |
-
st.pyplot(fig)
|
259 |
-
|
260 |
-
###############################################################################################
|
261 |
-
|
262 |
-
########################################### CARD #4 ####################################################
|
263 |
-
if st.session_state['active_card'] == 'card4':
|
264 |
-
# Define the fiscal years
|
265 |
-
years = ['FY 2021', 'FY 2022', 'FY 2023', 'FY 2024']
|
266 |
-
df = df.groupby(['FyWeek','Fy','Chaincode','Store','Address','Zipcode','City','State','Containercode','Itemtype'],observed=True).agg({
|
267 |
-
'SalesVolume': 'sum',
|
268 |
-
'UnitPrice':'mean',
|
269 |
-
'Sales': 'sum'}).reset_index()
|
270 |
-
|
271 |
-
# Dropdown for selecting the Item Type (using session_state)
|
272 |
-
st.session_state['selected_item_type'] = st.selectbox('Select Item Type',
|
273 |
-
df['Itemtype'].unique(),
|
274 |
-
index=list(df['Itemtype'].unique()).index(st.session_state['selected_item_type']))
|
275 |
-
|
276 |
-
# Set up a 2x2 grid of subplots for the four years
|
277 |
-
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
|
278 |
-
axes = axes.flatten() # To access axes easily in a loop
|
279 |
-
|
280 |
-
# Loop through each year and plot the data
|
281 |
-
for i, year in enumerate(years):
|
282 |
-
# print(st.session_state['selected_item_type'])
|
283 |
-
# Filter data for the specific year and item type selected
|
284 |
-
cage_data = df[(df['Itemtype'] == st.session_state['selected_item_type']) & (df['Fy'] == year)]
|
285 |
-
cage_data['Itemtype'] = cage_data['Itemtype'].cat.remove_unused_categories()
|
286 |
-
cage_data['Containercode'] = cage_data['Containercode'].cat.remove_unused_categories()
|
287 |
-
# print(cage_data['Itemtype'].unique())
|
288 |
-
relevant_container_codes = cage_data['Containercode'].unique()
|
289 |
-
print(relevant_container_codes)
|
290 |
-
# Calculate price bands
|
291 |
-
lower_band = cage_data['UnitPrice'].quantile(0.25)
|
292 |
-
median_band = cage_data['UnitPrice'].quantile(0.50)
|
293 |
-
higher_band = cage_data['UnitPrice'].quantile(0.75)
|
294 |
-
|
295 |
-
# Get data for each price band
|
296 |
-
lower_band_data = cage_data[cage_data['UnitPrice'] <= lower_band]
|
297 |
-
median_band_data = cage_data[(cage_data['UnitPrice'] > lower_band) & (cage_data['UnitPrice'] <= median_band)]
|
298 |
-
higher_band_data = cage_data[cage_data['UnitPrice'] > higher_band]
|
299 |
-
# print(lower_band_data['Containercode'].unique())
|
300 |
-
# Aggregate SalesVolume and average UnitPrice for each container code in each pricing band
|
301 |
-
lower_band_agg = lower_band_data.groupby('Containercode',observed=True).agg(
|
302 |
-
total_sales_volume=('SalesVolume', 'sum'),
|
303 |
-
avg_unit_price=('UnitPrice', 'mean')
|
304 |
-
).reset_index()
|
305 |
-
|
306 |
-
median_band_agg = median_band_data.groupby('Containercode',observed=True).agg(
|
307 |
-
total_sales_volume=('SalesVolume', 'sum'),
|
308 |
-
avg_unit_price=('UnitPrice', 'mean')
|
309 |
-
).reset_index()
|
310 |
-
|
311 |
-
higher_band_agg = higher_band_data.groupby('Containercode',observed=True).agg(
|
312 |
-
total_sales_volume=('SalesVolume', 'sum'),
|
313 |
-
avg_unit_price=('UnitPrice', 'mean')
|
314 |
-
).reset_index()
|
315 |
-
|
316 |
-
# Add the price band labels
|
317 |
-
lower_band_agg['PriceBand'] = 'Lower Band'
|
318 |
-
median_band_agg['PriceBand'] = 'Median Band'
|
319 |
-
higher_band_agg['PriceBand'] = 'Higher Band'
|
320 |
-
|
321 |
-
# Combine the data for plotting
|
322 |
-
combined_data = pd.concat([lower_band_agg, median_band_agg, higher_band_agg])
|
323 |
-
combined_data = combined_data[combined_data['Containercode'].isin(relevant_container_codes)]
|
324 |
-
|
325 |
-
# Plot Total Sales Volume for each price band in the current subplot
|
326 |
-
sns.barplot(x='Containercode', y='total_sales_volume', hue='PriceBand', data=combined_data, ax=axes[i])
|
327 |
-
|
328 |
-
# Set the title and customize x-axis for each subplot
|
329 |
-
axes[i].set_title(f"Total Sales Volume by Container Code and Price Band for {year}")
|
330 |
-
axes[i].set_xlabel('Container Code')
|
331 |
-
axes[i].set_ylabel('Total Sales Volume')
|
332 |
-
axes[i].tick_params(axis='x', rotation=45)
|
333 |
-
|
334 |
-
# Adjust the layout so titles and labels don't overlap
|
335 |
-
plt.tight_layout()
|
336 |
-
|
337 |
-
# Display the plot in Streamlit
|
338 |
-
st.pyplot(fig)
|
339 |
-
|
340 |
-
###############################################################################################
|
|
|
5 |
import seaborn as sns
|
6 |
import matplotlib.dates as mdates
|
7 |
import plotly.express as px
|
8 |
+
import plotly.graph_objects as go
|
9 |
import re
|
10 |
from datetime import datetime, timedelta
|
11 |
import warnings
|
12 |
+
import time
|
13 |
+
import dask.dataframe as dd
|
14 |
+
|
15 |
+
@st.cache_data
def date_from_week(year, week):
    """Return the start date of each fiscal week.

    The fiscal year labelled ``year`` is taken to begin on 1 August of the
    previous calendar year. Week 1 starts on that day and every later week
    starts seven days after the one before it.

    Args:
        year: Fiscal years; must support ``- 1`` and ``.astype(str)``
            (a pandas Series of integers at the visible call site).
        week: 1-based fiscal week numbers aligned with ``year``.

    Returns:
        The datetime values obtained by adding the week offset to the
        fiscal-year start date.
    """
    # Offset of each week from the first day of its fiscal year.
    days_into_year = pd.to_timedelta((week - 1) * 7, unit='days')
    # First day of the fiscal year: 1 August of the preceding calendar year.
    fiscal_year_start = pd.to_datetime((year - 1).astype(str) + '-08-01')
    return fiscal_year_start + days_into_year
|
21 |
+
|
22 |
+
@st.cache_data
def load_data(active_card):
    """Load ``fy21-24.csv`` with only the columns the active card needs.

    The CSV is read through dask and materialised into a pandas DataFrame.
    For ``'card2'`` the rows are additionally aggregated per
    week/store/container/item combination and the columns ``FY``, ``Week``,
    ``Year`` and ``Dt`` are derived from ``FyWeek``.

    Args:
        active_card: Identifier of the selected card (``'card1'``,
            ``'card2'``) or any other value, in which case a default
            column set is loaded.

    Returns:
        The loaded (and, for ``'card2'``, aggregated and enriched) DataFrame.
    """
    # Column set used when the card has no dedicated entry below.
    default_columns = ['FyWeek', 'Itemtype', 'Chaincode', 'State', 'SalesVolume', 'UnitPrice', 'Sales']
    columns_by_card = {
        'card1': ['FyWeek', 'State', 'Itemtype', 'Chaincode', 'SalesVolume'],
        'card2': ['FyWeek', 'Fy', 'State', 'Store', 'Address', 'Zipcode', 'City', 'Itemtype',
                  'Chaincode', 'Containercode', 'SalesVolume', 'UnitPrice', 'Sales'],
    }
    required_columns = columns_by_card.get(active_card, default_columns)

    # Explicit dtypes: categoricals for repeated labels, floats for measures.
    dtype_spec = {
        'FyWeek': 'string',
        'Fy': 'category',
        'Itemtype': 'category',
        'Chaincode': 'category',
        'State': 'category',
        "Store": "category",
        'Containercode': 'category',
        "Address": "string",
        "Zipcode": "float",
        "City": "category",
        'SalesVolume': 'float',
        'UnitPrice': 'float',
        'Sales': 'float',
    }

    # Read only the required columns, then collapse the dask frame to pandas.
    df = dd.read_csv("fy21-24.csv", usecols=required_columns, dtype=dtype_spec).compute()

    # Only card2 needs the aggregation and the derived date columns.
    if active_card != 'card2':
        return df

    group_keys = ['FyWeek', 'Fy', 'Chaincode', 'Store', 'Address', 'Zipcode',
                  'City', 'State', 'Containercode', 'Itemtype']
    measures = {'SalesVolume': 'sum', 'UnitPrice': 'mean', 'Sales': 'sum'}
    df = df.groupby(group_keys, observed=True).agg(measures).reset_index()

    # FyWeek is expected to split into exactly two parts on ' Week '.
    df[['FY', 'Week']] = df['FyWeek'].str.split(' Week ', expand=True)
    df['Week'] = df['Week'].astype(int)
    # Drop the first two characters of the FY part and parse the rest as int.
    df['Year'] = df['FY'].str[2:].astype(int)
    df['Dt'] = date_from_week(df['Year'], df['Week'])

    return df
|
73 |
+
|
74 |
+
# Display logo
|
75 |
+
st.image("bonnie.png", width=150) # Adjust width as needed
|
76 |
+
|
77 |
+
# Display title
|
78 |
+
st.title("Bonnie Plants Pricing & Sales Analytics Dashboard")
|
79 |
+
|
80 |
+
# Close the div for logo and title
|
81 |
+
st.markdown('</div>', unsafe_allow_html=True)
|
82 |
+
|
83 |
+
# Initialize session state for storing which card was clicked and item type
|
84 |
if 'active_card' not in st.session_state:
|
85 |
st.session_state['active_card'] = None
|
86 |
if 'selected_item_type' not in st.session_state:
|
87 |
st.session_state['selected_item_type'] = 'CORE' # Set default to 'CORE'
|
88 |
+
|
|
|
|
|
89 |
if 'selected_feature' not in st.session_state:
|
90 |
st.session_state['selected_feature'] = 'Chaincode' # Default to 'Chain Code'
|
91 |
|
92 |
+
# Card selection buttons
|
93 |
+
col1, col2 = st.columns(2)
|
|
|
94 |
# Define buttons for plot categories, update session state when clicked
|
95 |
with col1:
|
96 |
if st.button("Sales Volume Trend for Item Category"):
|
|
|
100 |
if st.button("Sales Volume & Unit Price Correlation for Item Category and Container Code"):
|
101 |
st.session_state['active_card'] = 'card2'
|
102 |
|
103 |
+
start_time=time.time()
|
104 |
+
# st.write(st.session_state['active_card'])
|
105 |
+
df = load_data(st.session_state['active_card'])
|
106 |
+
time_taken = time.time() - start_time
|
107 |
+
st.write(f"Data loaded in {time_taken:.2f} seconds")
|
108 |
|
|
|
|
|
|
|
109 |
|
110 |
+
# Initialize session state for storing the selected state and feature
|
111 |
+
if 'selected_state' not in st.session_state:
|
112 |
+
st.session_state['selected_state'] = df['State'].unique()[0] # Default to the first state
|
113 |
|
114 |
+
############################################ CARD #1 ####################################################
|
115 |
if st.session_state['active_card'] == 'card1':
    # Card 1: weekly sales volume for one state, coloured by the chosen feature.
    selected_state = st.selectbox('Select State', df['State'].unique())
    selected_feature = st.selectbox('Select Feature for Grouping', ['Chaincode', 'Itemtype',])

    # Restrict to the selected state before aggregating.
    filtered_df = df[df['State'] == selected_state]

    # observed=True keeps only the category combinations present in the data.
    group_data = filtered_df.groupby(['FyWeek', selected_feature], observed=True)['SalesVolume'].sum().reset_index()

    # groupby sorts the FyWeek strings lexicographically (Week 10 before
    # Week 2), so give Plotly an explicit order keyed on the numbers
    # embedded in each label (year first, then week).
    fiscal_week_order = sorted(
        group_data['FyWeek'].unique(),
        key=lambda label: [int(part) for part in re.findall(r'\d+', label)],
    )

    # labels must be keyed by the plotted column name ('FyWeek') to take effect.
    fig = px.bar(group_data, x='FyWeek', y='SalesVolume', color=selected_feature,
                 title=f'Sales Volume over Fiscal Week in {selected_state} by {selected_feature}',
                 labels={'SalesVolume': 'Sales Volume', 'FyWeek': 'Fiscal Week'},
                 category_orders={'FyWeek': fiscal_week_order})
    st.plotly_chart(fig)
|
135 |
+
|
136 |
##########################################################################################################
|
137 |
|
138 |
+
|
139 |
########################################### CARD #2 ####################################################
|
140 |
+
|
141 |
+
|
142 |
+
|
143 |
+
|
144 |
+
|
145 |
if st.session_state['active_card'] == 'card2':
|
146 |
# Dropdown to select item type (using session_state)
|
147 |
st.session_state['selected_item_type'] = st.selectbox(
|
|
|
151 |
# Dropdown to select the grouping category (container code, chain code, or state)
|
152 |
group_by_option = st.selectbox('Group by', ['Containercode', 'Chaincode', 'State'])
|
153 |
|
154 |
+
# Multi-select checkbox to select multiple years
|
155 |
+
selected_years = st.multiselect('Select Year(s)', [2021, 2022, 2023, 2024], default=[2021])
|
|
|
|
|
|
|
|
|
|
|
|
|
156 |
|
157 |
+
st.subheader(f"Sales Volume & Unit Price Correlation for {group_by_option} in {', '.join(map(str, selected_years))}")
|
|
|
|
|
|
|
|
|
158 |
|
159 |
+
# Convert 'Dt' column to datetime
|
|
|
160 |
df['Dt'] = pd.to_datetime(df['Dt'], errors='coerce')
|
161 |
df['Promo'] = np.where(df['Dt'].dt.month.astype(str).isin(['3', '4', '5', '6']), 'Promo', 'NoPromo')
|
162 |
df["Promo"] = df["Promo"].astype("category")
|
|
|
|
|
|
|
|
|
163 |
|
164 |
+
# Filter the dataframe based on the selected item type and selected years
|
165 |
+
filtered_df = df[(df['Itemtype'] == st.session_state['selected_item_type']) & (df['Dt'].dt.year.isin(selected_years))]
|
166 |
|
167 |
# Find the top 3 values based on total SalesVolume in the selected grouping category
|
168 |
top_3_values = filtered_df.groupby(group_by_option, observed=True)['SalesVolume'].sum().nlargest(3).index
|
|
|
170 |
# Filter the data for only the top 3 values
|
171 |
top_group_data = filtered_df[filtered_df[group_by_option].isin(top_3_values)]
|
172 |
|
173 |
+
# Aggregate data
|
174 |
agg_df = top_group_data.groupby([group_by_option, 'Year', 'Week', 'Dt'], observed=True).agg({
|
175 |
'SalesVolume': 'sum',
|
176 |
'UnitPrice': 'mean'
|
177 |
}).reset_index()
|
178 |
|
179 |
+
# Create a new column 'week-year' for X-axis labels
|
180 |
+
agg_df['week-year'] = agg_df['Dt'].dt.strftime('%U-%Y')
|
181 |
+
|
182 |
+
# Loop through the top 3 values and create separate plots using Plotly
|
183 |
# One dual-axis chart per top group: SalesVolume on the left axis,
# UnitPrice on the right axis, each with a dashed line at its mean.
for value in top_3_values:
    value_data = agg_df[agg_df[group_by_option] == value]
    mean_sales_volume = value_data['SalesVolume'].mean()
    mean_unit_price = value_data['UnitPrice'].mean()

    fig = go.Figure()

    # SalesVolume trace on the primary Y-axis.
    fig.add_trace(go.Scatter(
        x=value_data['week-year'],
        y=value_data['SalesVolume'],
        mode='lines+markers',
        name='SalesVolume',
        line=dict(color='blue'),
        hovertemplate='SalesVolume: %{y}<br>Week-Year: %{x}'
    ))

    # UnitPrice trace on the secondary Y-axis.
    fig.add_trace(go.Scatter(
        x=value_data['week-year'],
        y=value_data['UnitPrice'],
        mode='lines+markers',
        name='UnitPrice',
        line=dict(color='green'),
        yaxis='y2',
        hovertemplate='UnitPrice: %{y}<br>Week-Year: %{x}'
    ))

    # Mean lines span the full plot width via paper coordinates. The x
    # labels are '%U-%Y' strings, whose min()/max() compare week-first and
    # are not the chronological endpoints, so they cannot anchor the line.
    fig.add_shape(type="line",
                  x0=0, x1=1,
                  y0=mean_sales_volume, y1=mean_sales_volume,
                  line=dict(color="blue", width=2, dash="dash"),
                  xref='paper', yref='y')

    # Mean line for UnitPrice, measured against the secondary Y-axis.
    fig.add_shape(type="line",
                  x0=0, x1=1,
                  y0=mean_unit_price, y1=mean_unit_price,
                  line=dict(color="green", width=2, dash="dash"),
                  xref='paper', yref='y2')

    # Layout for the dual axes.
    fig.update_layout(
        template='plotly_white',
        title=f"SalesVolume and UnitPrice - {value} ({group_by_option})",
        xaxis_title='Week-Year',
        yaxis_title='Sales Volume',
        yaxis2=dict(title='UnitPrice', overlaying='y', side='right'),
        legend=dict(x=0.9, y=1.15),
        hovermode="x unified",  # show both values in one tooltip
        height=600,
        margin=dict(l=50, r=50, t=50, b=50)
    )

    # Rotate X-axis labels so the week-year ticks stay readable.
    fig.update_xaxes(tickangle=90)

    st.plotly_chart(fig, use_container_width=True)
|
244 |
|
|
|
|
|
245 |
|
|
|
|
|
|
|
|
|
246 |
|
247 |
+
##########################################################################################################
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|